import streamlit as st
import requests
import base64
import os
import asyncio
from huggingface_hub import HfApi
import plotly.express as px
import zipfile

# Initialize the Hugging Face Hub API client
api = HfApi()

# Directory where downloaded profile pages are stored
HTML_DIR = "generated_html_pages"
os.makedirs(HTML_DIR, exist_ok=True)

# Directory where generated ZIP archives are stored
ZIP_DIR = "generated_zips"
os.makedirs(ZIP_DIR, exist_ok=True)

# Default list of Hugging Face usernames shown in the text area
default_users = {
    "users": [
        "awacke1", "rogerxavier", "jonatasgrosman", "kenshinn", "Csplk", "DavidVivancos",
        "cdminix", "Jaward", "TuringsSolutions", "Severian", "Wauplin",
        "phosseini", "Malikeh1375", "gokaygokay", "MoritzLaurer", "mrm8488",
        "TheBloke", "lhoestq", "xw-eric", "Paul", "Muennighoff",
        "ccdv", "haonan-li", "chansung", "lukaemon", "hails",
        "pharmapsychotic", "KingNish", "merve", "ameerazam08", "ashleykleynhans"
    ]
}


# Fetch a user's models and datasets without blocking the event loop
async def fetch_user_content(username):
    try:
        # list_models/list_datasets return lazy paginated iterators, so
        # consume them inside the worker thread; calling list() afterwards
        # would perform the HTTP requests on the event-loop thread.
        models = await asyncio.to_thread(lambda: list(api.list_models(author=username)))
        datasets = await asyncio.to_thread(lambda: list(api.list_datasets(author=username)))
        return {
            "username": username,
            "models": models,
            "datasets": datasets
        }
    except Exception as e:
        return {"username": username, "error": str(e)}


# Download a user's public profile page and save it locally as HTML
def download_user_page(username):
    url = f"https://huggingface.co/{username}"
    try:
        response = requests.get(url, timeout=30)  # avoid hanging on a dead connection
        response.raise_for_status()
        html_content = response.text
        html_file_path = os.path.join(HTML_DIR, f"{username}.html")
        with open(html_file_path, "w", encoding="utf-8") as html_file:
            html_file.write(html_content)
        return html_file_path, None
    except Exception as e:
        return None, str(e)


# Bundle the downloaded HTML files into a single ZIP archive.
# st.cache_resource keys on the list of file paths, so the archive is
# only rebuilt when the set of downloaded pages changes.
@st.cache_resource
def create_zip_of_files(files):
    zip_name = "HuggingFace_User_Pages.zip"
    zip_file_path = os.path.join(ZIP_DIR, zip_name)
    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
        for file in files:
            # Flatten the archive: store each file under its basename
            zipf.write(file, arcname=os.path.basename(file))
    return zip_file_path


# Build a base64 data-URI link so the ZIP can be downloaded from markdown
@st.cache_resource
def get_zip_download_link(zip_file):
    with open(zip_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    href = f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(zip_file)}">📥 Download All HTML Pages as ZIP</a>'
    return href
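
# Note: st.download_button is a simpler, built-in alternative to a base64
# data-URI link. A minimal sketch (assuming the ZIP already exists on disk):
#
#   with open(zip_file_path, "rb") as f:
#       st.download_button(
#           label="📥 Download All HTML Pages as ZIP",
#           data=f.read(),
#           file_name="HuggingFace_User_Pages.zip",
#           mime="application/zip",
#       )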


# Fetch content for all users concurrently
async def fetch_all_users(usernames):
    tasks = [fetch_user_content(username) for username in usernames]
    return await asyncio.gather(*tasks)
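
# fetch_user_content catches its own exceptions and returns an error dict,
# so the gather() call above never raises and return_exceptions is not needed.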


# Batch helper: download every user's page, collecting successes and errors.
# (Kept as a utility; the UI below calls download_user_page per user instead.)
def get_all_html_files(usernames):
    html_files = []
    errors = {}
    for username in usernames:
        html_file, error = download_user_page(username)
        if html_file:
            html_files.append(html_file)
        else:
            errors[username] = error
    return html_files, errors


st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")

user_input = st.text_area(
    "Enter Hugging Face usernames (one per line):",
    value="\n".join(default_users["users"]),
    height=300
)

if st.button("Show User Content"):
    if user_input:
        username_list = [username.strip() for username in user_input.split('\n') if username.strip()]

        # Fetch every user's models and datasets concurrently
        user_data_list = asyncio.run(fetch_all_users(username_list))

        # Per-user counts for the statistics charts below
        stats = {"username": [], "models_count": [], "datasets_count": []}

        # Pages that downloaded successfully, to be bundled into the ZIP
        successful_html_files = []
| st.markdown("### User Content Overview") |
| for user_data in user_data_list: |
| username = user_data["username"] |
| with st.container(): |
| |
| st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})") |
| |
| if "error" in user_data: |
| st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️") |
| else: |
| models = user_data["models"] |
| datasets = user_data["datasets"] |
| |
| |
| html_file_path, download_error = download_user_page(username) |
| if html_file_path: |
| successful_html_files.append(html_file_path) |
| st.success(f"✅ Successfully downloaded {username}'s page.") |
| else: |
| st.error(f"❌ Failed to download {username}'s page: {download_error}") |
| |
| |
| stats["username"].append(username) |
| stats["models_count"].append(len(models)) |
| stats["datasets_count"].append(len(datasets)) |
| |
| |
| with st.expander(f"🧠 Models ({len(models)})", expanded=False): |
| if models: |
| for model in models: |
| model_name = model.modelId.split("/")[-1] |
| st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})") |
| else: |
| st.markdown("No models found. 🤷♂️") |
| |
| |
| with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False): |
| if datasets: |
| for dataset in datasets: |
| dataset_name = dataset.id.split("/")[-1] |
| st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})") |
| else: |
| st.markdown("No datasets found. 🤷♀️") |
| |
| st.markdown("---") |

        # Offer all successfully downloaded pages as one ZIP archive
        if successful_html_files:
            zip_file_path = create_zip_of_files(successful_html_files)
            zip_download_link = get_zip_download_link(zip_file_path)
            st.markdown(zip_download_link, unsafe_allow_html=True)
        else:
            st.warning("No HTML files were successfully downloaded to create a ZIP archive.")

        # Bar charts of model and dataset counts per user
        if stats["username"]:
            st.markdown("### User Content Statistics")

            fig_models = px.bar(
                x=stats["username"],
                y=stats["models_count"],
                labels={'x': 'Username', 'y': 'Number of Models'},
                title="Number of Models per User"
            )
            st.plotly_chart(fig_models)

            fig_datasets = px.bar(
                x=stats["username"],
                y=stats["datasets_count"],
                labels={'x': 'Username', 'y': 'Number of Datasets'},
                title="Number of Datasets per User"
            )
            st.plotly_chart(fig_datasets)

    else:
        st.warning("Please enter at least one username. Don't be shy! 😅")


# Sidebar instructions
st.sidebar.markdown("""
## How to use:
1. The text area is pre-filled with a list of Hugging Face usernames; edit it or add more, one per line.
2. Click **'Show User Content'**.
3. Browse each user's models and datasets, with a link to their Hugging Face profile.
4. **Download a ZIP archive** of all the HTML pages via the download link.
5. Check out the statistics charts at the bottom of the page!
""")
|