import streamlit as st
import streamlit.components.v1 as components
import requests
import base64
import os
import asyncio
from huggingface_hub import HfApi, snapshot_download
import plotly.express as px
import zipfile
import tempfile
import shutil
from bs4 import BeautifulSoup
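
# Streamlit app: browse a list of Hugging Face users, list each user's models
# and datasets, archive their profile pages as HTML, and zip repo snapshots
# for download.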


api = HfApi()

HTML_DIR = "generated_html_pages"
ZIP_DIR = "generated_zips"
SNAPSHOT_DIR = "snapshot_downloads"

for directory in (HTML_DIR, ZIP_DIR, SNAPSHOT_DIR):
    os.makedirs(directory, exist_ok=True)

default_users = {
    "users": [
        "awacke1", "rogerxavier", "jonatasgrosman", "kenshinn", "Csplk", "DavidVivancos",
        "cdminix", "Jaward", "TuringsSolutions", "Severian", "Wauplin",
        "phosseini", "Malikeh1375", "gokaygokay", "MoritzLaurer", "mrm8488",
        "TheBloke", "lhoestq", "xw-eric", "Paul", "Muennighoff",
        "ccdv", "haonan-li", "chansung", "lukaemon", "hails",
        "pharmapsychotic", "KingNish", "merve", "ameerazam08", "ashleykleynhans",
    ]
}


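# Fetch one user's models and datasets off the main thread. The results are
# materialized inside the worker thread because HfApi's list_* helpers may
# page through the Hub API lazily on iteration.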
async def fetch_user_content(username):
    try:
        models = await asyncio.to_thread(lambda: list(api.list_models(author=username)))
        datasets = await asyncio.to_thread(lambda: list(api.list_datasets(author=username)))
        return {
            "username": username,
            "models": models,
            "datasets": datasets,
        }
    except Exception as e:
        return {"username": username, "error": str(e)}


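# Save a user's public profile page to disk so it can be re-rendered in the
# app and mined for images and videos later.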
def download_user_page(username):
    url = f"https://huggingface.co/{username}"
    try:
        # A timeout keeps one unresponsive profile page from hanging the app.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        html_content = response.text
        html_file_path = os.path.join(HTML_DIR, f"{username}.html")
        with open(html_file_path, "w", encoding='utf-8') as html_file:
            html_file.write(html_content)
        return html_file_path, html_content, None
    except Exception as e:
        return None, None, str(e)


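# Cached so Streamlit reruns don't rebuild an identical archive on every
# widget interaction.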
@st.cache_data
def create_zip_of_files(files, zip_name):
    zip_file_path = os.path.join(ZIP_DIR, zip_name)
    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
        for file in files:
            zipf.write(file, arcname=os.path.basename(file))
    return zip_file_path


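# Build a base64 data-URI download link; main() renders it with
# unsafe_allow_html.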
@st.cache_data
def get_download_link(file_path, link_text):
    with open(file_path, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    filename = os.path.basename(file_path)
    return f'<a href="data:application/zip;base64,{b64}" download="{filename}">{link_text}</a>'


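# Fan out one fetch task per username and gather the results concurrently.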
async def fetch_all_users(usernames):
    tasks = [fetch_user_content(username) for username in usernames]
    return await asyncio.gather(*tasks)


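# Snapshot a model or dataset repo into a temp dir, zip it, and report the
# outcome as a (zip_path, error) pair so callers can tell success from
# failure.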
def perform_snapshot_download(repo_id, repo_type):
    temp_dir = tempfile.mkdtemp()
    try:
        snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=temp_dir)
        zip_name = f"{repo_id.replace('/', '_')}_{repo_type}.zip"
        zip_path = os.path.join(SNAPSHOT_DIR, zip_name)
        shutil.make_archive(zip_path[:-4], 'zip', temp_dir)
        return zip_path, None
    except Exception as e:
        return None, str(e)
    finally:
        # Clean up the temp checkout whether or not the download succeeded.
        shutil.rmtree(temp_dir, ignore_errors=True)


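# Render the downloaded profile pages three to a row inside sandboxed
# iframes.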
def display_html_grid(html_files):
    num_columns = 3
    for i in range(0, len(html_files), num_columns):
        cols = st.columns(num_columns)
        for j in range(num_columns):
            if i + j < len(html_files):
                with cols[j]:
                    with open(html_files[i + j], 'r', encoding='utf-8') as file:
                        html_content = file.read()
                    soup = BeautifulSoup(html_content, 'html.parser')
                    st.subheader(f"Page: {os.path.basename(html_files[i + j])}")
                    # Fall back to the whole document if the page has no <body>.
                    body = soup.body or soup
                    components.html(str(body), height=300, scrolling=True)


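# Show every absolutely-addressed <img> found in a saved profile page.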
def display_images_from_html(html_file):
    with open(html_file, 'r', encoding='utf-8') as file:
        html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        if src and src.startswith('http'):
            st.image(src, use_container_width=True)


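# Show every absolutely-addressed <video> source found in a saved profile
# page.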
def display_videos_from_html(html_file):
    with open(html_file, 'r', encoding='utf-8') as file:
        html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    for video in soup.find_all('video'):
        # Some <video> tags carry src directly instead of a nested <source>.
        source = video.find('source')
        src = source.get('src') if source else video.get('src')
        if src and src.startswith('http'):
            st.video(src)


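# Streamlit entry point: collect usernames, fetch their content, and render
# the overview, galleries, download links, and statistics.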
def main():
    st.title("🧑💼People🧠Models📚Datasets")

    user_input = st.text_area(
        "Enter Hugging Face usernames (one per line):",
        value="\n".join(default_users["users"]),
        height=300,
    )

    # The per-repo "Download Snapshot" buttons trigger a rerun that would
    # reset a bare `if st.button(...)` block, so the toggle is persisted in
    # session_state.
    if st.button("Show User Content and Download Snapshots"):
        st.session_state["show_content"] = True

    if st.session_state.get("show_content"):
        if user_input:
            username_list = [username.strip() for username in user_input.split('\n') if username.strip()]

            user_data_list = asyncio.run(fetch_all_users(username_list))

            stats = {"username": [], "models_count": [], "datasets_count": []}
            successful_html_files = []
            snapshot_downloads = []

            st.markdown("### User Content Overview")
            for user_data in user_data_list:
                username = user_data["username"]
                with st.container():
                    st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")

                    if "error" in user_data:
                        st.warning(f"⚠️ {username}: {user_data['error']}")
                    else:
                        models = user_data["models"]
                        datasets = user_data["datasets"]

                        html_file_path, html_content, download_error = download_user_page(username)
                        if html_file_path and html_content:
                            successful_html_files.append(html_file_path)
                            st.success(f"✅ Successfully downloaded {username}'s page.")

                            # Render the saved page in a sandboxed iframe rather
                            # than injecting raw HTML into the app page.
                            with st.expander(f"View {username}'s HTML page"):
                                components.html(html_content, height=400, scrolling=True)
                        else:
                            st.error(f"❌ Failed to download {username}'s page: {download_error}")

                        stats["username"].append(username)
                        stats["models_count"].append(len(models))
                        stats["datasets_count"].append(len(datasets))

| with st.expander(f"🧠 Models ({len(models)})", expanded=False): |
| if models: |
| for model in models: |
| model_name = model.modelId.split("/")[-1] |
| st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})") |
| if st.button(f"Download Snapshot: {model_name}", key=f"model_{model.modelId}"): |
| with st.spinner(f"Downloading snapshot for {model_name}..."): |
| result = perform_snapshot_download(model.modelId, "model") |
| if isinstance(result, str): |
| st.error(f"Failed to download {model_name}: {result}") |
| else: |
| snapshot_downloads.append(result) |
| st.success(f"Successfully downloaded snapshot for {model_name}") |
| else: |
| st.markdown("No models found. 🤷♂️") |
| |
| with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False): |
| if datasets: |
| for dataset in datasets: |
| dataset_name = dataset.id.split("/")[-1] |
| st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})") |
| if st.button(f"Download Snapshot: {dataset_name}", key=f"dataset_{dataset.id}"): |
| with st.spinner(f"Downloading snapshot for {dataset_name}..."): |
| result = perform_snapshot_download(dataset.id, "dataset") |
| if isinstance(result, str): |
| st.error(f"Failed to download {dataset_name}: {result}") |
| else: |
| snapshot_downloads.append(result) |
| st.success(f"Successfully downloaded snapshot for {dataset_name}") |
| else: |
| st.markdown("No datasets found. 🤷♀️") |
| |
| st.markdown("---") |
| |
            if successful_html_files:
                st.markdown("### HTML Grid View")
                display_html_grid(successful_html_files)

                st.markdown("### Image Gallery")
                for html_file in successful_html_files:
                    display_images_from_html(html_file)

                st.markdown("### Video Gallery")
                for html_file in successful_html_files:
                    display_videos_from_html(html_file)

                html_zip_path = create_zip_of_files(successful_html_files, "HuggingFace_User_Pages.zip")
                html_download_link = get_download_link(html_zip_path, "📥 Download All HTML Pages as ZIP")
                st.markdown(html_download_link, unsafe_allow_html=True)
            else:
                st.warning("No HTML files were downloaded successfully, so there is nothing to archive.")

            if snapshot_downloads:
                snapshot_zip_path = create_zip_of_files(snapshot_downloads, "HuggingFace_Snapshots.zip")
                snapshot_download_link = get_download_link(snapshot_zip_path, "📥 Download All Snapshots as ZIP")
                st.markdown(snapshot_download_link, unsafe_allow_html=True)

            if stats["username"]:
                st.markdown("### User Content Statistics")

                fig_models = px.bar(
                    x=stats["username"],
                    y=stats["models_count"],
                    labels={'x': 'Username', 'y': 'Number of Models'},
                    title="Number of Models per User",
                )
                st.plotly_chart(fig_models)

                fig_datasets = px.bar(
                    x=stats["username"],
                    y=stats["datasets_count"],
                    labels={'x': 'Username', 'y': 'Number of Datasets'},
                    title="Number of Datasets per User",
                )
                st.plotly_chart(fig_datasets)

        else:
            st.warning("Please enter at least one username. Don't be shy! 😅")

    st.sidebar.markdown("""
    ## How to use:
    1. The text area is pre-filled with a list of Hugging Face usernames. Edit it or add more usernames, one per line.
    2. Click **'Show User Content and Download Snapshots'**.
    3. View each user's models and datasets along with a link to their Hugging Face profile.
    4. Click a **'Download Snapshot'** button to fetch a zipped snapshot of that model or dataset.
    5. **Download ZIP archives** containing all the HTML pages and snapshots by clicking the download links.
    6. Check out the statistics visualizations below!
    7. **New features:**
       - View all downloaded HTML pages in a grid layout
       - Browse image and video galleries extracted from the HTML pages
    """)


if __name__ == "__main__":
    main()