In [1]:
import os
import json
from dotenv import load_dotenv
from IPython.display import display, Markdown, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [3]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good")
else:
    print("There might be a problem with the API key")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good


In [4]:
links = fetch_website_links('https://huggingface.co')
links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/MiniMaxAI/MiniMax-M2.1',
 '/tencent/HY-MT1.5-1.8B',
 '/zai-org/GLM-4.7',
 '/Qwen/Qwen-Image-2512',
 '/LGAI-EXAONE/K-EXAONE-236B-A23B',
 '/models',
 '/spaces/Wan-AI/Wan2.2-Animate',
 '/spaces/mrfakename/Z-Image-Turbo',
 '/spaces/microsoft/TRELLIS.2',
 '/spaces/selfit-camera/Omni-Image-Editor',
 '/spaces/prithivMLmods/Qwen-Image-Edit-2511-LoRAs-Fast',
 '/spaces',
 '/datasets/facebook/research-plan-gen',
 '/datasets/bigai/TongSIM-Asset',
 '/datasets/bshada/open-schematics',
 '/datasets/nebius/SWE-rebench-openhands-trajectories',
 '/datasets/gaia-benchmark/GAIA',
 '/datasets',
 '/join',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/inference/models',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly

In [5]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
def get_links_user_prompt(url):
    user_prompt="""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):
"""

    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [7]:
print(get_links_user_prompt('https://huggingface.co'))


Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):
/
/models
/datasets
/spaces
/docs
/enterprise
/pricing
/login
/join
/spaces
/models
/MiniMaxAI/MiniMax-M2.1
/tencent/HY-MT1.5-1.8B
/zai-org/GLM-4.7
/Qwen/Qwen-Image-2512
/LGAI-EXAONE/K-EXAONE-236B-A23B
/models
/spaces/Wan-AI/Wan2.2-Animate
/spaces/mrfakename/Z-Image-Turbo
/spaces/microsoft/TRELLIS.2
/spaces/selfit-camera/Omni-Image-Editor
/spaces/prithivMLmods/Qwen-Image-Edit-2511-LoRAs-Fast
/spaces
/datasets/facebook/research-plan-gen
/datasets/bigai/TongSIM-Asset
/datasets/bshada/open-schematics
/datasets/nebius/SWE-rebench-openhands-trajectories
/datasets/gaia-benchmark/GAIA
/datasets
/join
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/inference/models
/pricing#endpoints

In [10]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [11]:
select_relevant_links('https://huggingface.co')

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 12 relevant links


{'links': [{'type': 'about page', 'url': 'https://huggingface.co/brand'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'home page', 'url': 'https://huggingface.co/'},
  {'type': 'docs', 'url': 'https://huggingface.co/docs'},
  {'type': 'product page', 'url': 'https://endpoints.huggingface.co'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'},
  {'type': 'Discord', 'url': 'https://huggingface.co/join/discord'},
  {'type': 'Community forum', 'url': 'https://discuss.huggingface.co'},
  {'type': 'Status page', 'url': 'https://status.huggingface.co/'}]}

In [12]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [14]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 3 relevant links
## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
MiniMaxAI/MiniMax-M2.1
Updated
6 days ago
‚Ä¢
171k
‚Ä¢
778
zai-org/GLM-4.7
Updated
10 days ago
‚Ä¢
31.2k
‚Ä¢
1.39k
tencent/HY-MT1.5-1.8B
Updated
1 day ago
‚Ä¢
1.96k
‚Ä¢
474
Qwen/Qwen-Image-2512
Updated
2 days ago
‚Ä¢
5.83k
‚Ä¢
314
LGAI-EXAONE/K-EXAONE-236B-A23B
Updated
about 5 hours ago
‚Ä¢
250
‚Ä¢
272
Browse 2M+ models
Spaces
Running
Featured
3.37k
Wan2.2 Animate
üëÅ
3.37k
Wan2.2 Animate
Running
on
Zero
951
Z Image Turbo
üñº
951
Generate images from text prompts
Running
on
Zero
Featured
679
TRELLIS.2
üè¢
679
High-fidelity 3D Gen

In [15]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [17]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000]
    return user_prompt

In [18]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 3 relevant links


'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nMiniMaxAI/MiniMax-M2.1\nUpdated\n6 days ago\n‚Ä¢\n171k\n‚Ä¢\n778\nzai-org/GLM-4.7\nUpdated\n10 days ago\n‚Ä¢\n31.2k\n‚Ä¢\n1.39k\ntencent/HY-MT1.5-1.8B\nUpdated\n1 day ago\n‚Ä¢\n1.96k\n‚Ä¢\n474\nQwen/Qwen-Image-2512\nUpdated\n2 days ago\n‚Ä¢\n5.83k\n‚Ä¢\n314\nLGAI-EXAONE/K-EXAONE-236B-A23B\nUpdated\nabout 5 hours ago\n‚Ä¢\n250\n‚Ä¢\n272\nBrowse 2M+ models\nSpaces\nRunning\nFeatured\n3.37k\

In [19]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [20]:
create_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 10 relevant links


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


# Hugging Face

## The AI Community Building the Future

Hugging Face is a vibrant collaboration platform at the heart of the machine learning (ML) community. It empowers engineers, scientists, and AI enthusiasts to create, discover, and share open-source machine learning models, datasets, and applications. With a fast-growing global community and some of the most widely-used ML libraries and tools, Hugging Face drives the AI revolution forward‚Äîopenly and ethically.

---

## What We Offer

- **2 Million+ ML Models:** Explore a vast collection of models across various modalities including text, image, video, audio, and 3D.
- **500k+ Datasets:** Access and contribute to large datasets supporting diverse machine learning tasks.
- **1 Million+ Applications (Spaces):** Interactive ML demos and apps you can run or contribute to.
- **Open Collaboration:** Host unlimited public repositories for models, datasets, and applications to build and share your ML portfolio.
- **Enterprise-Grade Solutions:** Secure, scalable tools for teams and organizations to accelerate AI development with advanced access controls and dedicated support.

---

## Enterprise & Team Solutions

Hugging Face offers flexible subscription plans:

- **Team Plan:** Starting at $20 per user/month; designed for smaller teams needing collaboration tools and compute resources.
- **Enterprise Plan:** Custom contracts offering features such as:
  - Single Sign-On (SSO) for secure access
  - Region-specific data management
  - Audit logs and granular access control
  - Centralized token and billing management
  - Advanced compute options including 5x increased ZeroGPU quota
  - Private datasets viewer and storage expansion
  - Detailed usage analytics to optimize resource allocation  
All built with enterprise-grade security and compliance in mind.

---

## Company Culture

At Hugging Face, community and openness are core values. The platform fosters an inclusive, ethical, and collaborative culture where:
- Developers, researchers, and businesses share their work freely to advance AI for all.
- Innovation is community-driven, accelerating the pace of machine learning breakthroughs.
- Education and portfolio-building are encouraged through sharing and discovery.
- A talented science team pushes the technological edge to build the future of artificial intelligence.

---

## Our Customers & Community

Our users range from individual ML practitioners and researchers, to startups and Fortune 500 enterprises. Companies come to Hugging Face to:
- Leverage cutting-edge open-source models and datasets.
- Quickly prototype and deploy AI applications.
- Collaborate securely at scale across teams.
- Tap into the expertise of a large and active global AI community.

---

## Careers at Hugging Face

Join a team that is pioneering open and ethical AI. We look for:
- Software engineers passionate about ML infrastructure and tools.
- Research scientists advancing AI frontiers.
- Community managers, product designers, and business professionals eager to support and grow the AI ecosystem.

At Hugging Face, you will work on impactful projects with a talented, mission-driven team that values innovation, transparency, and collaboration.

---

## Connect With Us

- Visit our Hub: [huggingface.co](https://huggingface.co)  
- Join our Community: Active forums, GitHub repos, and Discord  
- Follow us on [Twitter](https://twitter.com/huggingface), [LinkedIn](https://linkedin.com/company/huggingface)

---

**Hugging Face‚ÄîYour partner in building the AI-powered future.**