In [2]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scrapper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [3]:
# Pull settings from a local .env file; override=True asks python-dotenv to let
# those values take precedence over variables already set in the environment.
load_dotenv(override=True)
# NOTE(review): api_key is not referenced again in the cells visible here; the
# client below is created with OpenAI() and no explicit key argument.
api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
# Model used by select_relevant_links() for the link-selection call.
MODEL = 'gpt-5-nano'
# Shared client used by every chat-completion call in this notebook. It is built
# with no arguments, so credentials are presumably picked up from the
# environment loaded above -- TODO confirm.
openai = OpenAI()

In [5]:
# Fetch the links found on a sample site.
# NOTE(review): this module-level `links` is not read by any later cell shown
# here (the functions below use their own local `links`).
links = fetch_website_links("https://edwarddonner.com")

In [6]:
# System prompt for the link-selection call: tells the model to pick
# brochure-relevant links and to answer as a JSON object with a "links" list
# whose items carry "type" and "url" keys (the shape the later cells index into).
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
def get_links_user_prompt(url):
    """Compose the user message for the link-selection request.

    The message consists of fixed instructions mentioning ``url`` followed by
    the links returned by ``fetch_website_links(url)``, one per line.

    Args:
        url: Address of the website whose links should be listed.

    Returns:
        The complete user prompt as a single string.
    """
    # Fixed instruction header; the empty entries reproduce the blank lines of
    # the prompt, and the trailing space after "company," is intentional so the
    # text sent to the model is unchanged.
    instruction_lines = [
        "",
        f"Here is the list of links on the website {url} -",
        "Please decide which of these are relevant web links for a brochure about the company, ",
        "respond with the full https URL in JSON format.",
        "Do not include Terms of Service, Privacy, email links.",
        "",
        "Links (some might be relative links):",
        "",
        "",
    ]
    header = "\n".join(instruction_lines)
    page_links = fetch_website_links(url)
    return header + "\n".join(page_links)

In [8]:
def select_relevant_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Sends ``link_system_prompt`` and the prompt from
    ``get_links_user_prompt(url)`` to ``MODEL`` in JSON mode and parses the
    reply.

    Args:
        url: Address of the website whose links should be filtered.

    Returns:
        The parsed JSON reply as a dict. The ``"links"`` key is always present
        and always a list (empty when the model supplied none), so callers can
        iterate ``result["links"]`` without checking.

    Raises:
        json.JSONDecodeError: If the model returns non-empty text that is not
            valid JSON.
    """
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    raw_reply = response.choices[0].message.content
    # The message content can be None or empty; json.loads(None) would raise a
    # TypeError, so treat that case as "no links selected".
    parsed = json.loads(raw_reply) if raw_reply else {}
    if not isinstance(parsed, dict):
        parsed = {}
    # Guarantee the shape callers rely on even if the model left out "links"
    # or returned something other than a list for it.
    if not isinstance(parsed.get("links"), list):
        parsed["links"] = []
    print(f"Found {len(parsed['links'])} relevant links")
    return parsed

In [9]:
# Try the link selector on a real site; as the last expression in the cell, the
# returned dict is shown as the cell output.
select_relevant_links("https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 12 relevant links


{'links': [{'type': 'homepage', 'url': 'https://huggingface.co/'},
  {'type': 'brand/about page', 'url': 'https://huggingface.co/brand'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'community forum', 'url': 'https://discuss.huggingface.co'},
  {'type': 'status page', 'url': 'https://status.huggingface.co/'},
  {'type': 'Discord community', 'url': 'https://huggingface.co/join/discord'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'}]}

In [14]:
def fetch_page_and_all_relevant_links(url):
    """Gather the landing page and every model-selected page into one text.

    Fetches the contents of ``url``, asks ``select_relevant_links`` for the
    relevant links, then fetches each of those in turn.

    Args:
        url: Address of the company's landing page.

    Returns:
        A string starting with a "## Landing Page:" section, followed by a
        "## Relevant Links:" heading and one "### Link: <type>" section per
        selected link containing whatever ``fetch_website_contents`` returned
        for it.
    """
    landing_text = fetch_website_contents(url)
    selection = select_relevant_links(url)
    # Collect the pieces in order and concatenate them once at the end.
    sections = [f"## Landing Page:\n\n{landing_text}\n## Relevant Links:\n"]
    for entry in selection['links']:
        sections.append(f"\n\n### Link: {entry['type']}\n")
        sections.append(fetch_website_contents(entry["url"]))
    return "".join(sections)

In [10]:
# System prompt for the brochure-writing call: sets the audience (customers,
# investors, recruits) and asks for markdown output without code blocks.
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [11]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """Compose the user message for the brochure-writing request.

    The message is a short instruction naming the company, followed by the
    text returned by ``fetch_page_and_all_relevant_links(url)``, cut to at
    most ``max_chars`` characters.

    Args:
        company_name: Name of the company, inserted into the instructions.
        url: Address of the company's landing page.
        max_chars: Maximum length of the returned prompt in characters.
            Defaults to 5,000 (the previously hard-coded limit). Pass ``None``
            to disable truncation.

    Returns:
        The user prompt string, truncated to ``max_chars`` if it is longer.
    """
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    # The cut is applied to the whole prompt (instructions included), so page
    # text beyond the limit is dropped.
    if max_chars is not None:
        user_prompt = user_prompt[:max_chars]
    return user_prompt

In [12]:
def stream_brochure(company_name, url, model="gpt-4.1-mini"):
    """Generate a brochure and render it live as markdown while it streams.

    Sends ``brochure_system_prompt`` and the prompt from
    ``get_brochure_user_prompt(company_name, url)`` to the model with
    ``stream=True``, and refreshes a single notebook display area with the
    text accumulated so far after each content-bearing chunk.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.
        model: Chat model used to write the brochure. Defaults to
            "gpt-4.1-mini", the previously hard-coded value.

    Returns:
        None. The brochure is shown through the notebook display only.
    """
    stream = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ""
    # display_id=True returns a handle so the same output area can be updated.
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A streamed chunk may arrive with an empty `choices` list (for
        # example a usage-only chunk); indexing [0] on it would raise
        # IndexError, so skip it.
        if not chunk.choices:
            continue
        # delta.content can be None (e.g. on the final chunk); treat as empty.
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [15]:
# Generate and stream a brochure for a real company; the rendered markdown
# appears below as the cell output.
stream_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 14 relevant links


# Hugging Face Brochure

---

## About Hugging Face

**Hugging Face** is the leading collaboration platform and community for the machine learning (ML) and artificial intelligence (AI) ecosystem. It is equipped as a central hub where ML engineers, scientists, and enthusiasts share, discover, experiment, and collaborate on open-source ML models, datasets, and applications. Hugging Face is shaping the future of AI with an open and ethical approach, powering the next generation of AI innovation.

---

## What We Offer

- **Models**: Access and contribute to over 1 million machine learning models across multiple modalities including text, image, video, audio, and 3D.
- **Datasets**: Explore a vast repository of more than 250,000 datasets to train, evaluate and experiment with machine learning algorithms.
- **Spaces**: Build and share ML-powered apps effortlessly using Hugging Face’s hosting platform.
- **Community**: Connect with a fast-growing, global AI community for collaboration, learning, and sharing.
- **Enterprise Solutions**: Scalable AI platform tailored for teams and large organizations, offering enterprise-grade security, analytics, flexible compute options, and dedicated support.

---

## Platform Highlights

- **Open Source at Core**: Leveraging the Hugging Face open-source stack ensures rapid development and innovation.
- **Multi-Modal Support**: Support for diverse data types and ML technologies—from natural language processing to image and video generation.
- **Portfolio Building**: Users can build and showcase their ML profile by sharing work directly on the platform.
- **Advanced Compute and Hosting**: Paid compute plans and enterprise-grade infrastructure, including ZeroGPU acceleration for enhanced performance.

---

## Enterprise Hub Features

- Secure Single Sign-On (SSO) integration for seamless identity management.
- Granular access controls with resource groups and customized token management.
- Comprehensive audit logs for governance and security oversight.
- Advanced analytics dashboard for usage tracking and optimization.
- Expandable private storage and data management tools.
- Flexible contract options tailored for teams and enterprises.
- Enhanced scalability via ZeroGPU quota boosts.

---

## Company Culture

Hugging Face thrives on openness, collaboration, and innovation. The company fosters a vibrant, inclusive community that coalesces around a shared vision of building ethical AI technologies for the benefit of all. It empowers contributors through open resources and believes in driving forward the AI revolution collectively.

---

## Customers and Community

- Hugging Face supports a diverse global user base including AI researchers, developers, enterprises, and educators.
- Leading organizations rely on Hugging Face for cutting-edge AI model deployment and experimentation.
- The platform hosts thousands of open-source projects, attracting millions of downloads and collaboration sessions monthly.

---

## Careers at Hugging Face

Join a passionate team actively pushing the boundaries of AI technology. Hugging Face seeks talent across machine learning research, software engineering, product management, and community engagement. Employees thrive in an environment that values innovation, ethical AI development, and global collaboration.

Explore current job openings and internship opportunities on the Hugging Face careers page.

---

## Connect with Hugging Face

- Website: https://huggingface.co
- GitHub, Twitter, LinkedIn, Discord for community interaction and updates.
- Extensive documentation, blog, and forum resources for learning and support.

---

### Brand Identity

- Vibrant colors reflecting innovation:  
  - Yellow (#FFD21E)  
  - Orange (#FF9D00)  
  - Gray (#6B7280)  
- Accessible brand assets available for partners and community use.

---

**Hugging Face: The AI community building the future, together.**