In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [3]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-

In [5]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [7]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [8]:
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddo

In [9]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [10]:
# Anthropic has made their site harder to scrape, so I'm using HuggingFace..

huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/nanonets/Nanonets-OCR-s',
 '/MiniMaxAI/MiniMax-M1-80k',
 '/tencent/Hunyuan3D-2.1',
 '/Menlo/Jan-nano',
 '/echo840/MonkeyOCR',
 '/models',
 '/spaces/ilcve21/Sparc3D',
 '/spaces/enzostvs/deepsite',
 '/spaces/tencent/Hunyuan3D-2.1',
 '/spaces/MiniMaxAI/MiniMax-M1',
 '/spaces/nvidia/PartPacker',
 '/spaces',
 '/datasets/EssentialAI/essential-web-v1.0',
 '/datasets/institutional/institutional-books-1.0',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/nvidia/Nemotron-Personas',
 '/datasets/nvidia/AceReason-1.1-SFT',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/transformers',
 '/docs/diffusers',
 '/docs/

In [11]:
get_links("https://huggingface.co")

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'community discussion', 'url': 'https://discuss.huggingface.co'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

## Second step: make the brochure!

Assemble all the details into another prompt to GPT4-o

In [12]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [13]:
print(get_all_details("https://huggingface.co"))

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}]}
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
nanonets/Nanonets-OCR-s
Updated
about 14 hours ago
•
98.5k
•
968
MiniMaxAI/MiniMax-M1-80k
Updated
3 days ago
•
6.32k
•
456
tencent/Hunyuan3D-2.1
Updated
1 day ago
•
16k
•
395
Menlo

In [15]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."


In [18]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [19]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nnanonets/Nanonets-OCR-s\nUpdated\nabout 16 hours ago\n•\n98.5k\n•\n970\nMiniMaxAI/MiniMax-M1-80k\nUpdated\n3 days ago\n•\n6.32k\n•\n457\ntencent/Hunyuan3D-2.1\nUpdated\n1 day ago\n•\n16k\n•\n396\nMenlo/Jan-nano\nUpdated\n4 days ago\n•\n20.4k\n•\n320\necho840/MonkeyOCR\nUpdated\n1 day ago\n•\n270\n•\n427\nBrowse 1M+ models\nSpaces\nRunning\n566\n566\nSparc3D\n🏃\nNext-Gen High-Resolution 3D 

In [20]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [21]:
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'company page', 'url': 'https://huggingface.co'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'github page', 'url': 'https://github.com/huggingface'}, {'type': 'linkedin page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'twitter page', 'url': 'https://twitter.com/huggingface'}]}


```markdown
# Hugging Face Brochure

## Welcome to Hugging Face

**The AI Community Building the Future.**  
Hugging Face is the premier platform where the machine learning community collaborates on models, datasets, and applications to drive innovation and discovery in artificial intelligence.

---

## Our Offerings

### **Models**
Explore and leverage over **1 million models** ranging from highly-specialized applications to general-purpose solutions. Some of our trending models this week include:
- **nanonets/Nanonets-OCR-s** - A model for Optical Character Recognition, updated regularly with cutting-edge performance.
- **MiniMaxAI/MiniMax-M1-80k** - A powerful AI model for varied applications.
- **Hunyuan3D-2.1 & Menlo/Jan-nano** - Advanced models for 3D generation and textual analysis.

### **Datasets**
Access our library of **250k+ datasets** crucial for training and benchmarking AI models. Our datasets cater to various domains and provide a rich foundation for ML tasks.

### **Spaces**
Hugging Face hosts a space for running applications in real-time, including:
- **Sparc3D** for high-resolution 3D model generation.
- **DeepSite v2** for generating applications from text prompts.

---

## Our Community

Join a vibrant and active community comprising over **50,000 organizations** including industry giants like Amazon, Google, Microsoft, and more than **7,000 followers** across diverse model contributors. Collaborate, learn, and share with peers passionate about AI and machine learning.

> **"The collaboration platform for ML professionals."**  

---

## Company Culture

At Hugging Face, we foster a culture of **collaboration**, **innovation**, and **inclusivity**. Our commitment to open-source philosophy promotes transparency and collective progress. We believe in empowering individuals to realize their potential within a supportive environment while striving towards shared goals. 

---

## Careers at Hugging Face

We are always on the lookout for **talented individuals** who share our vision of building the future of AI. Available opportunities span across various domains including engineering, marketing, and community engagement. Embrace the chance to grow your career in a field that is transforming the world!

### Current Open Positions:
- Machine Learning Engineer
- Community Manager
- Product Designer

Join us in making a significant impact in the AI landscape!

---

## Connect With Us

To learn more, collaborate, or become part of our thriving community, visit our website at [Hugging Face](https://huggingface.co/) and follow us on social media:
- **Twitter:** [@HuggingFace](https://twitter.com/huggingface)
- **LinkedIn:** [Hugging Face on LinkedIn](https://www.linkedin.com/company/huggingface)
- **Discord:** Join our engaging conversations.

---

**Hugging Face: Building the Future of AI Together.**
```


## Finally - a minor improvement

With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [22]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [23]:
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/about'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}]}


# Hugging Face Company Brochure

---

## Welcome to Hugging Face

**The AI community building the future.** Hugging Face is a collaborative platform at the forefront of the machine learning revolution, providing a space for developers, researchers, and organizations to create, share, and enhance models, datasets, and applications.

---

### What We Offer

- **Models & Datasets**: Access over **1 million ML models** and **250k datasets** tailored for diverse AI tasks.
- **AI Spaces**: A unique environment to build and deploy applications seamlessly, featuring a wide range of cutting-edge functionalities.
- **Collaboration**: Partner with a vibrant community of over **50,000 organizations**, including Fortune 500 giants like Google, Microsoft, and Amazon.

#### Paid Services for Enterprises
- **Compute Solutions**: Start deploying optimized inference endpoints from just **$0.60/hour for GPU.**
- **Enterprise Support**: Premium plans starting at **$20/user/month**, with advanced security features, access controls, and dedicated support.

---

### Company Culture

At Hugging Face, our mission is to democratize machine learning by building open-source tools and fostering collaboration within the community. We believe in transparency, innovation, and inclusiveness, which cultivates an environment where creativity and knowledge thrive. Join us in our quest to push the boundaries of AI!

---

### Join Our Team

Hugging Face is constantly seeking passionate individuals to join our mission. Whether you're an engineer, researcher, marketing expert, or an AI enthusiast, there’s a place for you. By becoming a member of our team, you’ll not only advance your career but also contribute to shaping the future of AI.

**Explore Current Openings:** [Careers at Hugging Face](#)

---

### Our Clients

We empower a multitude of sectors with our cutting-edge solutions. Our customers range from startups to established enterprises, including:

- **AI & ML-oriented companies**: e.g., Ai2, Meta, Grammarly, Writer.
- **Tech giants**: e.g., Amazon, Google, Intel, Microsoft.

Join the ranks of industry leaders who trust Hugging Face to transform their AI implementation strategies.

---

### Connect with Us

Stay updated with our latest innovations, collaborative efforts, and community events by following us on our social platforms:

- [GitHub](https://github.com/huggingface)
- [Twitter](https://twitter.com/huggingface)
- [LinkedIn](https://www.linkedin.com/company/hugging-face)
- [Discord](https://discord.gg/huggingface)

---

**Explore the Future of AI with Hugging Face—Together, we can achieve greatness!**

### Get Started Today!

**[Sign Up](#)** | **[Explore AI Apps](#)** | **[Contact Us](#)** 

--- 

*This brochure was created with the latest data available as of October 2023.*

In [24]:
# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:

stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}


# Hugging Face Company Brochure

## Welcome to Hugging Face

### The AI Community Building the Future
At Hugging Face, we are dedicated to democratizing machine learning (ML) and fostering a collaborative community where developers, researchers, and organizations can come together to create, discover, and innovate in the realm of artificial intelligence.

---

### What We Offer

- **Models**: Access a vast library of over **1 million machine learning models** across various modalities, including text, image, video, audio, and even 3D.
  
- **Datasets**: Explore **250,000+ datasets** essential for training ML models - a comprehensive resource for all your data needs.

- **Spaces**: Utilize our platform to host and collaborate on public models and applications seamlessly.

- **Enterprise Solutions**: We offer enterprise-grade solutions tailored for organizations to build AI with enhanced security, access controls, and dedicated support.

---

### Customer Base
Join the ranks of over **50,000 organizations** utilizing Hugging Face, including tech giants like:
- Google
- Microsoft
- Amazon
- Meta
- Grammarly

Each entity benefits from our unmatched resources in AI, evolving their operations through machine learning.

---

### Company Culture
At Hugging Face, we strongly believe in the power of **open-source collaboration**. Our culture is built on the principles of transparency, inclusivity, and community engagement. Every member contributes to our mission of democratizing good machine learning, and we welcome new ideas and innovations.

---

### Careers & Opportunities
We are constantly seeking passionate individuals to join our team. If you are excited by the prospect of working in machine learning and AI, consider a career with us! We offer numerous opportunities for growth and development within an inclusive and dynamic work environment.

Explore our job openings and become part of a team that is committed to shaping the future of AI. 

---

### Join Us
To learn more or to get involved:
- **Visit Our Website**: [Hugging Face](https://huggingface.co)
- **Connect with us on Social Media**: [Twitter](https://twitter.com/huggingface) | [LinkedIn](https://www.linkedin.com/company/huggingface) | [Discord](https://discord.com/invite/huggingface)

---

Let’s build the future of AI together! Join us in pushing the boundaries of machine learning.