# Snarky brochure

In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [3]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [4]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

## Link prompts
### Multi-shot system prompt

In [5]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in these examples:"
link_system_prompt += """
Example 1
['https://my-company.com', 'https://my-company.com/about-me', 'https://www.linkedin.com/in/my-company/', 'mailto:joe.blog@gmail.com', 'https://my-company.com/news', '/case-studies', 'https://patents.google.com/patent/US20210049536A1/', 'https://my-company.com/workshop-ai']

    Links:
{
    "links": [
        {"type": "landing page", "url": "https://great-comps.com/about-me"},
        {"type": "about page", "url": "https://great-comps.com/about-me"},
        {"type": "news page": "url": "https://great-comps.com/news"},
        {"type": "case studies page": "url": "https://great-comps.com/case-studies"},
        {"type": "workshop page": "url": "https://great-comps.com/workshop-ai"},
    ]
}
Example 2
['https://www.acmeinc.com', '/#about', '/#projects', '/#experience', '/#skills', 'https://github.com/acmeinc']

    Links:
{
    "links": [
        {"type": "landing page", "url": "https://www.acmeinc.com"},
        {"type": "GitHub projects": "url": "https://github.com/acmeinc"},
    ]
}
"""

In [6]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in these examples:
Example 1
['https://my-company.com', 'https://my-company.com/about-me', 'https://www.linkedin.com/in/my-company/', 'mailto:joe.blog@gmail.com', 'https://my-company.com/news', '/case-studies', 'https://patents.google.com/patent/US20210049536A1/', 'https://my-company.com/workshop-ai']

    Links:
{
    "links": [
        {"type": "landing page", "url": "https://great-comps.com/about-me"},
        {"type": "about page", "url": "https://great-comps.com/about-me"},
        {"type": "news page": "url": "https://great-comps.com/news"},
        {"type": "case studies page": "url": "https://great-comps.com/case-studies"},
        {"type": "workshop page": "url": "https://great-comps.com/workshop-ai"},
    ]
}


### User prompt

## Get links

In [7]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [8]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

## Create brochure

In [9]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

### Snarky system prompt

In [10]:
# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short snarky, entertaining, pun loaded brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."


### User prompt

### Generate brochure in English

In [11]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [12]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))
    return result

In [13]:
brochure_text = create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'landing page', 'url': 'https://huggingface.co'}, {'type': 'models page', 'url': 'https://huggingface.co/models'}, {'type': 'datasets page', 'url': 'https://huggingface.co/datasets'}, {'type': 'spaces page', 'url': 'https://huggingface.co/spaces'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'GitHub', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'}, {'type': 'community forum', 'url': 'https://discuss.huggingface.co'}, {'type': 'status page', 'url': 'https://status.huggingface.co/'}]}


# Hugging Face - The AI Community Building the Future!

Welcome to Hugging Face, where we're not just hugging— we're innovating! Think of us as the cozy blanket of the AI community, wrapping everyone together in a warm, fuzzy embrace of collaboration, creativity, and cutting-edge technology. 🐻❤️

## Who We Are

At Hugging Face, we believe in a **community-first approach**! Whether you're a seasoned ML engineer or just stepping into this brave new world of models, datasets, and applications, our platform is your playground.

- **Models:** With a staggering **1M+ models**—we're the Netflix of AI! Explore our trending hits like black-forest-labs and tencent that'll have your neurons firing faster than a rubber band gun!
- **Datasets:** Drowning in data? Worry not, we’ve got **250k+ datasets** to help you make sense of that chaos. Just think of us as the "data divers" of the ML realm!

## Join the Hug

### Sign Up Today!  
Join the ranks of **50,000+ organizations** already in the know— from **Meta** to **Amazon**, and yes, even **your favorite snack brand** (because who doesn't want a smart cookie?). We’ll help you build your ML profile faster than you can say “AI sausage links”!

### Create & Collaborate  
Host unlimited public models, collaborate on datasets, and share your work with the world. With our **open-source stack**, you’ll be building ML better before your morning coffee even cools down! ☕ Thanks to our awesome community, you’ll never ride solo on this wild AI rollercoaster!

## The Future is Now

### Enterprise Solutions  
Looking to solidify your AI strategies? Our **enterprise-grade security** is tighter than a drum. Benefits also include access controls and dedicated support. For just **$20/user/month**—that’s a steal, folks! 

### Get Your Tech Fix!  
Need horsepower for your ML projects? Dive into our **Compute** options starting at just **$0.60/hour**. Think of it as premium gas for your AI vehicles!

## We're Hiring!

Want to be a part of our adventure? We’re on the lookout for brilliant minds to join the **Hugging Face** family. If contributing to open-source projects and being part of a friendly, innovation-driven culture sounds like your jam, then what are you waiting for? Apply now and let’s make the world a smarter place together!

---

## Contact Us

**Ready to dive into the AI revolution?**  
Join Hugging Face today!  
Support your dreams, one Hug at a time.  

- Website: [Hugging Face](https://huggingface.co)  
- Social: Catch us on **GitHub**, **Twitter**, and **LinkedIn** for all the latest and greatest! 

---

Let’s embrace the future of AI together— after all, it’s a pretty cool world out there when you're hugging it! 🤗

### Translate brochure to another language

In [14]:
translation_sys_prompt = "You are a language translator who is very good at translating business documents from \
English to any language. You preserve the formatting, tone and facts contained in the document."

def translate_brochure(brochure, language):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": translation_sys_prompt},
            {"role": "user", "content": f"Translate the following document into {language}: {brochure}"}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [15]:
translate_brochure(brochure_text, "Spanish")

# Hugging Face - ¡La Comunidad de IA Construyendo el Futuro!

¡Bienvenido a Hugging Face, donde no solo abrazamos, sino que innovamos! Piensa en nosotros como la manta acogedora de la comunidad de IA, envolviendo a todos en un cálido y suave abrazo de colaboración, creatividad y tecnología de vanguardia. 🐻❤️

## Quiénes Somos

En Hugging Face, ¡creemos en un **enfoque centrado en la comunidad**! Ya seas un ingeniero de ML experimentado o estés dando tus primeros pasos en este valiente nuevo mundo de modelos, conjuntos de datos y aplicaciones, nuestra plataforma es tu parque de diversiones.

- **Modelos:** Con más de **1M+ modelos**—¡somos el Netflix de la IA! Explora nuestros éxitos en tendencia como black-forest-labs y tencent que harán que tus neuronas se disparen más rápido que un lanzador de bandas elásticas.
- **Conjuntos de Datos:** ¿Ahogado en datos? No te preocupes, tenemos más de **250k+ conjuntos de datos** para ayudarte a dar sentido a ese caos. ¡Solo piénsalo como los "buceadores de datos" del reino de ML!

## Únete al Abrazo

### ¡Regístrate Hoy!  
Únete a las filas de más de **50,000 organizaciones** ya en la onda— desde **Meta** hasta **Amazon**, y sí, incluso **tu marca de bocadillos favorita** (porque, ¿quién no quiere una galleta inteligente?). ¡Te ayudaremos a construir tu perfil de ML más rápido de lo que puedes decir “salchichas de IA”!

### Crea y Colabora  
Aloja modelos públicos ilimitados, colabora en conjuntos de datos y comparte tu trabajo con el mundo. Con nuestra **tecnología de código abierto**, ¡estarás construyendo ML mejor antes de que tu café de la mañana se enfríe! ☕ Gracias a nuestra increíble comunidad, ¡nunca estarás solo en esta salvaje montaña rusa de IA!

## El Futuro es Ahora

### Soluciones Empresariales  
¿Buscas solidificar tus estrategias de IA? Nuestra **seguridad de nivel empresarial** es más ajustada que un tambor. Los beneficios también incluyen controles de acceso y soporte dedicado. Por solo **$20/usuario/mes**—¡eso es una ganga, amigos!

### ¡Consigue Tu Solución Tecnológica!  
¿Necesitas potencia para tus proyectos de ML? Sumérgete en nuestras opciones de **Cómputo** comenzando en solo **$0.60/hora**. ¡Piénsalo como gasolina premium para tus vehículos de IA!

## ¡Estamos Contratando!

¿Quieres ser parte de nuestra aventura? Estamos en busca de mentes brillantes para unirse a la familia **Hugging Face**. Si contribuir a proyectos de código abierto y ser parte de una cultura amigable impulsada por la innovación suena a tu estilo, entonces ¿qué estás esperando? ¡Solicita ahora y hagamos del mundo un lugar más inteligente juntos!

---

## Contáctanos

**¿Listo para sumergirte en la revolución de la IA?**  
¡Únete a Hugging Face hoy!  
Apoya tus sueños, un Abrazo a la vez.  

- Sitio web: [Hugging Face](https://huggingface.co)  
- Redes Sociales: ¡Síguenos en **GitHub**, **Twitter** y **LinkedIn** para todas las novedades y lo mejor! 

---

Abracemos juntos el futuro de la IA— después de todo, ¡es un mundo bastante genial allá afuera cuando lo abrazas! 🤗