In [16]:
import os
import json
from dotenv import load_dotenv
from IPython.display import display, Markdown, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI
import gradio as gr

In [2]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good")
else:
    print("There might be a problem with the API key")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good


In [3]:
links = fetch_website_links('https://huggingface.co')
links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/Qwen/Qwen-Image-2512',
 '/MiniMaxAI/MiniMax-M2.1',
 '/LGAI-EXAONE/K-EXAONE-236B-A23B',
 '/tencent/HY-MT1.5-1.8B',
 '/zai-org/GLM-4.7',
 '/models',
 '/spaces/Wan-AI/Wan2.2-Animate',
 '/spaces/prithivMLmods/Qwen-Image-Edit-2511-LoRAs-Fast',
 '/spaces/mrfakename/Z-Image-Turbo',
 '/spaces/selfit-camera/Omni-Image-Editor',
 '/spaces/tencent/HY-Motion-1.0',
 '/spaces',
 '/datasets/facebook/research-plan-gen',
 '/datasets/wikimedia/wikipedia',
 '/datasets/llm-jp/jhle',
 '/datasets/Anthropic/hh-rlhf',
 '/datasets/Idavidrein/gpqa',
 '/datasets',
 '/join',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/inference/models',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/transformers',

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(url):
    user_prompt="""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):
"""

    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [6]:
print(get_links_user_prompt('https://huggingface.co'))


Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):
/
/models
/datasets
/spaces
/docs
/enterprise
/pricing
/login
/join
/spaces
/models
/Qwen/Qwen-Image-2512
/MiniMaxAI/MiniMax-M2.1
/LGAI-EXAONE/K-EXAONE-236B-A23B
/tencent/HY-MT1.5-1.8B
/zai-org/GLM-4.7
/models
/spaces/Wan-AI/Wan2.2-Animate
/spaces/prithivMLmods/Qwen-Image-Edit-2511-LoRAs-Fast
/spaces/mrfakename/Z-Image-Turbo
/spaces/selfit-camera/Omni-Image-Editor
/spaces/tencent/HY-Motion-1.0
/spaces
/datasets/facebook/research-plan-gen
/datasets/wikimedia/wikipedia
/datasets/llm-jp/jhle
/datasets/Anthropic/hh-rlhf
/datasets/Idavidrein/gpqa
/datasets
/join
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/inference/models
/pricing#endpoints
/pricing#spaces
/pricing
/allenai
/f

In [7]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [8]:
select_relevant_links('https://huggingface.co')

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 13 relevant links


{'links': [{'type': 'about page', 'url': 'https://huggingface.co/brand'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'learn page', 'url': 'https://huggingface.co/learn'},
  {'type': 'community page', 'url': 'https://discuss.huggingface.co'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Discord page', 'url': 'https://huggingface.co/join/discord'},
  {'type': 'endpoints page', 'url': 'https://endpoints.huggingface.co'},
  {'type': 'status page', 'url': 'https://status.huggingface.co'}]}

In [9]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [10]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 9 relevant links
## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
Qwen/Qwen-Image-2512
Updated
5 days ago
‚Ä¢
12.1k
‚Ä¢
446
MiniMaxAI/MiniMax-M2.1
Updated
9 days ago
‚Ä¢
195k
‚Ä¢
856
LGAI-EXAONE/K-EXAONE-236B-A23B
Updated
1 day ago
‚Ä¢
1.52k
‚Ä¢
334
tencent/HY-MT1.5-1.8B
Updated
5 days ago
‚Ä¢
4.75k
‚Ä¢
301
zai-org/GLM-4.7
Updated
13 days ago
‚Ä¢
32.7k
‚Ä¢
1.46k
Browse 2M+ models
Spaces
Running
Featured
3.53k
Wan2.2 Animate
üëÅ
3.53k
Wan2.2 Animate
Running
on
Zero
MCP
Featured
212
Qwen-Image-Edit-2511-LoRAs-Fast
üéÉ
212
Demo of the Collection of Qwen Image Edit LoRAs
Running
on
Zero
1.02k
Z Imag

In [11]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [12]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000]
    return user_prompt

In [13]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 3 relevant links


'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nQwen/Qwen-Image-2512\nUpdated\n5 days ago\n‚Ä¢\n12.1k\n‚Ä¢\n446\nMiniMaxAI/MiniMax-M2.1\nUpdated\n9 days ago\n‚Ä¢\n195k\n‚Ä¢\n856\nLGAI-EXAONE/K-EXAONE-236B-A23B\nUpdated\n1 day ago\n‚Ä¢\n1.52k\n‚Ä¢\n334\ntencent/HY-MT1.5-1.8B\nUpdated\n5 days ago\n‚Ä¢\n4.75k\n‚Ä¢\n301\nzai-org/GLM-4.7\nUpdated\n13 days ago\n‚Ä¢\n32.7k\n‚Ä¢\n1.46k\nBrowse 2M+ models\nSpaces\nRunning\nFeatured\n3.53k\nWan2

In [23]:
def create_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    result = ''
    for chunk in stream:
        result += chunk.choices[0].delta.content or ''
        yield result

In [None]:
name_input = gr.Textbox(label='Company name:')
url_input = gr.Textbox(label='Landing page URL including http:// or https://')
message_output = gr.Markdown(label='Response')

view = gr.Interface(
    fn=create_brochure,
    title='AI Brochure Generator',
    inputs=[name_input, url_input],
    outputs=[message_output],
    examples=[
            ["Hugging Face", "https://huggingface.co"],
        ], 
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7877
* To create a public link, set `share=True` in `launch()`.




Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 7 relevant links
