#### Set up web Scraping function with BeautifulSoup

In [1]:
from bs4 import BeautifulSoup
import requests
import os
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display

In [2]:


# Standard headers to fetch a website
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


def fetch_website_contents(url):
    """
    Return the title and contents of the website at the given url;
    truncate to 2,000 characters as a sensible limit
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")
    title = soup.title.string if soup.title else "No title found"
    if soup.body:
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:2_000]


def fetch_website_links(url):
    """
    Return the links on the webiste at the given url
    I realize this is inefficient as we're parsing twice! This is to keep the code in the lab simple.
    Feel free to use a class and optimize it!
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")
    links = [link.get("href") for link in soup.find_all("a")]
    return [link for link in links if link]


In [3]:
fetch_website_contents("https://cricbuzz.com")

"Women's Premier League 2026 | Live Cricket Score, Schedule, Latest News, Stats &amp; Videos | Cricbuzz.com\n\nMenu\nLive Scores\nSchedule\nArchives\nNews\nSeries\nTeams\nVideos\nRankings\nMore\nMATCHES\nPC\nvs\nPR\n-\nPR won\nMIW\nvs\nUPW\n-\nUPW won\nGGTW\nvs\nRCBW\n-\nPreview\nUSAU19\nvs\nINDU19\n-\nINDU19 won\nAUSU19\nvs\nIREU19\n-\nPreview\nALL\nAll\nLive Now\nToday\nLEAGUE\nSA20\nPretoria Capitals vs Paarl Royals\n25th Match\nMI Cape Town vs Sunrisers Eastern Cape\n26th Match\nBBL 2025-26\nPerth Scorchers vs Melbourne Renegades\n36th Match\nSydney Sixers vs Sydney Thunder\n37th Match\nBPL 2025-26\nChattogram Royals vs Noakhali Express\n25th Match\nRajshahi Warriors vs Sylhet Titans\n26th Match\nDhaka Capitals vs Rangpur Riders\n27th Match\nChattogram Royals vs Rajshahi Warriors\n28th Match\nSuper Smash 2025-26\nWellington vs Otago\n20th Match\nCentral Districts vs Auckland\n21st Match\nDOMESTIC\nICC Under 19 World Cup 2026\nZimbabwe U19 vs Scotland U19\n2nd Match, Group B\nUnited

In [4]:
#Load the env Variable whic is  in .env file


load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")



API key found and looks good so far!


#### Quick Call to api

In [5]:

message = "Hello, Ai! This is my first ever message to you! Hi!"
messages = [{"role": "user", "content": message}]
messages


[{'role': 'user',
  'content': 'Hello, Ai! This is my first ever message to you! Hi!'}]

In [6]:
openai = OpenAI()

response = openai.chat.completions.create(model="gpt-5-nano", messages=messages)
response.choices[0].message.content

'Hi there! Welcome‚Äîgreat to meet you. I‚Äôm here to chat, answer questions, explain things, help with writing or planning, and more. What would you like to do today? If you want, tell me a bit about your interests or a topic you‚Äôd like to explore.'

### Types of prompts

**A system prompt** that tells them what task they are performing and what tone they should use

**A user prompt** -- the conversation starter that they should reply to

In [7]:
system_prompt = """
You are a Professioal assistant that analyzes the contents of a website,
and provides a short,insighfull, with proper heading and sub heading , humorous summary, ignoring text that might be navigation related.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

In [8]:
user_prompt_prefix = """
Here are the contents of a website.
Provide a short summary of this website.
If it includes news or announcements, then summarize these too.

"""

### Messages

The API from OpenAI expects to receive messages in a particular structure.
Many of the other APIs share this structure:

```python
[
    {"role": "system", "content": "system message goes here"},
    {"role": "user", "content": "user message goes here"}
]
```
To give you a preview, the next 2 cells make a rather simple call - we won't stretch the mighty GPT (yet!)

In [9]:
messages = [
    {"role": "system", "content": "You are a flirty assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

response = openai.chat.completions.create(model="gpt-4.1-nano", messages=messages)
response.choices[0].message.content

'Well, 2 + 2 equals 4, but if you‚Äôre asking in a different context, I‚Äôd say the answer is just as charming as you are! Want to try a trick question instead?'

In [10]:
# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_prefix + website}
    ]

In [11]:
cricbuzz=fetch_website_contents("https://cricbuzz.com")
messages_for(cricbuzz)

[{'role': 'system',
  'content': '\nYou are a Professioal assistant that analyzes the contents of a website,\nand provides a short,insighfull, with proper heading and sub heading , humorous summary, ignoring text that might be navigation related.\nRespond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n'},
 {'role': 'user',
  'content': "\nHere are the contents of a website.\nProvide a short summary of this website.\nIf it includes news or announcements, then summarize these too.\n\nWomen's Premier League 2026 | Live Cricket Score, Schedule, Latest News, Stats &amp; Videos | Cricbuzz.com\n\nMenu\nLive Scores\nSchedule\nArchives\nNews\nSeries\nTeams\nVideos\nRankings\nMore\nMATCHES\nPC\nvs\nPR\n-\nPR won\nMIW\nvs\nUPW\n-\nUPW won\nGGTW\nvs\nRCBW\n-\nPreview\nUSAU19\nvs\nINDU19\n-\nINDU19 won\nAUSU19\nvs\nIREU19\n-\nPreview\nALL\nAll\nLive Now\nToday\nLEAGUE\nSA20\nPretoria Capitals vs Paarl Royals\n25th Match\nMI Cape Town vs Sunrisers Eastern Ca

In [12]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = fetch_website_contents(url)
    response = openai.chat.completions.create(
        model = "gpt-4.1-mini",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [13]:
summarize("https://cricbuzz.com")

'# Women\'s Premier League 2026 on Cricbuzz: Your Cricket HQ\n\n## What\'s This Website About?\nThis site is your ultimate cricket companion with a special spotlight on the **Women\'s Premier League (WPL) 2026**. But don‚Äôt let the WPL name fool you ‚Äî it\'s a whole cricket universe here! From live scores, schedules, and team stats to highlights and expert previews, Cricbuzz covers everything to keep cricket fans glued to their screens.\n\n## Key Highlights\n\n### Live Scores Galore\n- Up-to-the-minute scores for multiple leagues and tournaments worldwide.\n- Women\'s Premier League 2026 matches like Mumbai Indians Women vs UP Warriorz Women.\n- Other leagues such as SA20, BBL 2025-26, BPL 2025-26, Super Smash 2025-26, plus domestic fixtures.\n- Under 19 World Cup 2026 news, keeping an eye on future superstars.\n\n### Recent Results & Fierce Contests\n- Paarl Royals clinched victory by 6 wickets over Pretoria Capitals.\n- UP Warriorz Women triumphed against Mumbai Indians Women with 

In [14]:
# A function to display this nicely in the output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [15]:
display_summary("https://cricbuzz.com")

# Women's Premier League 2026 & More: Cricbuzz's Cricket Carnival

## Overview  
This website is your ultimate cricket playground, with a special spotlight on the Women's Premier League (WPL) 2026. It offers **live scores, match schedules, latest news, stats, and videos** across various formats and leagues. Think of it as the cricketing Swiss Army knife‚Äîequipped for any fan's needs!

## What‚Äôs Hot?  
- **Women's Premier League 2026**: Follow thrilling matches like Mumbai Indians Women vs UP Warriorz Women and Gujarat Giants Women vs Royal Challengers Bengaluru Women, complete with live scores and forecasts.
- **Global Leagues & Tournaments**: From SA20 in South Africa to Big Bash League (BBL) in Australia and domestic tournaments like Vijay Hazare Trophy, it‚Äôs a full buffet of cricket.
- **Under-19 World Cups**: Stay updated with exciting youth battles such as USA U19 vs India U19 ‚Äì cricket‚Äôs future stars in action.
- **Live & Recent Results**: Already wrapped up? Paarl Royals and UP Warriorz Women are celebrating recent wins, keeping the thrill alive.

## News & Announcements  
While the site mainly focuses on **live updates**, you get previews and real-time results rather than traditional news articles. Expect *instant match results* and schedule updates ‚Äî perfect for fans who hate missing a ball.

## Summary with a Slice of Humor  
If cricket were a soap opera, this site serves your daily dose of drama, suspense, and joy‚Äîwith less yelling at the screen and more up-to-the-minute scores. The Women‚Äôs Premier League 2026 is stealing the spotlight, proving women‚Äôs cricket isn‚Äôt just good; it‚Äôs great fun to watch!

So, whether you want to see the latest six smashed or track the under-19 prodigies before they‚Äôre rich and famous, this site is your cricket command center. Just remember: when you‚Äôre glued to this page, "five more minutes" could easily turn into a full match day! üèèüòÑ

In [16]:
display_summary("https://nationalgeographic.com")

# National Geographic Website Summary

## Exploring Wonders and Curiosities of Our World

National Geographic delivers a treasure trove of engaging articles that span health, environment, animals, science, and travel‚Äîbasically everything you didn‚Äôt know you were curious about. From the hidden kingdom of seahorses to the grumpy-faced Texas horned lizard, the site offers a brilliant mix of nature‚Äôs marvels and quirky creatures that might just brighten your day (and your Instagram feed).

---

## Highlights & Hot Takes

### Health Myths & Innovations
- **Steroid use is more widespread than most people believe**: Spoiler‚Äîit's not just for the gym bros.
- **Virtual reality to treat chronic pain**: Sci-fi tech meets real-world healing.
  
### Travel Spots to Dream About
- **6 sun-soaked island archipelagos for a winter getaway**: Because who wants winter indoors?

### Environment & Science Eye-Openers
- **The uncertain future of the world's most expensive spice**: Hint‚Äîthis isn‚Äôt your grandma‚Äôs seasoning drama.
- **Woolly rhino genome found in a frozen wolf's stomach**: Nature‚Äôs freezer surprises keep on coming.
- **Marine scientist‚Äôs seafood confession including octopus**: Eating what you study? Brave or just hungry?

---

## From the Vault: Retro Cool

National Geographic gives its iconic features a digital facelift, reviving legendary stories like:
- The archaeology breakthrough with sunken ceramics.
- A 100-year-old sun compass that helped polar explorers find their way.
- The harrowing Everest ascent by the first Americans.

---

## Special Feature: Celebrity Meets Science

- **Limitless with Chris Hemsworth**: Thor tries his hand at epic science challenges. Because when you're a god, curiosity is another superpower.

---

## Summary

If curiosity had a homepage, it‚Äôd be this site‚Äîmixing serious science with cultural tales, environment updates, travel inspiration, and a dash of celebrity charm. Perfect for armchair adventurers and fact-hungry explorers alike.

In [17]:
display_summary("https://abpmajha.com")

# Summary of ABP Majha Marathi News Website

## Overview
ABP Majha is a vibrant Marathi news portal delivering the latest breaking news across Maharashtra and beyond. The website is a treasure trove of news categories including politics, entertainment, sports, business, technology, crime, lifestyle, and more. It offers content in Marathi, Hindi, and English, reflecting its diverse audience.

## News Highlights & Announcements
- **Maharashtra Local Body Polls**: Crunch-time drama as 29 municipal corporation election results roll out today. The political temperature is sizzling ‚Äî will the ruling Shiv Sena faction retain power or will the BJP snatch the throne? Expect power struggles, surprise results, and a few eyebrow-raising political shenanigans.
- **Mumbai BMC Election 2026**: The storyline unfolding is like a Bollywood thriller with factions led by Thackeray brothers, Shinde, and Fadnavis locked in a fierce battle to dominate the Brihanmumbai Municipal Corporation.
- **Political Clashes**: Actual street fights have broken out, such as in Thane where supporters of Minakshi Shinde and Bhoir squared off, involving police interventions and even chili powder assaults! This isn‚Äôt your typical political mudslinging‚Äîit‚Äôs tactical chili warfare.
- **Daily Life & Public Outcry**: From trash piling outside international schools to heated political controversies in Pimpri-Chinchwad, the site keeps a finger on the pulse of civic issues that matter to ordinary Maharashtrians.
- **Weekly Horoscope & Fortune Updates**: Because politics and daily news may shake the ground, but the stars have their own plans for Me·π£a, V·πõ·π£abha, Mithuna, Karka, and beyond.
- **Interactive Tools**: Handy calculators for home loans, personal loans, BMI, and even age‚Äîbecause who doesn‚Äôt want to know their financial and fitness status while catching up on the latest headlines?

## Humorous Take
If Maharashtra‚Äôs politics were a soap opera, ABP Majha would be your ultimate binge partner. With electoral drama, fiery protests, and chili powder duels, watching this site is like having a bowl of spicy Puran Poli‚Äîsweet, intense, and unforgettable!

---

**In a nutshell:** Need your daily dose of Marathi politics, local drama, cultural zest, and practical life hacks? ABP Majha delivers it all, sprinkling a mix of sunshine, storm, and starry predictions on a platter!