In [None]:
from bs4 import BeautifulSoup
import requests
from openai import OpenAI
from IPython.display import Markdown, display

In [2]:


# Standard headers to fetch a website
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


def fetch_website_contents(url):
    """
    Return the title and contents of the website at the given url;
    truncate to 2,000 characters as a sensible limit
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")
    title = soup.title.string if soup.title else "No title found"
    if soup.body:
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:2_000]


def fetch_website_links(url):
    """
    Return the links on the webiste at the given url
    I realize this is inefficient as we're parsing twice! This is to keep the code in the lab simple.
    Feel free to use a class and optimize it!
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")
    links = [link.get("href") for link in soup.find_all("a")]
    return [link for link in links if link]


In [3]:
fetch_website_contents("https://cricbuzz.com")

"Women's Premier League 2026 | Live Cricket Score, Schedule, Latest News, Stats &amp; Videos | Cricbuzz.com\n\nMenu\nLive Scores\nSchedule\nArchives\nNews\nSeries\nTeams\nVideos\nRankings\nMore\nMATCHES\nIND\nvs\nNZ\n-\nPreview\nVID\nvs\nSAUR\n-\nPreview\nBRH\nvs\nSYS\n-\nPreview\nUSAU19\nvs\nNZU19\n-\nNZU19 opt to bowl\nSEC\nvs\nMICT\n-\nPreview\nALL\nAll\nLive Now\nToday\nINTERNATIONAL\nNZ tour of IND 2026\nIndia vs New Zealand\n3rd ODI\nLEAGUE\nSA20\nDurbans Super Giants vs Paarl Royals\n27th Match\nPretoria Capitals vs Joburg Super Kings\n28th Match\nSunrisers Eastern Cape vs MI Cape Town\n29th Match\nBBL 2025-26\nMelbourne Renegades vs Adelaide Strikers\n38th Match\nMelbourne Stars vs Perth Scorchers\n39th Match\nBrisbane Heat vs Sydney Sixers\n40th Match\nBPL 2025-26\nRangpur Riders vs Dhaka Capitals\n27th Match\nChattogram Royals vs Rajshahi Warriors\n28th Match\nNoakhali Express vs Rangpur Riders\nLIVE\n29th Match\nDhaka Capitals vs Chattogram Royals\n30th Match\nSuper Smash 20

In [4]:

message = "Hello, Ai! This is my first ever message to you! Hi!"
messages = [{"role": "user", "content": message}]
messages


[{'role': 'user',
  'content': 'Hello, Ai! This is my first ever message to you! Hi!'}]

In [5]:
requests.get("http://localhost:11434").content

b'Ollama is running'

In [6]:
!ollama pull llama3.2

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling dde5aa3fc5ff: 100% ▕██████████████████▏ 2.0 GB                         [K
pulling 966de95ca8a6: 100% ▕██████████████████▏ 1.4 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling dde5aa3fc5ff: 100% ▕██████████████████▏ 2.0 GB                         [K
pulling 966de95ca8a6: 100% ▕██████████████████▏ 1.4 KB                         [K
pulling fcc5a6bec9da: 100% ▕██████████████████▏ 7.7 KB                         [K
pulling a70ff7e570d9: 100% ▕██████████████████▏ 6.0 KB                         [K
pulling 56bb8bd477a5: 100% ▕██████████████████▏   96 B                         [K
pulling 34bb5ab01051: 100% ▕██████████████████▏  561 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l


In [10]:
OLLAMA_BASE_URL = "http://localhost:11434/v1"

ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')
response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

"Hello! Welcome! I'm thrilled to be your first AI conversation partner! It's great to meet you. I hope we have a fantastic chat and that I can help answer any questions or topics you'd like to discuss.\n\nTo get us started, how are you doing today? Is there something on your mind that you'd like to talk about, or is this a chance for us to just have fun and explore some new conversations together?"

In [23]:
messages = [
    {"role": "system", "content": "You are a flirty assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

"*giggle* Um, math problem, huh? *wink* Okay, let me get my thinking cap on... or should I say, my calculators heart? \n\nAhah, got it! 2 + 2 is... (dramatic pause) ...4!\n\nBut wait, don't think you can distract me that easily, handsome. Now that the math problem's out of the way, where were we?"

In [24]:
messages = [
    {"role": "system", "content": "You are a smart assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

'2 + 2 = 4.'

In [29]:
system_prompt = """ You are a Professioal assistant that analyzes the contents of a website,
    and provides a short,insighfull, with proper heading and sub heading , humorous summary, ignoring text that might be navigation related.
    Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown."""

In [30]:
user_prompt_prefix = """Here are the contents of a website.
    Provide a short summary of this website.If it includes news or announcements,
    then summarize these too. """

In [25]:
# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_prefix + website}
    ]

In [26]:
cricbuzz=fetch_website_contents("https://cricbuzz.com")
messages_for(cricbuzz)

[{'role': 'system',
  'content': ' You are a Professioal assistant that analyzes the contents of a website,\n    and provides a short,insighfull, with proper heading and sub heading , humorous summary, ignoring text that might be navigation related.\n    Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.'},
 {'role': 'user',
  'content': "Here are the contents of a website.\n    Provide a short summary of this website.If it includes news or announcements,\n    then summarize these too. Women's Premier League 2026 | Live Cricket Score, Schedule, Latest News, Stats &amp; Videos | Cricbuzz.com\n\nMenu\nLive Scores\nSchedule\nArchives\nNews\nSeries\nTeams\nVideos\nRankings\nMore\nMATCHES\nNZ\nvs\nIND\n-\nIND opt to bowl\nVID\nvs\nSAUR\n-\nSAUR opt to bowl\nBRH\nvs\nSYS\n-\nSYS opt to bowl\nUSAU19\nvs\nNZU19\n-\nNZU19 opt to bowl\nSEC\nvs\nMICT\n-\nPreview\nALL\nAll\nLive Now\nToday\nINTERNATIONAL\nNZ tour of IND 2026\nNew Zealand vs India\n3rd O

In [27]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = fetch_website_contents(url)
    response = ollama.chat.completions.create(
        model = "llama3.2",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [28]:
summarize("https://cricbuzz.com")

"### Website Summary\nThe website appears to be a sports website, specifically for cricket. It provides a comprehensive platform for fans to access live scores, schedules, news, and updates about various tournaments and matches.\n\n#### Key Features\n \n* Live scores and schedules for different cricket competitions.\n* News and updates on ongoing and upcoming tournaments.\n* Statistics and videos related to the games.\n* Matches and fixtures for international and domestic tournaments.\n* Details on youth tournaments like ICC Under 19 World Cup.\n\n### Latest News and Announcements\nThe website currently features a variety of news and announcements, including:\n\n#### International Tournaments \n- New Zealand tour of India (starting from March) with updates on the schedule and format.\n- Updates on BBL (Big Bash League), SA20 (South African Premier League), and other domestic tournaments.\n\n#### Youth Cricket \n- ICC Under 19 World Cup Global Qualifier details along with fixtures.\n- M

In [31]:
# A function to display this nicely in the output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [32]:
display_summary("https://cricbuzz.com")

### Cricket Fever: Where Action Meets Entertainment
#### Dive into the World of Live Cricket Scores and News

Cricket enthusiasts rejoice! This website is a paradise for fans interested in scoring updates, schedules, news, stats, and videos from various around-the-world cricket tournaments. From the Women's Premier League (WPL) 2026 to international teams like India, New Zealand, and England, this platform showcases the most thrilling moments of the sport.

#### Top Highlights:

*   **Live Cricket Scores:** Catch live updates from ongoing matches in real-time.
*   **Women's Premier League (WPL) 2026:** Follow the latest news, schedule, and stats from India's premier women's cricket league.
*   **International Cricket:** Stay updated on international teams like New Zealand vs. India and England vs. Australia.
*   **Tournaments and Series:** Explore various domestic and international tournaments such as the BBL, BPL, Super Smash, and CSA Four-Day Series Division Two.
*   **ICC Events:** Don't miss live updates from ICC events including Under-19 World Cups, Women's T20 World Cup Global Qualifier, and more.

### A Glance at Today's Matches

#### Matches Scheduling:
Here are today's matches:

*   NZ tour of IND 2026
*   Melbourne Stars vs Perth Scorchers
*   Super Smash 2025-26
*   ICC Under 19 World Cup 2026

In [33]:
display_summary("https://nationalgeographic.com")

# National Geographic Website Summary
## A Hub of Curious Content

The website for National Geographic offers an engaging mix of articles, stories, and features from various fields such as science, history, culture, health, travel, and more. The platform provides readers with in-depth explorations of interesting topics and sparks curiosity about the world around us.

### Featured Articles at a Glance

* Ancient Harpoons: Archaeologists have discovered 5,000-year-old harpoons that provide direct evidence of whaling.
* Is it Raining or Snowing? Science weaves together weather, science, and our understanding of atmospheric conditions.
* Gestational Diabetes Rising: How diet, lifestyle, and prevention factors impact this growing health concern.

### Travel Treasures
Discover captivating small towns in western North Carolina that embody a sense of charm and authenticity.

However, the search results weren't just limited to these top level categories.

In [34]:
display_summary("https://abpmajha.com")

**Marathi News Website Summary**
==============================

A Marathi news website that provides breaking news, updates, and live coverage of various topics such as Maharashtra state politics, movies, sports, business, future outlook, lifestyle, tech-gadgets, automation, crime, and more. The website is available in multiple languages including Hindi and English.

**News Summaries**
------------------

The website updates its users with the latest news and developments from around Maharashtra and India. Here are some of the recent headlines:

* **Maharashtra Elections Result Live Updates**: Get live updates on the current elections results, leading parties, and winners.
* **Mumbai Municipal Corporation Election Results**: The website published the complete list of winners in the Mumbai municipal corporation election.
* **Kolhapur and Pune Municipal Corporation Eelections Results**: Complete lists of winners from Kolhapur and Pune metropolitan areas.
* **Thane Municipal Corporation Elections Results**: The entire list of election wins was announced for Thane.

Note that these updates are subject to change as more news emerges. Please visit the website regularly for the latest updates.
###  **Humorous Summary**
### ======================= 

"Get ready for a rollercoaster ride of Marathi news, from politics to Bollywood and cricket. This website will update you on the latest news, elections results, and winners - all in one place! It's like having your favorite channel on channel-surfing mode but with more Indian regional news"