# Summarize a website using Ollama (a local, lightweight LLM)


In [9]:
# imports
import ollama
import gradio as gr
from bs4 import BeautifulSoup
import requests
from IPython.display import Markdown, display

In [10]:
# constants
# Model name passed as `model=` to ollama.chat() when summarizing a page
MODEL = "llama3.2"

In [11]:

# A class to represent a Webpage
class Website:
    """A downloaded web page reduced to its url, title and visible text."""

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout, requests waits indefinitely on a stalled server.

        Raises:
            requests.exceptions.RequestException: if the download fails or
                times out (raised by requests.get).
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or contains nested
        # tags; fall back to the placeholder instead of storing None.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        # html.parser does not synthesize a <body>; for documents without one,
        # work on the whole parsed tree rather than failing on None.
        root = soup.body if soup.body is not None else soup

        # Drop elements that carry no readable text before extracting it.
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [29]:
# Step 1: Create your prompts

# System prompt: tells the model what role to play and how to format its reply
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

# Step 2: Make the messages list

# Build the user prompt that asks the model to summarize one website
def user_prompt_for(website):
    """Return the user prompt for ``website``.

    The prompt is the page title line, followed by the summarization
    instructions, followed by the page text (``website.text``).
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

# Assemble the chat messages (system instructions, then the user request)
def messages_for(website):
    """Return the two-message list sent to the model for ``website``."""
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

# Step 3: Call ollama function

def summarize(url):
    """Download the page at ``url`` and return the model's summary text."""
    page = Website(url)
    reply = ollama.chat(model=MODEL, messages=messages_for(page))
    message = reply['message']
    return message['content']


# Step 4: print the result
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """Summarize ``url`` and show the result as rendered Markdown."""
    rendered = Markdown(summarize(url))
    display(rendered)

def gr_summary(url):
    """Return the summary text for ``url``; used as the Gradio interface callback."""
    summary_text = summarize(url)
    return summary_text


In [31]:
# Summarize the Anthropic homepage and render the summary in the cell output
display_summary("https://anthropic.com")

### Anthropic Website Summary

#### Company Overview

Anthropic is an AI safety and research company based in San Francisco, with a multidisciplinary team experienced in ML, physics, policy, and product. The company generates research and creates reliable, beneficial AI systems.

#### News and Announcements

* **Oct 22, 2024**: Introducing computer use (new Claude 3.5 Sonnet and Claude 3.5 Haiku) - Model updates
* **Sep 4, 2024**: Claude for Enterprise - Announcement of product for business customers
* **Dec 15, 2022**: Constitutional AI: Harmlessness from AI Feedback - Research paper announcement
* **Mar 8, 2023**: Core Views on AI Safety: When, Why, What, and How - Announcement of research

#### Product Overview

Anthropic offers a range of products, including:

* **Claude**: An intelligent AI model available for use through the API.
* **Claude for Enterprise**: A product designed for business customers.
* **API**: Build with Claude to drive efficiency and create new revenue streams.

#### Research Areas

Anthropic focuses on AI safety research, including alignment, constitutional AI, and other topics.

In [32]:
# Use gradio UI

In [30]:
# Text box in (the URL), text area out (the summary); launch the UI right away
gr.Interface(
    fn=gr_summary,
    inputs="textbox",
    outputs="textarea",
).launch()

* Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.


