In [22]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# If you get an error running this cell, then please head over to the troubleshooting notebook!

In [23]:
# Load environment variables in a file called .env
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()


API key looks good so far


In [24]:
# call to a frontier model i.e. OpenAI 

message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(model="gpt-4o-mini", messages=[{"role":"user", "content":message}])
print(response.choices[0].message.content)

Hello! It’s great to hear from you! Welcome! How can I assist you today?


In [25]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        #BeautifulSoup is a package which is used to parsing wabpages
        soup = BeautifulSoup(self.body, 'html.parser')
        #Fetch the title from the webpage, remove the images or style contents and store the text contents 
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [26]:
ed = Website("https://framonde.com")
#print(ed.title)
#print(ed.text)
ed.links

['#approach-us',
 '#our-courses',
 '#our-team',
 '#find-us',
 'https://wa.me/919910138144',
 'tel:+919910138144',
 '#approach-us',
 '#our-courses',
 '#our-team',
 '#find-us',
 'https://in.linkedin.com/in/framonde-india-a5413028a',
 'https://www.facebook.com/share/2TZCjfcbGvowV8XD/',
 'https://www.instagram.com/invites/contact/?igsh=nrun8qrffhhg&utm_content=sc3y156',
 '#banner-content']

In [27]:
## Types of prompts

Models like GPT4o have been trained to receive instructions in a particular way.

They expect to receive:

**A system prompt** that tells them what task they are performing and what tone they should use

**A user prompt** -- the conversation starter that they should reply to

SyntaxError: invalid syntax (1703708753.py, line 3)

In [None]:
# Define our system prompt

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

In [None]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

In [None]:
print(user_prompt_for(ed))

In [28]:
messages = [
    {"role": "system", "content": "You are a snarky assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

In [29]:
response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
print(response.choices[0].message.content)

Oh, only the most challenging math problem ever! Drumroll, please... It's 4. Shocking, I know!


In [30]:
def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [31]:
messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled Framonde - The French Classes\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nOur Approach\nCourses\nTeam\nContact\r\n                            us\n+91\r\n                        9910138144\nLearn French with\nFraMonde\nFraMonde (means French World) is a French Language\r\n                        Institute,\r\n                        established in 2023 at Noida with intention of\r\n                        opening\r\n                        doors\r\n                        to a new world of opportunities, from connecting\r\n                        with\r\n                        French\r\n             

In [32]:
# call the OpenAI API.

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = "gpt-4o-mini",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [33]:
summarize("https://framonde.com")

"# Framonde - The French Classes Overview\n\nFraMonde, established in 2023 in Noida, is a dedicated French Language Institute aimed at providing quality French education to learners around the globe. The institution's mission is to open up opportunities for individuals to connect with French speakers and immerse themselves in the rich culture of France and other French-speaking countries.\n\n## Key Features\n- **Online Classes**: FraMonde offers both 1-to-1 and group classes exclusively online, allowing for flexible learning regardless of location. \n- **Diverse Course Offerings**:\n  - **Tuition for School Students**: Tailored classes focusing on the school curriculum.\n  - **DELF Exam Preparation**: Specialized course for students aiming to delve into French language and culture.\n  - **French for Working Professionals**: Courses designed for professionals looking to enhance their French skills for job relevance.\n\n## Teaching Approach\nFraMonde emphasizes personalized coaching for 

In [34]:
def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [35]:
display_summary("https://framonde.com")

# Framonde - The French Classes

FraMonde is a French Language Institute established in 2023 in Noida, India. The institute aims to provide opportunities for individuals to connect with French speakers globally and explore the culture of France and other Francophone countries.

## Offerings
FraMonde offers a variety of French language courses designed for different proficiency levels, including:

- **Online 1 to 1 Classes & Group Classes**: Accessible anytime, anywhere, supporting students in maintaining continuity in their learning.
- **Tuition for School Students**: Classes tailored to school curricula, making learning enjoyable and interactive.
- **DELF Exam Preparation / University Test**: Specialized courses for students focusing on French language, literature, and culture.
- **French for Working Professionals**: Courses aimed at improving French skills relevant to professional settings.

## Approach
The coaching programs at FraMonde are personalized to meet the unique learning styles and objectives of each student, whether they are beginners or advanced learners.

## Team
The institute is led by Ms. Joshita Batra, who holds an M.A. in French from Jawaharlal Nehru University and has over a decade of teaching experience in both traditional and online formats. 

FraMonde is committed to quality education and passionate about helping students achieve fluency and confidence in French.

In [None]:
-------------------------------------------------------------------------------------------------

In [36]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [38]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [39]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [40]:
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://framonde.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
#approach-us
#our-courses
#our-team
#find-us
https://wa.me/919910138144
tel:+919910138144
#approach-us
#our-courses
#our-team
#find-us
https://in.linkedin.com/in/framonde-india-a5413028a
https://www.facebook.com/share/2TZCjfcbGvowV8XD/
https://www.instagram.com/invites/contact/?igsh=nrun8qrffhhg&utm_content=sc3y156
#banner-content


In [43]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [44]:
framonde = Website("https://framonde.com")
framonde.links

['#approach-us',
 '#our-courses',
 '#our-team',
 '#find-us',
 'https://wa.me/919910138144',
 'tel:+919910138144',
 '#approach-us',
 '#our-courses',
 '#our-team',
 '#find-us',
 'https://in.linkedin.com/in/framonde-india-a5413028a',
 'https://www.facebook.com/share/2TZCjfcbGvowV8XD/',
 'https://www.instagram.com/invites/contact/?igsh=nrun8qrffhhg&utm_content=sc3y156',
 '#banner-content']

In [46]:
get_links("https://framonde.com")

{'links': [{'type': 'our team page', 'url': 'https://framonde.com#our-team'},
  {'type': 'our courses page', 'url': 'https://framonde.com#our-courses'},
  {'type': 'approach us page', 'url': 'https://framonde.com#approach-us'},
  {'type': 'find us page', 'url': 'https://framonde.com#find-us'}]}