# End of week 1 exercise

To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question  
and responds with an explanation. This is a tool that you will be able to use yourself during the course!

In [8]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import json
import os
import warnings
from typing import List
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI


In [2]:
# Model identifiers: the hosted OpenAI model and the local Ollama model

MODEL_GPT = "gpt-4o-mini"
MODEL_LLAMA = "llama3.2"

In [None]:
# set up environment
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Pick the backend with a flag instead of commenting code in and out.
#   True  -> local Ollama server through its OpenAI-compatible endpoint
#   False -> OpenAI API (needs OPENAI_API_KEY in the environment or .env)
USE_OLLAMA = True

if USE_OLLAMA:
    # Ollama ignores the key, but the client requires a non-empty value.
    openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
    model = MODEL_LLAMA
else:
    # Fail here with a clear message rather than on the first API call.
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; add it to your .env file "
            "or set USE_OLLAMA = True"
        )
    openai = OpenAI(api_key=api_key)
    model = MODEL_GPT



In [54]:
def generate_messages(system_prompt, user_prompt):
    """Build the two-message chat payload: the system prompt, then the user prompt."""
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt)
    return [system_message, user_message]

class Website:
    """A web page fetched once at construction, with helpers to build a brochure.

    The LLM-backed methods use the notebook-level ``openai`` client and
    ``model`` name, and ``generate_messages`` to assemble chat payloads.
    """

    # requests waits indefinitely unless a timeout is given, so a single slow
    # host would otherwise hang the whole notebook cell.
    REQUEST_TIMEOUT = 10  # seconds

    def __init__(self, url) -> None:
        """Download ``url`` and parse the response body as HTML.

        Raises:
            requests.RequestException: on connection failure, timeout, an
                unsupported URL scheme, or a 4xx/5xx status code.
        """
        self.url = url

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)

        self.soup = BeautifulSoup(response.content, 'html.parser')

    def get_contents(self) -> str:
        """Return the page's non-blank text nodes, stripped and joined by newlines.

        Text whose parent tag is script, style, head, meta or the document
        root is left out.
        """
        hidden_parents = ('script', 'style', 'head', 'meta', '[document]')
        visible = []
        for text in self.soup.find_all(string=True):
            stripped = text.strip()
            if stripped and text.parent.name not in hidden_parents:
                visible.append(stripped)
        return "\n".join(visible)

    def get_links(self) -> List[str]:
        """Return every ``<a href>`` on the page, resolved against ``self.url``."""
        return [
            urljoin(self.url, a_tag['href'])  # Resolve relative URLs to absolute URLs
            for a_tag in self.soup.find_all('a', href=True)
        ]

    @staticmethod
    def _parse_links(raw) -> List[dict]:
        """Pull the usable link entries out of the model's JSON reply.

        Returns an empty list when the reply is empty, is not a JSON object,
        or has no ``"links"`` list. Entries that are not dicts with a string
        ``"link"`` value are dropped.

        Raises:
            json.JSONDecodeError: if a non-empty reply is not valid JSON.
        """
        payload = json.loads(raw or "{}")
        if not isinstance(payload, dict):
            return []
        entries = payload.get("links")
        if not isinstance(entries, list):
            return []
        return [
            entry for entry in entries
            if isinstance(entry, dict)
            and isinstance(entry.get("link"), str)
            and entry["link"]
        ]

    def get_relevant_links(self) -> List[dict]:
        """Ask the model which of the page's links belong in a company brochure.

        Returns:
            A list of ``{"page_type": ..., "link": ...}`` dicts; empty when
            the model offers none.

        Raises:
            json.JSONDecodeError: if the model's reply is not valid JSON.
        """
        # The examples below must themselves be valid JSON: the model tends to
        # imitate them, including any syntax error or misspelling.
        system_prompt = """
        You are provided list of links of the website. 
        You are able to decide which links are suitable for being used for company brochure. For example, links for About page, Careers page, Contact page, etc.
        You should return JSON format, following these two examples:
        {
            "links": [
                {
                    "page_type": "about page",
                    "link": "http://example.com/about"
                },
                {
                    "page_type": "career page",
                    "link": "http://careers.example.com"
                }
            ]
        }

        or 

        {
            "links": [
                {
                    "page_type": "history page",
                    "link": "http://example.com/our-story"
                },
                {
                    "page_type": "contact page",
                    "link": "http://example.com/contact-us"
                }
            ]
        }
        """

        user_prompt = f"""
        Here is list of links for website {self.url}.
        Please decide which links are suitable for company brochure. 
        Please limit to maximum of 5 links and do not inclde Terms anf Services, Privacy links or email.
        Response as full url format.
        Links: 
        """
        user_prompt += "\n".join(self.get_links())

        res = openai.chat.completions.create(
            model=model,
            messages=generate_messages(system_prompt, user_prompt),
            response_format={"type": "json_object"},
        )
        return self._parse_links(res.choices[0].message.content)

    def build_brochure(self, name: str, max_prompt_chars: int = 5000) -> str:
        """Generate a Markdown brochure from this page and its relevant links.

        Args:
            name: Company name to present in the brochure.
            max_prompt_chars: Cap on the user prompt length. The homepage text
                comes first, so linked pages are what gets cut off.

        Returns:
            The model's reply, which it is asked to write in Markdown.
        """
        system_prompt = """
        You are provided a company name, its website url and some relevant links from that website as well as contents from these links.
        Please build a brochure of this company for prosepective investors, customers. The brochure should be based on provided information, describing company features, cultures if they are provided.
        You should response in Markdown format. 
        """

        header = f"""
        Here is the information for you to create brochure.
        Company name: {name}, website: {self.url}
        Please generate brochure in markdown format by using the below contents from the website and relevant links from the website:

        """

        sections = [header, "Homepage: \n", self.get_contents(), "\n\n"]

        for link in self.get_relevant_links():
            # The links come from the model, so they may be relative, dead,
            # or not web pages at all (e.g. mailto:). One bad link should not
            # discard the whole brochure.
            page_url = urljoin(self.url, link["link"])
            try:
                page = Website(page_url)
            except requests.RequestException as exc:
                warnings.warn(f"Skipping {page_url}: {exc}", stacklevel=2)
                continue

            sections.append(f"{link.get('page_type', 'page')}\n")
            sections.append(f"{page.get_contents()}\n\n")

        user_prompt = "".join(sections)

        res = openai.chat.completions.create(
            model=model,
            messages=generate_messages(system_prompt, user_prompt[:max_prompt_chars]),
        )
        return res.choices[0].message.content

    def translate_to_spanish(self, content: str) -> str:
        """Ask the model to translate ``content`` into Spanish, keeping its format."""
        system_prompt = """
        You are the interpreter for the company. Your job is to translate the brochure from English to Spanish.
        Please be noted that you only translate the content only, remember to response the result with the same format of brochure I give you. 
        """
        user_prompt = f"""
        Here is the brochure of company in English, please translate it to Spanish.
        
        {content}
        """

        res = openai.chat.completions.create(
            model=model,
            messages=generate_messages(system_prompt, user_prompt),
        )
        return res.choices[0].message.content

In [55]:
# Fetch the homepage, then have the model write a brochure from it and its linked pages
ed = Website(url="https://edwarddonner.com")
brochure_text = ed.build_brochure("Edward Donner")

In [60]:
# Render the English brochure as Markdown in the cell output.
# To show the Spanish translation instead, use the commented-out call:
# display(Markdown(ed.translate_to_spanish(brochure_text)))
display(Markdown(brochure_text))

# Uncomment to inspect the raw Markdown source instead of the rendered view:
# brochure_text

**Edward Donner Brochure**
=====================================

**Welcome to Edward Donner**
------------------------

We're excited to introduce you to Edward Donner, a pioneering company on the forefront of AI innovation and talent management. Our mission is to empower individuals to discover their potential and pursue their passion, driven by our proprietary matching model that revolutionizes the recruitment process.

**Meet Our Founder and CTO, Ed**
------------------------------

Ed, our founder and CTO, brings over two decades of experience in software engineering, data science, technology leadership, and entrepreneurship. As a passionate advocate for applying AI to real-world problems, Ed co-founded untapt, an AI startup acquired by GQR's parent company in 2021. This marked the birth of Nebula.io, our current venture focused on talent management using generative AI.

**Our Vision**
-------------

At Edward Donner, we're driven by a concept called Ikigai, which represents the sweet spot where one's passion meets their skill and contribution to society. Our long-term goal is to help people find roles that align with their passions, leading to increased human prosperity. With 77% of people feeling uninspired or unengaged at work, we believe our mission can make a significant positive impact.

**Our Technology**
-----------------

Our patented matching model uses generative AI to match candidates with job roles requiring no keywords required, resulting in faster and more accurate matches. We've seen remarkable success stories through our platform, which is also completely free to try for recruiters and job seekers alike.

**Awards and Recognition**
-------------------------

Edward Donner has received recognition from esteemed sources, including:

* American Banker Top 20 Company To Watch
* Fast Company, Forbes, and American Banker
* Interviewed on the floor of the New York Stock Exchange and Nasdaq

**Stay Connected**
-----------------

Want to learn more about our mission and technology? Follow Ed on:

* LinkedIn: [Ed's LinkedIn Profile](https://www.linkedin.com/in/[Ed's LinkedIn URL])
* Twitter: [@EdwardDonner](https://twitter.com/EdwardDonner)
* Facebook: [Edward Donner page](https://facebook.com/EdwardDonner)

**Subscribe to Our Newsletter**
---------------------------

Stay up-to-date on the latest news, updates, and insights from Edward Donner. Subscribe now by entering your email address:

[Subscribe Form]

**Contact Us**
-------------

Ready to connect with Ed or learn more about our mission? Reach out to us at:

ed[at] edwarddonner [dot] com
https://www.edwarddonner.com

We look forward to connecting with you!

In [57]:
# The technical question to be explained; replace the text to ask something new

question = (
    "\n"
    "Please explain what this code does and why:\n"
    'yield from {book.get("author") for book in books if book.get("author")}\n'
)

In [58]:
# Get gpt-4o-mini to answer, with streaming

In [59]:
# Get Llama 3.2 to answer