In [33]:
import os
import requests
from groq import Groq
import json
from dotenv import load_dotenv
from typing import List
from IPython.display import Markdown, display, update_display
from bs4 import BeautifulSoup

In [34]:
# Load variables from a local .env file, then sanity-check the Groq key
# before any API call is attempted.
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")
if api_key and api_key == api_key.strip():
    # Present and free of leading/trailing whitespace.
    print("API Key found and looks good")
elif api_key:
    # Present, but stripping it changes the value.
    print("API Key might have tabs or spaces in it")
else:
    # Unset or empty.
    print("No API Keys found")

API Key found and looks good


In [54]:
# Model identifier passed as `model=` to every chat-completion call in this notebook.
MODEL = "llama-3.3-70b-versatile"

In [35]:
# Shared API client used by get_links() and create_brochure().
# NOTE(review): constructed without arguments — presumably the SDK reads
# GROQ_API_KEY from the environment loaded above; confirm against the Groq docs.
groq = Groq()

In [36]:
class Website:
    """A fetched web page reduced to the pieces needed to build a brochure.

    Constructing an instance performs an HTTP GET on ``url`` and parses the
    response with BeautifulSoup's ``html.parser``.
    """

    url: str          # address passed to the constructor
    body: bytes       # raw response payload (``response.content``)
    title: str        # <title> text, or "No title found" when absent/empty
    links: List[str]  # non-empty href values of every <a> tag, exactly as written in the page
    text: str         # body text with script/style/img/input elements removed

    def __init__(self, url, timeout=10):
        """Download and parse the page at ``url``.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests.get`` can block indefinitely.

        Raises:
            requests.RequestException: propagated from ``requests.get`` on
                connection problems or when the timeout expires.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``soup.title.string`` is None when <title> is empty or contains
        # nested tags; fall back to the placeholder in that case too so the
        # prompt never contains the literal text "None".
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually have a non-empty href.
        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as one prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents\n{self.text}\n\n"
    
    


In [37]:
# Smoke-test the scraper on a sample site and show the hrefs it collected.
temp = Website("https://www.cognine.com")
display(temp.links)

['#primary',
 'http://cognine.com/',
 'http://cognine.com/what-we-do/',
 'http://cognine.com/ai-development/',
 'http://cognine.com/ai-development/ml-deep-learning/',
 'http://cognine.com/ai-development/gen-ai/',
 'http://cognine.com/ai-development/automation/',
 'http://cognine.com/data-analytics/',
 'http://cognine.com/data-analytics/data-management-engineering/',
 'http://cognine.com/data-analytics/predictive-analytics/',
 'http://cognine.com/data-analytics/data-visualization/',
 'http://cognine.com/data-analytics/data-quality/',
 'http://cognine.com/digital-engineering/',
 'http://cognine.com/digital-engineering/app-development/',
 'http://cognine.com/digital-engineering/product-engineering/',
 'http://cognine.com/digital-engineering/integrations/',
 'http://cognine.com/digital-engineering/quality-testing/',
 'http://cognine.com/cloud-development/',
 'http://cognine.com/cloud-development/cloud-architecture-advisory/',
 'http://cognine.com/cloud-development/azure/',
 'http://cognine

In [62]:
# System prompt for the link-selection call in get_links().
# It describes the task and shows two example replies. Both examples are
# complete, valid JSON objects so the model is never shown a malformed
# template to imitate.
link_system_prompt = (
    "You are provided with a list of links found on a website. "
    "You are able to decide which of the links would be most relevant to include in the brochure about the company, "
    "such as links to About page or company page or Careers/Jobs page.\n"
)
link_system_prompt += "You should respond in JSON as in the examples below."
link_system_prompt += """
{
"links":[
    {"type":"about page","url":"https://full.url/goes/here/about/"},
    {"type":"careers page","url":"https://another.full.url/careers/"}
    ]
}
"""
link_system_prompt += """
{
"links":[
    {"type":"contact","url":"https://url.com/yadayada/contact"},
    {"type":"customers","url":"https://website.net/us/customers"}
    ]
}
"""

In [39]:
def get_links_user_prompt(website):
    """Build the user message that lists a page's links for the model to filter.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by one link per line.
    """
    header = f"Here is the list of links on the website of {website.url}  -  "
    instructions = (
        "Please provide which of these are relevant web links for a brochure about the company, respond with the full HTTPS URL :"
        "Do not include Terms of Service, Privacy, Email links.\n"
        "Links (some might be relative links):\n"
    )
    link_lines = "\n".join(website.links)
    return header + instructions + link_lines

In [40]:
# Preview the user prompt generated from the sample site's links.
print(get_links_user_prompt(temp))

Here is the list of links on the website of https://www.cognine.com  -  Please provide which of these are relevant web links for a brochure about the company, respond with the full HTTPS URL :Do not include Terms of Service, Privacy, Email links.
Links (some might be relative links):
#primary
http://cognine.com/
http://cognine.com/what-we-do/
http://cognine.com/ai-development/
http://cognine.com/ai-development/ml-deep-learning/
http://cognine.com/ai-development/gen-ai/
http://cognine.com/ai-development/automation/
http://cognine.com/data-analytics/
http://cognine.com/data-analytics/data-management-engineering/
http://cognine.com/data-analytics/predictive-analytics/
http://cognine.com/data-analytics/data-visualization/
http://cognine.com/data-analytics/data-quality/
http://cognine.com/digital-engineering/
http://cognine.com/digital-engineering/app-development/
http://cognine.com/digital-engineering/product-engineering/
http://cognine.com/digital-engineering/integrations/
http://cognine.

In [55]:
def get_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Fetches the page, sends its links together with ``link_system_prompt``
    to the chat-completion endpoint in JSON mode, and returns the decoded
    reply.

    Returns:
        The parsed JSON object produced by the model.
    """
    page = Website(url)
    chat_messages = [
        {"role":"system","content":link_system_prompt},
        {"role":"user","content":get_links_user_prompt(page)},
    ]
    reply = groq.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        response_format={"type":"json_object"},
    )
    return json.loads(reply.choices[0].message.content)

In [42]:
# Try the link selector on a sample site; the cell output shows the parsed reply.
get_links("https://www.anthropic.com")

{'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'}]}

In [58]:
def get_all_details(url):
    """Collect the text of the landing page and of every model-selected link.

    The link list comes from a language model, so it is treated as untrusted:
    a missing ``"links"`` key, entries without a URL, relative URLs and pages
    that cannot be fetched are all tolerated instead of aborting the run.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string containing a "Landing page" section followed by one
        section per successfully fetched link.
    """
    from urllib.parse import urljoin

    result = "Landing page:\n"
    result += Website(url).get_contents()

    links = get_links(url)
    entries = links.get("links") if isinstance(links, dict) else None
    if not isinstance(entries, list):
        entries = []

    for link in entries:
        if not isinstance(link, dict) or not link.get("url"):
            continue
        # The model is asked for absolute URLs but may echo a relative one;
        # urljoin leaves absolute URLs untouched.
        page_url = urljoin(url, link["url"])
        try:
            page = Website(page_url)
        except requests.RequestException as exc:
            # One dead or invented link should not lose the whole brochure.
            print(f"Skipping {page_url}: {exc}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

In [None]:
# Print the combined page text gathered for a sample site (performs network and model calls).
print(get_all_details("https://www.anthropic.com"))

In [52]:
# System prompt for the brochure-writing call in create_brochure().
system_prompt = (
    "You are an assistant that analyses the contents of several relevant pages from a company website and creates a short brochure "
    "about the company for prospective customers, investors and recruits. Respond in Markdown. Include details of the company culture, customers and "
    "careers/jobs if you have the information"
)

In [61]:
def get_brochure_user_prompt(company_name, url, max_chars=20_000):
    """Build the user message for the brochure-writing call.

    Args:
        company_name: Name of the company, inserted into the instructions.
        url: Landing-page address handed to ``get_all_details``.
        max_chars: Upper bound on the length of the returned prompt; the
            scraped content is cut off once this many characters are reached.

    Returns:
        The instructions followed by the scraped page contents, truncated to
        ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called {company_name}\n"
    # Implicit concatenation keeps source indentation out of the prompt, and
    # the trailing blank line separates the instructions from the page text.
    user_prompt += (
        "Here are the contents of its landing page and other relevant pages; "
        "use this to build a short brochure of the company in markdown. "
        "At the end add links for various social media handles like linkedin, x, instagram etc. "
        "For multi level detail use bullet points for better user readability.\n\n"
    )
    user_prompt += get_all_details(url)
    return user_prompt[:max_chars]

In [56]:
def create_brochure(company_name,url):
    """Generate a brochure for the company and render it as Markdown in the notebook.

    Args:
        company_name: Name of the company, forwarded to the user prompt.
        url: Landing-page address to scrape.
    """
    conversation = [
        {"role":"system","content":system_prompt},
        {"role":"user","content":get_brochure_user_prompt(company_name,url)},
    ]
    completion = groq.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [63]:
# Build and render the brochure for a sample company (performs network and model calls).
create_brochure("Anthropic","https://www.anthropic.com")

# Introduction to Anthropic
Anthropic is an AI safety and research company based in San Francisco. Our mission is to build reliable, interpretable, and steerable AI systems that put safety at the forefront. We believe AI will have a vast impact on the world and are dedicated to ensuring that this technology is developed and used responsibly.

## Company Culture
At Anthropic, we value:
* **Here for the mission**: We exist to ensure transformative AI helps people and society flourish.
* **Unusually high trust**: We assume good faith, disagree kindly, and prioritize honesty.
* **One big team**: Collaboration is central to our work, culture, and value proposition.
* **Do the simple thing that works**: We celebrate trying the simple thing before the clever, novel thing.
* **Act for the global good**: We strive to make decisions that maximize positive outcomes for humanity in the long run.

## Our Work
We conduct frontier AI research across a variety of modalities, including:
* **Research**: We explore novel and emerging safety research areas from interpretability to RL from human feedback to policy and societal impacts analysis.
* **Product**: We translate our research into tangible, practical tools like Claude that benefit businesses, nonprofits, and civil society groups.
* **Policy**: We think about the impacts of our work and strive to communicate what we're seeing at the frontier to policymakers and civil society.

## Careers at Anthropic
We're a team of researchers, engineers, policy experts, and operational leaders, with experience spanning a variety of disciplines. We offer:
* **Competitive salary and equity packages**
* **Comprehensive health, dental, and vision insurance**
* **22 weeks of paid parental leave**
* **Flexible paid time off and absence policies**
* **Generous mental health support**

## Join Our Team
If you're passionate about building safe and reliable AI systems, we encourage you to explore our open roles. We value direct evidence of ability, and our interview process is designed to identify thoughtful candidates who bring unique strengths to our multidisciplinary team.

## Get in Touch
Follow us on social media:
* [LinkedIn](https://www.linkedin.com/company/anthropic/)
* [Twitter](https://twitter.com/anthropic)
* [Instagram](https://www.instagram.com/anthropic/)
* [YouTube](https://www.youtube.com/anthropic)
* [Contact Us](https://www.anthropic.com/contact)