In [1]:
# Let's import all required objects
import json
import os
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Load the environment, sanity-check the API key and create the shared client

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Shape check only (prefix and length); the key is not validated against the API here.
key_looks_valid = bool(api_key) and api_key.startswith('sk-proj-') and len(api_key) > 10
print(
    "API key looks good so far"
    if key_looks_valid
    else "There might be a problem with your API key? Please visit the troubleshooting notebook!"
)

MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [3]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes:
        url: the address that was requested
        body: the raw response bytes
        title: text of the <title> tag, or "No title found"
        text: text of <body> with script/style/img/input elements removed,
            one fragment per line ("" when the page has no <body>)
        links: every non-empty href of an <a> tag, in document order
            (relative links and duplicates are kept as found)
    """

    def __init__(self, url, timeout=10):
        """
        Fetch `url` and parse its title, text and links.

        Args:
            url: page to download
            timeout: seconds handed to requests.get; without it a stalled
                server would block the notebook indefinitely

        Raises:
            requests.RequestException: if the request fails or times out
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # .string is None for an empty <title> or one with nested tags; fall back
        # so the literal text "None" never ends up in a prompt.
        self.title = (soup.title.string if soup.title else None) or "No title found"
        if soup.body:
            # Drop elements that carry no readable content
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as one block for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
# Scrape one landing page and show what was extracted from it
cdk = Website("https://www.cdkglobal.com/")
print(cdk.get_contents())
print("Links:", cdk.links, sep="\n")


Webpage Title:
CDK Global: Homepage
Webpage Contents:
CDK Global
CDK Global company logo
Who We Serve
Auto Dealerships
Original Equipment Manufacturers
Independent Software Vendors
Heavy Truck
Friction Points Study 2025
In our latest Friction Points study, we uncover where the industry has improved and what still creates friction for customers.
Explore More Details
Dealership Xperience Platform
Foundations Suite
Strengthen workflows in every department with one complete solution
Modern Retail Suite
Surpass consumer expectations by making the buying journey easier
Fixed Operations Suite
Build loyalty and efficiency to maximize your Parts and Service income
Vehicle Inventory Suite
Price more profitably and elevate merchandising with the power of AI
Intelligence Suite
Fine-tune performance with advanced analytics, AI and machine learning
What We Offer
Inventory
Digital Retail
Customer Relationship Management
Finance & Insurance
Dealer Management System
Fixed Operations
Network & Communica

In [5]:
# System prompt for the link-selection call.
# The embedded example must itself be valid JSON: the model is told to answer
# "as in this example", so a malformed example invites malformed replies.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
)
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
# Print the assembled system prompt so its wording and JSON example can be inspected
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [7]:
def get_links_user_prompt(website):
    """
    Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: object exposing `url` and `links` (an iterable of strings)

    Returns:
        The instruction text followed by one link per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Include all the product offerings of the company. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    link_lines = "\n".join(website.links)
    return f"{instructions}Links (some might be relative links):\n{link_lines}"

In [8]:
# Show the user prompt built from the page scraped into `cdk`
print(get_links_user_prompt(cdk))

Here is the list of links on the website of https://www.cdkglobal.com/ - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Include all the product offerings of the company. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
/
/automotive
/oem-offerings
/independent-software-vendors
https://www.cdkglobalheavytruck.com
/insights/dealers-speed-sales-according-latest-cdk-friction-points-study
/dealership-xperience-platform
/cdk-foundations-suite
/cdk-modern-retail-suite
/cdk-fixed-operations-suite
/cdk-vehicle-inventory-suite
/cdk-intelligence-suite
/cdk-vehicle-inventory-suite
/automotive-digital-retailing
/sales-inventory/vehicle-sales/automotive-crm-software
/fi
/dms
/fixed-ops
/it-solutions
/fueling-dealerships-harnessing-data-and-intelligence
/cdk-unify
/about
https://careers.cdkglobal.com
/insights
/training
/media-center
/cdk-trust-center
/customer-stories
/s

In [9]:
def get_links(url):
    """
    Scrape `url` and ask the model which of its links belong in a brochure.

    Args:
        url: page whose links should be classified

    Returns:
        The model's JSON reply parsed into Python objects.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        # Ask the API for a JSON object so the reply can be parsed directly
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [10]:
# Try the link selection on a live site (fetches the page and makes one model call)
get_links("https://www.cdkglobal.com/")

{'links': [{'type': 'about page', 'url': 'https://www.cdkglobal.com/about'},
  {'type': 'careers page', 'url': 'https://careers.cdkglobal.com'},
  {'type': 'automotive offering',
   'url': 'https://www.cdkglobal.com/automotive'},
  {'type': 'OEM offerings', 'url': 'https://www.cdkglobal.com/oem-offerings'},
  {'type': 'independent software vendors',
   'url': 'https://www.cdkglobal.com/independent-software-vendors'},
  {'type': 'CDK Foundations Suite',
   'url': 'https://www.cdkglobal.com/cdk-foundations-suite'},
  {'type': 'CDK Modern Retail Suite',
   'url': 'https://www.cdkglobal.com/cdk-modern-retail-suite'},
  {'type': 'CDK Fixed Operations Suite',
   'url': 'https://www.cdkglobal.com/cdk-fixed-operations-suite'},
  {'type': 'CDK Vehicle Inventory Suite',
   'url': 'https://www.cdkglobal.com/cdk-vehicle-inventory-suite'},
  {'type': 'CDK Intelligence Suite',
   'url': 'https://www.cdkglobal.com/cdk-intelligence-suite'},
  {'type': 'Dealership Xperience Platform',
   'url': 'https:

In [11]:
def get_all_details(url):
    """
    Collect the contents of the landing page and of every link the model selected.

    Args:
        url: landing page of the company website

    Returns:
        One string: "Landing page:" followed by the landing page contents, then
        for each selected link its type as a heading followed by that page's
        contents. Links that cannot be fetched are reported and skipped.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)
    # .get() so a reply without a "links" key yields just the landing page
    for link in links.get("links", []):
        href = link.get("url")
        if not href:
            continue
        # The model is asked for full URLs but may echo a relative href from the
        # page; urljoin resolves those and leaves absolute URLs unchanged.
        target = urljoin(url, href)
        try:
            page = Website(target)
        except requests.RequestException as error:
            # One unreachable page should not cost us the whole brochure
            print(f"Skipping {target}: {error}")
            continue
        sections.append(f"\n\n{link.get('type', '')}\n")
        sections.append(page.get_contents())
    return "".join(sections)

In [12]:
# print(get_all_details("https://www.cdkglobal.com/"))
# System prompt for the brochure-writing call.
# Built from adjacent literals so every sentence break keeps its space.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [13]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure call from the scraped site contents.

    Args:
        company_name: name of the company, quoted in the prompt
        url: landing page of the company website
        max_chars: maximum length of the returned prompt in characters;
            pass None to disable truncation

    Returns:
        The prompt text, cut off after `max_chars` characters.
    """
    user_prompt = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; "
        "use this information to build a short brochure of the company in markdown.\n"
    )
    user_prompt += get_all_details(url)
    # Crude size guard: keeps only the start of the combined text
    return user_prompt[:max_chars]

In [14]:
# Preview the brochure prompt for a live site; the cell output is the string's repr
get_brochure_user_prompt("CDK Global", "https://www.cdkglobal.com/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.cdkglobal.com/about'}, {'type': 'careers page', 'url': 'https://careers.cdkglobal.com'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/dealership-xperience-platform'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/cdk-foundations-suite'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/cdk-modern-retail-suite'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/cdk-fixed-operations-suite'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/cdk-vehicle-inventory-suite'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/cdk-intelligence-suite'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/automotive-digital-retailing'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/sales-inventory/vehicle-sales/automotive-crm-software'}, {'type': 'product offering', 'url': 'https://www.cdkglobal.com/fi'}, {'type': '

"You are looking at a company called: CDK Global\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nCDK Global: Homepage\nWebpage Contents:\nCDK Global\nCDK Global company logo\nWho We Serve\nAuto Dealerships\nOriginal Equipment Manufacturers\nIndependent Software Vendors\nHeavy Truck\nFriction Points Study 2025\nIn our latest Friction Points study, we uncover where the industry has improved and what still creates friction for customers.\nExplore More Details\nDealership Xperience Platform\nFoundations Suite\nStrengthen workflows in every department with one complete solution\nModern Retail Suite\nSurpass consumer expectations by making the buying journey easier\nFixed Operations Suite\nBuild loyalty and efficiency to maximize your Parts and Service income\nVehicle Inventory Suite\nPrice more profitably and elevate merchandising with the power of AI\nIntelligence

In [15]:
def create_brochure(company_name, url):
    """
    Generate a brochure in a single (non-streamed) model call and render it.

    Args:
        company_name: name of the company, quoted in the prompt
        url: landing page of the company website

    The brochure is displayed as rendered Markdown; nothing is returned.
    """
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
    )
    brochure = (response.choices[0].message.content or "").strip()
    # The model sometimes wraps its whole answer in a ```markdown ... ``` fence,
    # which Markdown() would show as a literal code block instead of formatted
    # text. Remove only that outer wrapper; the brochure text itself is untouched.
    fence = "```"
    is_wrapped = (
        len(brochure) >= 2 * len(fence)
        and brochure.startswith(fence)
        and brochure.endswith(fence)
    )
    if is_wrapped:
        brochure = brochure[len(fence):-len(fence)]
        if brochure.startswith("markdown"):
            brochure = brochure[len("markdown"):]
        brochure = brochure.strip()
    display(Markdown(brochure))

In [16]:
# Generate and render the brochure in one shot (no streaming)
create_brochure("CDK Global", "https://www.cdkglobal.com/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.cdkglobal.com/about'}, {'type': 'careers page', 'url': 'https://careers.cdkglobal.com'}, {'type': 'automotive product offerings', 'url': 'https://www.cdkglobal.com/automotive'}, {'type': 'OEM offerings', 'url': 'https://www.cdkglobal.com/oem-offerings'}, {'type': 'Independent Software Vendors', 'url': 'https://www.cdkglobal.com/independent-software-vendors'}, {'type': 'CDK Foundations Suite', 'url': 'https://www.cdkglobal.com/cdk-foundations-suite'}, {'type': 'CDK Modern Retail Suite', 'url': 'https://www.cdkglobal.com/cdk-modern-retail-suite'}, {'type': 'CDK Fixed Operations Suite', 'url': 'https://www.cdkglobal.com/cdk-fixed-operations-suite'}, {'type': 'CDK Vehicle Inventory Suite', 'url': 'https://www.cdkglobal.com/cdk-vehicle-inventory-suite'}, {'type': 'CDK Intelligence Suite', 'url': 'https://www.cdkglobal.com/cdk-intelligence-suite'}, {'type': 'Dealership Xperience Platform', 'url': 'https://www.cdkglobal.com/de

```markdown
# CDK Global Brochure

## **Overview**

At **CDK Global**, we empower the automotive retail industry with innovative technology and solutions. With over 50 years of experience, we serve nearly **15,000 dealer locations**, providing a comprehensive suite of tools designed to enhance every aspect of dealership operations, from sales to service.

## **Who We Serve**

- **Auto Dealerships**
- **Original Equipment Manufacturers**
- **Independent Software Vendors**
- **Heavy Truck Industry**

## **Our Solutions**

Our powerful solutions are tailored to address the diverse needs of automotive retailers:

- **Dealership Xperience Platform**: Transform your dealership operations with our open and integrated solutions.
- **Digital Retail**: Seamlessly connect the online and in-store customer experience.
- **Customer Relationship Management (CRM)**: Centralize customer relationships to drive growth and loyalty.
- **Finance & Insurance (F&I)**: Maximize profits while enhancing customer satisfaction.
- **Dealer Management System (DMS)**: Streamline operations with the industry's leading system.
- **Fixed Operations**: Improve service operations and customer experiences to drive profitability.
- **Network & Communication Services**: Optimize your IT infrastructure.
- **Intelligence**: Utilize advanced analytics, AI, and machine learning to make informed business decisions.

## **Company Culture**

At CDK Global, we foster a culture of innovation and excellence. We believe in creating connections that move the automotive retail industry forward. Our dedicated team is focused on collaboration, continuous improvement, and achieving the best possible outcomes for our customers. 

- **Teamwork**: We encourage collaboration across departments to enhance creativity and efficiency.
- **Continuous Learning**: We prioritize training and personal development to equip our teams for success.
- **Customer-Centric**: Our commitment to serving our customers is central to our mission, driving everything we do.

## **Careers at CDK Global**

Join a team of passionate professionals who are at the forefront of automotive technology. We are always on the lookout for **the best and brightest** talent to help us innovate and grow. CDK Global offers:

- **Career Growth**: Opportunities for professional development and advancement.
- **Inclusivity**: A diverse workplace where all voices are heard and valued.
- **Impact**: Work on solutions that make a significant difference in the automotive community.

## **Get in Touch**

Interested in learning more about how CDK Global can elevate your dealership operations or looking to jumpstart your career with us? 

**Contact Us**:  
📞 833.620.2129  
🌐 [Visit Our Website](https://www.cdkglobal.com)

Together, let’s drive the future of automotive retail.
```


In [17]:
def stream_brochure(company_name, url):
    """
    Generate a brochure and render it incrementally while the model streams it.

    Args:
        company_name: name of the company, quoted in the prompt
        url: landing page of the company website

    The display is updated after every received chunk; nothing is returned.
    """
    fence = "```"
    opener = fence + "markdown"

    def without_wrapper(partial):
        # Hide a code fence that wraps the whole reply. Unlike a blanket
        # str.replace, this never removes the word "markdown" or code fences
        # that appear inside the brochure text.
        shown = partial.lstrip()
        if opener.startswith(shown):
            return ""  # only (part of) the opening fence has arrived so far
        if not shown.startswith(fence):
            return partial
        shown = shown[len(opener):] if shown.startswith(opener) else shown[len(fence):]
        tail = shown.rstrip()
        if tail.endswith(fence):
            shown = tail[:-len(fence)]
        return shown

    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )

    # Keep the raw text intact; clean-up is applied only to what is displayed
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(without_wrapper(response)), display_id=display_handle.display_id)

In [20]:
def stream_brochure_ui(company_name, url):
    """
    Generator variant of the streaming brochure, for UIs that consume partial results.

    Args:
        company_name: name of the company, quoted in the prompt
        url: landing page of the company website

    Yields:
        The brochure text received so far, after every chunk, with a code fence
        that wraps the whole reply removed so it renders as formatted Markdown.
    """
    fence = "```"
    opener = fence + "markdown"

    def without_wrapper(partial):
        # Strip only the outer ```markdown ... ``` wrapper, never inner content
        shown = partial.lstrip()
        if opener.startswith(shown):
            return ""  # only (part of) the opening fence has arrived so far
        if not shown.startswith(fence):
            return partial
        shown = shown[len(opener):] if shown.startswith(opener) else shown[len(fence):]
        tail = shown.rstrip()
        if tail.endswith(fence):
            shown = tail[:-len(fence)]
        return shown

    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )
    result = ""
    for chunk in stream:
        result += chunk.choices[0].delta.content or ""
        yield without_wrapper(result)
        

In [18]:
# Same brochure, rendered incrementally as the model streams its reply
stream_brochure("CDK Global", "https://www.cdkglobal.com/")


Found links: {'links': [{'type': 'about page', 'url': 'https://www.cdkglobal.com/about'}, {'type': 'careers page', 'url': 'https://careers.cdkglobal.com'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/dealership-xperience-platform'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/cdk-foundations-suite'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/cdk-modern-retail-suite'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/cdk-fixed-operations-suite'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/cdk-vehicle-inventory-suite'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/cdk-intelligence-suite'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/automotive-digital-retailing'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/sales-inventory/vehicle-sales/automotive-crm-software'}, {'type': 'product offerings', 'url': 'https://www.cdkglobal.com/fi'}, {


# Welcome to CDK Global

## Who We Are
**CDK Global** is a leading provider of technology solutions for the automotive industry, empowering auto dealerships and manufacturers since 1973. With trusted software and services offered to nearly 15,000 dealer locations, we specialize in creating connections that enhance automotive retail performance. 

## What We Offer
Our solutions include a comprehensive range of products designed to optimize every aspect of dealership operations:
- **Dealership Xperience Platform**: Seamlessly integrate your sales and service processes.
- **Digital Retail**: Provide a unified experience across online and in-store channels.
- **Customer Relationship Management (CRM)**: Keep your customers at the forefront of your vision.
- **Finance & Insurance (F&I)**: Enhance profitability while ensuring customer contentment.
- **Dealer Management System (DMS)**: The industry’s foremost management system to streamline operations.
- **Fixed Operations**: Elevate service department efficiencies for better profitability.
- **Network & Communication Services**: Tailored IT solutions to empower your dealership.
- **Intelligence Suite**: Use advanced analytics and AI to drive informed decisions.

## Our Customers
We serve:
- **Auto Dealerships**: From small independent shops to large multi-location dealerships.
- **Original Equipment Manufacturers**: Supporting industry leaders with integrated solutions.
- **Independent Software Vendors**: Collaborating to enhance product offerings.
- **Heavy Truck Dealerships**: Specialized solutions for unique demands.

## Company Culture
At CDK Global, we're dedicated to fostering an innovative and inclusive work environment. We seek individuals who are driven, creative, and eager to collaborate. Our culture encourages growth and professional development, ensuring our team is equipped to navigate the evolving landscape of automotive technology.

### Join Us
We are always on the lookout for passionate talent to help us lead the industry. Explore exciting career opportunities with us and become a part of our mission to transform automotive retail.

## Contact Us
Have questions or want to learn more about our solutions? Connect with us at [Contact Us](tel:8336202129) or reach out through our website.

**CDK Global**: Driving innovation and performance in automotive retail.



In [21]:
# Let's set up a simple Gradio UI around stream_brochure_ui (defined above): it accepts
# the company name and URL and displays the streamed brochure as markdown.
import gradio as gr

brochure_inputs = [
    gr.Textbox(label="Company Name", placeholder="Enter the company name"),
    gr.Textbox(label="Company URL", placeholder="Enter the company URL"),
]

brochure_app = gr.Interface(
    fn=stream_brochure_ui,
    inputs=brochure_inputs,
    outputs=[gr.Markdown(label="Brochure:")],
    title="Company Brochure Generator",
    description="Generate a brochure for a company using its website contents",
    flagging_mode="never",
)

# NOTE(review): share=True also serves the app on a public gradio.live URL, so anyone
# with the link can trigger page fetches and model calls — confirm this is intended.
brochure_app.launch(share=True)




* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://8afc51fd178726d33e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 2098, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/blocks.py", line 1657, in call_function
    prediction = await utils.async_iteration(iterator)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/anaconda3/envs/llms/lib/python3.11/site-packages/gradio/utils.py", line 728, in a

Found links: {'links': [{'type': 'about page', 'url': 'https://www.accenture.com/in-en/about/company-index'}, {'type': 'sustainability page', 'url': 'https://www.accenture.com/in-en/about/corporate-sustainability'}, {'type': 'leadership page', 'url': 'https://www.accenture.com/in-en/about/leadership'}, {'type': 'careers page', 'url': 'https://www.accenture.com/in-en/careers'}, {'type': 'services page', 'url': 'https://www.accenture.com/in-en/services'}, {'type': 'cloud services', 'url': 'https://www.accenture.com/in-en/services/cloud'}, {'type': 'cybersecurity services', 'url': 'https://www.accenture.com/in-en/services/cybersecurity'}, {'type': 'data and AI services', 'url': 'https://www.accenture.com/in-en/services/data-ai'}, {'type': 'digital engineering and manufacturing services', 'url': 'https://www.accenture.com/in-en/services/digital-engineering-manufacturing'}, {'type': 'emerging technology services', 'url': 'https://www.accenture.com/in-en/services/emerging-technology'}, {'typ