# Question 2: Use Webscraping to Extract Tesla Revenue Data

# Import necessary libraries

In [1]:
from io import StringIO

import pandas as pd
import requests


# Verify that the required packages are installed

In [2]:
import re
from importlib import metadata


def normalize_name(name):
    """Return a distribution name lower-cased with runs of '-', '_' and '.' collapsed to '-'."""
    return re.sub(r"[-_.]+", "-", name).lower()


def installed_distribution_names():
    """Return the normalized names of every distribution visible to this interpreter.

    Uses the standard-library ``importlib.metadata`` instead of the deprecated
    ``pkg_resources`` API. Distributions without readable metadata or without
    a ``Name`` field are skipped.
    """
    names = set()
    for dist in metadata.distributions():
        dist_metadata = dist.metadata
        name = dist_metadata.get("Name") if dist_metadata is not None else None
        if name:
            names.add(normalize_name(name))
    return names


installed_packages = installed_distribution_names()

# pandas is listed as well because this notebook imports it alongside requests.
required_packages = {'requests', 'beautifulsoup4', 'pandas'}
missing_packages = sorted(required_packages - installed_packages)

if not missing_packages:
    print("All required packages are installed.")
else:
    print("Some required packages are missing.")
    # Name the absent packages so the reader knows what to install.
    print("Missing: " + ", ".join(missing_packages))

All required packages are installed.


# URL containing Tesla revenue data

In [3]:
import requests
import pandas as pd

# Page on macrotrends.net that this notebook scrapes for Tesla revenue.
url = "https://www.macrotrends.net/stocks/charts/TSLA/tesla/revenue"

# Browser-like User-Agent string, so the request mimics a browser visit.
user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/114.0.0.0 Safari/537.36"
)
headers = {"User-Agent": user_agent}


# Request the page content

In [4]:
# Fetch the page using the browser-like headers defined above.
# The timeout (in seconds) keeps this cell from hanging indefinitely if the
# server never answers; raise_for_status() stops here with an HTTPError on a
# 4xx/5xx reply instead of handing an error page to the table parser.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

# Use pandas to read all tables from the HTML content

In [None]:
# Parse every <table> in the downloaded HTML into a list of DataFrames.
# The text is wrapped in StringIO because passing literal HTML straight to
# pd.read_html is deprecated since pandas 2.1.
tables = pd.read_html(StringIO(response.text))
print(f"Number of tables found: {len(tables)}")

# Select the Tesla quarterly revenue table (the second table on the page)

In [6]:
# Take the second parsed table (index 1) as the revenue data.
# NOTE(review): the index depends on the page layout - confirm it still points
# at the intended table if the site changes.
# .copy() keeps the cleaning steps below from modifying the frame stored in
# `tables`, so re-running from this cell always starts from the scraped data.
tesla_revenue_df = tables[1].copy()

# Rename columns for clarity

In [7]:
# Replace the scraped column headers with short, descriptive names.
revenue_column_names = ['Date', 'Revenue']
tesla_revenue_df.columns = revenue_column_names

# Drop any rows with missing values

In [8]:
# Remove rows that contain a missing value in any column.
# The result is rebound rather than dropped in place; .copy() makes it an
# independent frame so later column assignments do not raise
# SettingWithCopyWarning. The original row labels are kept (no index reset).
tesla_revenue_df = tesla_revenue_df.dropna().copy()

# Clean the 'Revenue' column: remove '$' and ',' then convert to float

In [9]:
# Strip the '$' and ',' characters from 'Revenue' and store it as float.
# Direct column assignment replaces the column, so it takes the float dtype;
# `.loc[:, 'Revenue'] = ...` writes into the existing object column on
# pandas >= 2.0 and leaves the dtype as object.
# .astype(str) first makes the cell safe to re-run after the column has
# already been converted to numbers.
tesla_revenue_df['Revenue'] = (
    tesla_revenue_df['Revenue']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)


# Display the last five rows of the cleaned dataframe

In [10]:
# Show the final five rows of the cleaned frame.
tesla_revenue_df.tail(n=5)

Unnamed: 0,Date,Revenue
58,2010-09-30,31.0
59,2010-06-30,28.0
60,2010-03-31,21.0
62,2009-09-30,46.0
63,2009-06-30,27.0
