In [1]:
# A) Install packages (run once; if you reopen the notebook another day, run again)
!pip -q install yfinance requests_html plotly bs4 lxml

# Imports
import yfinance as yf
import pandas as pd
import numpy as np
import re
from bs4 import BeautifulSoup
import requests
from io import StringIO
import plotly.graph_objects as go

pd.options.display.float_format = "{:,.2f}".format


In [2]:
def _to_number(x):
    """Convert '$12.3B', '4,567.8M', '—', '-' etc. to float (USD)."""
    if x is None:
        return np.nan
    s = str(x).strip()
    if s in {"", "-", "—", "N/A"}:
        return np.nan
    s = s.replace("$", "").replace(",", "")
    mult = 1.0
    if s.endswith(("B","b")):
        mult, s = 1e9, s[:-1]
    elif s.endswith(("M","m")):
        mult, s = 1e6, s[:-1]
    elif s.endswith(("K","k")):
        mult, s = 1e3, s[:-1]
    s = re.sub(r"[^0-9eE\.\-+]", "", s)
    try:
        return float(s) * mult
    except:
        return np.nan



In [3]:
def get_quarterly_revenue(ticker: str, company_slug: str) -> pd.DataFrame:
    """
    Scrape quarterly revenue from Macrotrends and return DataFrame with columns:
    ['Date','Revenue'] where Date is datetime and Revenue is float (USD).
    """
    url = f"https://www.macrotrends.net/stocks/charts/{ticker}/{company_slug}/revenue"
    html = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text
    soup = BeautifulSoup(html, "lxml")

    # Find the table whose nearest <h2> says 'Quarterly Revenue'
    target_tbl = None
    for tbl in soup.find_all("table"):
        h2 = tbl.find_previous("h2")
        if h2 and "Quarterly Revenue" in h2.get_text(strip=True):
            target_tbl = tbl
            break

    # Fallback: first table with 'Date' and 'Revenue' in the header
    if target_tbl is None:
        for tbl in soup.find_all("table"):
            thead = tbl.find("thead")
            if thead and "Revenue" in thead.get_text() and "Date" in thead.get_text():
                target_tbl = tbl
                break

    if target_tbl is None:
        raise RuntimeError("Quarterly revenue table not found on Macrotrends page.")

    # Read HTML safely (no FutureWarning)
    df = pd.read_html(StringIO(str(target_tbl)), flavor="lxml")[0]

    # Normalize columns
    df.columns = [c.strip() for c in df.columns]
    if "Revenue" not in df.columns:
        rev_col = [c for c in df.columns if "Revenue" in c][0]
        df = df.rename(columns={rev_col: "Revenue"})

    # Clean values
    df = df.dropna(subset=["Date", "Revenue"])
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    df["Revenue"] = df["Revenue"].apply(_to_number)
    df = df.dropna(subset=["Date", "Revenue"]).sort_values("Date").reset_index(drop=True)
    return df[["Date", "Revenue"]]


In [4]:
# Q1 - Extracting Tesla Stock Data Using yfinance
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max")
tesla_data.reset_index(inplace=True)

print("TESLA stock data (head):")
display(tesla_data.head())

print("Columns:", list(tesla_data.columns))



TESLA stock data (head):


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.27,1.67,1.17,1.59,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.72,2.03,1.55,1.59,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.67,1.73,1.35,1.46,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.53,1.54,1.25,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.33,1.33,1.06,1.07,103003500,0.0,0.0


Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']


In [9]:
!pip -q install yfinance plotly bs4 lxml

import yfinance as yf
import pandas as pd
import numpy as np
import re
import requests
from bs4 import BeautifulSoup
from io import StringIO
import plotly.graph_objects as go

pd.options.display.float_format = "{:,.2f}".format


In [10]:
# Q1 - Extracting Tesla Stock Data Using yfinance
tesla = yf.Ticker("TSLA")
tesla_data = tesla.history(period="max")
tesla_data.reset_index(inplace=True)

print("TESLA stock data (head):")
display(tesla_data.head())
print("Columns:", list(tesla_data.columns))


TESLA stock data (head):


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2010-06-29 00:00:00-04:00,1.27,1.67,1.17,1.59,281494500,0.0,0.0
1,2010-06-30 00:00:00-04:00,1.72,2.03,1.55,1.59,257806500,0.0,0.0
2,2010-07-01 00:00:00-04:00,1.67,1.73,1.35,1.46,123282000,0.0,0.0
3,2010-07-02 00:00:00-04:00,1.53,1.54,1.25,1.28,77097000,0.0,0.0
4,2010-07-06 00:00:00-04:00,1.33,1.33,1.06,1.07,103003500,0.0,0.0


Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']


In [12]:
!rm -f tesla_revenue.csv


In [13]:
!curl -L "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/data/tesla_revenue.csv" -o tesla_revenue.csv


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   348  100   348    0     0   1559      0 --:--:-- --:--:-- --:--:--  1553


In [14]:
tesla_revenue = pd.read_csv("tesla_revenue.csv")
print("File loaded successfully! Shape:", tesla_revenue.shape)
display(tesla_revenue.head())


File loaded successfully! Shape: (0, 1)


Unnamed: 0,"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?><Error><Code>NoSuchKey</Code><Message>The specified key does not exist.</Message><Resource>/cf-courses-data/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/data/tesla_revenue.csv</Resource><RequestId>53367d3f-e026-468f-a492-9acc39d863a5</RequestId><httpStatusCode>404</httpStatusCode></Error>"


In [16]:
# Q2 - Extracting Tesla Revenue Data (using stable IBM dataset, no scraping)
!curl -L "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-PY0220EN-SkillsNetwork/data/tesla_revenue.csv" -o tesla_revenue.csv

tesla_revenue = pd.read_csv("tesla_revenue.csv")
tesla_revenue = tesla_revenue.dropna()
tesla_revenue["Revenue"] = (tesla_revenue["Revenue"]
                            .str.replace(",", "", regex=False)
                            .str.replace("$", "", regex=False)
                            .astype(float))
tesla_revenue["Date"] = pd.to_datetime(tesla_revenue["Date"])
tesla_revenue = tesla_revenue.sort_values("Date").reset_index(drop=True)

print("TESLA quarterly revenue (head):")
display(tesla_revenue.head())


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   348  100   348    0     0   1003      0 --:--:-- --:--:-- --:--:--  1005


KeyError: 'Revenue'

In [17]:
# Q3 - Extracting GameStop Stock Data Using yfinance
gamestop = yf.Ticker("GME")
gme_data = gamestop.history(period="max")
gme_data.reset_index(inplace=True)

print("GME stock data (head):")
display(gme_data.head())
print("Columns:", list(gme_data.columns))


GME stock data (head):


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2002-02-13 00:00:00-05:00,1.62,1.69,1.6,1.69,76216000,0.0,0.0
1,2002-02-14 00:00:00-05:00,1.71,1.72,1.67,1.68,11021600,0.0,0.0
2,2002-02-15 00:00:00-05:00,1.68,1.69,1.66,1.67,8389600,0.0,0.0
3,2002-02-19 00:00:00-05:00,1.67,1.67,1.58,1.61,7410400,0.0,0.0
4,2002-02-20 00:00:00-05:00,1.62,1.66,1.6,1.66,6892800,0.0,0.0


Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']
