# Built-in list methods

In [None]:
## Lists are mutable: the methods below modify the list "in place",
## so you don't need to re-assign the result to a variable.
## (The in-place methods return None; pop(), index(), count() and copy() return a value.)

# Sample list
my_list = [10, 20, 30, 40, 50]

# 1. append() - Adds a single element to the end of the list
my_list.append(60)
print("After append:", my_list)  # Output: [10, 20, 30, 40, 50, 60]

# 2. insert() - Inserts an element at a specified position (later elements shift right)
my_list.insert(2, 25)  # Insert 25 at index 2
print("After insert:", my_list)  # Output: [10, 20, 25, 30, 40, 50, 60]

# 3. extend() - Extends the list by appending each element from another list
my_list.extend([70, 80])
print("After extend:", my_list)  # Output: [10, 20, 25, 30, 40, 50, 60, 70, 80]

# 4. remove() - Removes the first occurrence of a value (raises ValueError if the value is absent)
my_list.remove(25)  # Remove the first occurrence of 25
print("After remove:", my_list)  # Output: [10, 20, 30, 40, 50, 60, 70, 80]

# 5. pop() - Removes and returns an element at a specified index (or the last element by default)
popped_value = my_list.pop(3)  # Remove the element at index 3
print("After pop:", my_list)  # Output: [10, 20, 30, 50, 60, 70, 80]
print("Popped value:", popped_value)  # Output: 40

# 6. index() - Returns the index of the first occurrence of a value (does not modify the list)
index_of_50 = my_list.index(50)
print("Index of 50:", index_of_50)  # Output: 3

# 7. count() - Returns the number of occurrences of a value (does not modify the list)
count_of_60 = my_list.count(60)
print("Count of 60:", count_of_60)  # Output: 1

# 8. sort() - Sorts the list in place, ascending by default (reverse=True sorts descending)
my_list.sort()  # Sort in ascending order
print("After sort (ascending):", my_list)  # Output: [10, 20, 30, 50, 60, 70, 80]

my_list.sort(reverse=True)  # Sort in descending order
print("After sort (descending):", my_list)  # Output: [80, 70, 60, 50, 30, 20, 10]

# 9. reverse() - Reverses the order of the list in place
my_list.reverse()
print("After reverse:", my_list)  # Output: [10, 20, 30, 50, 60, 70, 80]

# 10. copy() - Returns a shallow copy (a new list object holding the same elements)
copied_list = my_list.copy()
print("Copied list:", copied_list)  # Output: [10, 20, 30, 50, 60, 70, 80]

# 11. clear() - Removes all elements from the list (copied_list above is a separate list and keeps its elements)
my_list.clear()
print("After clear:", my_list)  # Output: []

# Recap: Lists are mutable.


# Cleaning up messy dataframes

### Make a dataframe with issues

In [None]:
# Bonus: Writing functions for data cleaning

import pandas as pd
import numpy as np

# Create a DataFrame with issues in the Model and Price columns.
# All four lists have one entry per car, grouped by make
# (the first line of each list mixes several makes, then one line per make).
data = {
    "Make": ["Volkswagen", "Fiat", "Ferrari", "Ferrari", "Volkswagen", "Volkswagen", "Volkswagen", "Volkswagen", "Volkswagen",
             "Fiat", "Fiat", "Fiat", "Fiat", "Fiat",
             "Ferrari", "Ferrari", "Ferrari", "Ferrari", "Ferrari",
             "Alfa Romeo", "Alfa Romeo", "Alfa Romeo", "Alfa Romeo", "Alfa Romeo",
             "Subaru", "Subaru", "Subaru", "Subaru", "Subaru",
             "Chevrolet", "Chevrolet", "Chevrolet", "Chevrolet", "Chevrolet"],
    "Model": ["VW Jetta", "Panda", "250 GTO", "California", "Volkswagen Golf", "Volkswagen Passat", "Volkswagen Tiguan", "VW Polo", "Volkswagen Arteon",
              "Fiat 500", "Fiat Tipo", "Fiat Punto", "Fiat Panda Cross", "Fiat 124 Spider",
              "Ferrari 488 Spider", "Ferrari F8 Tributo", "Ferrari Portofino", "Ferrari Roma", "Ferrari SF90 Stradale",
              "Alfa Giulia", "Stelvio", "Alfa 4C", "Alfa GTV", "Alfa Romeo Giulietta",
              "Subaru Impreza", "Subaru Outback", "Subaru Forester", "Subaru Crosstrek", "Subaru WRX",
              "Chevrolet Malibu", "Chevrolet Camaro", "Chevy Equinox", "Chevrolet Tahoe", "Chevy Traverse"],
    "Year": [2024, 1987, 1963, 2021, 2022, 2023, 2021, 2020, 2022,
             1990, 2018, 2005, 2019, 2017,
             2020, 2019, 2021, 2021, 2022,
             2021, 2020, 2018, 2019, 2019,
             2021, 2020, 2019, 2022, 2022,
             2019, 2022, 2021, 2021, 2020],
    "Price": ["$24875", "Price is $8200", "($70000000)", "$255995", "$23000", "$27000", "Price: $25000", "€22000", "35,000",
              "9500 dollars", "$18500", "15k", "$12000", "25000 USD",
              "€280000", "Price is $320000", "$215000", "222000", "($625000)",
              "43,000", "$41000", "56k", "$45000", "39,000",
              "23,000", "$27000", "Price: $25000", "24k", "36000",
              "$25000", "$35000", "$32000", "45000", "$40000"]
}

# Create DataFrame
df = pd.DataFrame(data)

# Sort by make, then model. The original index labels are kept, so positional
# access later on (df.iloc[...]) follows this sorted order, not the order above.
df = df.sort_values(by = ["Make", "Model"])

# Backup the DataFrame so the untouched values can be restored or inspected later
df_backup = df.copy()

# View the df
# print(df)
# print()

# There are two issues here:

## 1. We have redundant make names in "Model" column. E.g., "Alfa 4C", "VW Jetta"
##    (printing every model name that contains a space makes these easy to spot)
print(df[df["Model"].str.contains(" ")].Model.values)
print()

## 2. Prices are inconsistently formatted strings, and we want plain numbers
print(df["Price"].values)
print()


### Step 1: Fix redundant info in the "Model" column

In [None]:
# First, we can write a function that works on individual row values in a column

def clean_model(row, min_matches=2):
    """Remove redundant make names (or make nicknames) from a row's model name.

    A word is treated as a reference to the make, and dropped, only when BOTH hold:
      * it starts with the same letter as one of the words in the make
        (e.g. "VW" / "Volkswagen", "Chevy" / "Chevrolet", "Romeo" / "Alfa Romeo"), and
      * at least `min_matches` of its letters also occur in the make name.

    The first condition keeps genuine model words that merely share a few letters
    with the make: without it "Fiat Panda Cross" would be reduced to "Cross",
    because "panda" contains two a's that also occur in "fiat".

    The last word is always kept so the model name can never become empty.

    Parameters
    ----------
    row : mapping with 'Make' and 'Model' string entries (e.g. a DataFrame row)
    min_matches : int, minimum number of shared letters for a word to be dropped

    Returns
    -------
    str : the model name with make references removed
    """
    make_words = row['Make'].lower().split()
    make = row['Make'].lower().replace(' ', '')  # Remove spaces from make name for matching
    make_initials = {word[0] for word in make_words}

    # Split the model name into words
    model_words = row['Model'].split()
    if not model_words:
        return ''

    *leading_words, last_word = model_words

    cleaned_model_words = []
    for word in leading_words:
        word_lower = word.lower()
        # Count the letters of this word that also appear in the make name
        match_count = sum(1 for char in word_lower if char in make)
        refers_to_make = word_lower[0] in make_initials and match_count >= min_matches
        if not refers_to_make:
            cleaned_model_words.append(word)

    # The last word is always part of the model name
    cleaned_model_words.append(last_word)

    # Join the remaining words to form the cleaned model name
    return ' '.join(cleaned_model_words)

# Then we can apply our function to every row using the pandas method .apply().
# axis=1 passes each row (rather than each column) to clean_model, and the
# returned strings replace the 'Model' column.

df['Model'] = df.apply(clean_model, axis=1)

# View the cleaned DataFrame
df # Note the changes in the Model column



#### How does this work?

In [None]:
# This walkthrough reads its example row straight from the untouched backup,
# so it always sees the original (uncleaned) model name and does NOT overwrite
# the cleaned `df` produced by the previous cell.

# First inspect the input to the function (DataFrame row)

# pick a row number to test (position in the sorted DataFrame)
test_row = 0

row = df_backup.iloc[test_row]
print("Our target row:")
print(row)
print()

# our function has a default argument, 'min_matches=2', which acts like an input
min_matches = 2

# Then we'll look at the first variables created in the function (make & model)

make = row['Make'].lower().replace(' ', '')  # We're converting this to lowercase and removing spaces
print(f"Make: \n{make}")
print()

model = row['Model']
print(f"Model: \n{model}")
print()

# Split the model name into words

model_words = model.split()
print(f"Model Words: \n{model_words}")
print()

# Iterate through the model words, except the last word, and check for matches

# Initialize an empty list to hold the cleaned model words
cleaned_model_words = []

# Enumerate through the model words to get both the index and the word
for i, word in enumerate(model_words):
    print(f"Current word: {word}")
    # If this is the last word, always add it to the cleaned_model_words list
    if i == len(model_words) - 1:
        print(f"Keep the last word in the model name: {word}")
        cleaned_model_words.append(word)
    else:
        # Convert the word to lowercase for comparison
        word_lower = word.lower()
        print(f"Current word (to lowercase): {word_lower}")

        # Count the number of characters in the word that are also in the make name
        match_count = sum(1 for char in word_lower if char in make)
        print(f"Match count for '{word_lower}' with make '{make}': {match_count}")

        # If the match count is less than the minimum matches, add the word to cleaned_model_words
        if match_count < min_matches:
            print(f"Appending word (less than {min_matches} matches): {word}")
            cleaned_model_words.append(word)
        else:
            print(f"Target word (has {match_count} matching characters): {word}")
    print()

# The cleaned_model_words list now contains the filtered model words
print()
print(f"Cleaned Model Name: {cleaned_model_words}")
print()


### Step 2: Clean the "Price" column

In [None]:
# Writing a function to clean price data

def clean_price(price, euro_to_dollar_rate=1.1):
    """Convert a messy price value into a float amount in US dollars.

    Handles currency symbols and words ("$24875", "25000 USD", "9500 dollars"),
    thousands separators ("35,000"), surrounding text ("Price is $8200"),
    parentheses ("($625000)", returned as a positive number), a "k" suffix for
    thousands ("15k", "1.5k") and euro amounts ("€22000" or "... eur"), which
    are converted with `euro_to_dollar_rate`.

    Parameters
    ----------
    price : str, number or None
        The raw price. Values that are already numeric are returned as floats,
        so applying the function twice to the same column is harmless.
    euro_to_dollar_rate : float, default 1.1
        Multiplier applied to amounts marked as euros.

    Returns
    -------
    float or None
        The price in dollars, or None when no number can be extracted.
    """
    # Already-numeric (or missing) values: nothing to parse
    if not isinstance(price, str):
        try:
            return float(price)
        except (TypeError, ValueError):
            return None

    # Convert price to lowercase for consistent processing
    price = price.lower()

    # 'k' means thousands only when it follows a digit and does not start a
    # longer word, so "15k" and "1.5 k" qualify but "5000 bucks" does not.
    is_thousands = any(
        char == 'k'
        and price[:i].rstrip()[-1:].isdigit()
        and not price[i + 1:i + 2].isalpha()
        for i, char in enumerate(price)
    )

    # Detect euro amounts (the symbol/text itself is dropped by the filter below)
    is_euro = '€' in price or 'eur' in price

    # Remove non-numeric characters except for periods
    num_str = ''.join(char for char in price if char.isdigit() or char == '.')
    if not num_str:
        return None

    # Convert to float if possible; e.g. "1.2.3" or "." is not a valid number
    try:
        value = float(num_str)
    except ValueError:
        return None

    if is_thousands:
        value *= 1000
    if is_euro:
        value *= euro_to_dollar_rate

    return value

# Then we can apply our function to each value in the 'Price' column using the pandas method .apply()
# (here .apply() is called on a single column, so clean_price receives one value at a time)

df['Price'] = df['Price'].apply(clean_price)

# View the cleaned DataFrame
df # Note that prices are converted to numeric values and standardized to dollars



#### How does this work?

In [None]:
# This walkthrough reads its example row straight from the untouched backup,
# so it always sees the original price string and does NOT overwrite the
# cleaned `df` produced by the previous cell.

test_row = 12
row = df_backup.iloc[test_row]

# Inspect the input to the function

price = row['Price']
print("Original Price:")
print(price)
print()

# Convert price to lowercase for consistent processing

price = price.lower()

# Handle 'k' for thousands

if 'k' in price:
    print("'k' detected")
    price = price.replace('k', '000')
    print(f"'k' to thousands: {price}")
else:
    print("No 'k' detected")
print()

# Handle conversion from euros to dollars

euro_to_dollar_rate = 1.1
if '€' in price or 'eur' in price:
    price = price.replace('€', '').replace('eur', '').strip()
    is_euro = True
else:
    is_euro = False
print(f"Price in Euro?: {is_euro}")
print()

# Remove non-numeric characters except for periods

num_str = ''
for char in price:
    if char.isdigit() or char == '.':
        num_str += char

# Convert to float if possible (nothing is printed when no digits were found)

if num_str:
    price = float(num_str)
    if is_euro:
        price *= euro_to_dollar_rate
        print(f"Final Price (euro to USD): {price}")
    else:
        print(f"Final Price (already in USD): {price}")

# Plotting

### Load Python plotting modules

In [None]:
## Load Python plotting modules

import matplotlib.pyplot as plt
import seaborn as sns

# Set the global style for all following plots with the 'seaborn' module.
# set_theme() is the preferred interface; sns.set() is only an alias for it.
sns.set_theme(style="whitegrid")

### Using Matplotlib

In [None]:
# Load seaborn's example "mpg" dataset; the plots below use its
# model_year, mpg, cylinders, horsepower and origin columns (among others)
mpg = sns.load_dataset("mpg")

In [None]:
# Building out a basic plot

# Plot the data (default settings)
plt.plot(mpg['model_year'], mpg['mpg'])
# First positional argument: mpg['model_year'] is the data for the x-axis
# Second positional argument: mpg['mpg'] is the data for the y-axis

# NOTE: matplotlib does not automatically sort x-axis values, but plots in the order specified by your data
# (with the default line style, consecutive rows are joined by a line in that order)

In [None]:
# Adding keyword arguments to Matplotlib

plt.figure(figsize=(10, 6))
plt.plot(mpg['model_year'], mpg['mpg'], marker='o', color='b', linewidth=0)
# marker='o' adds a circle marker at each data point
# linewidth=0 hides the connecting line, so only the markers are drawn
# color='b' sets the color of the markers (and of the hidden line) to blue

# Add title and labels
plt.title('Car MPG Over Years')  # Set the title of the plot
plt.xlabel('Year')  # Set the label for the x-axis
plt.ylabel('MPG')  # Set the label for the y-axis

# Show the plot
plt.show()  # Display the plot



# Same data as a scatter plot, colored by number of cylinders.
# Each cylinder count that appears as a key here gets its own color.
colors = {3: 'blue', 4: 'orange', 5: 'green', 6: 'red', 8: 'purple'}

# Create the scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(mpg['model_year'], mpg['mpg'], c=mpg['cylinders'].map(colors), alpha = 0.5)

# plt.scatter with a list of colors creates no legend entries on its own, so
# draw one empty, labelled scatter per color purely to populate the legend
for cyl, color in colors.items():
    plt.scatter([], [], c=color, label=f'{cyl} cylinders')
# Place the legend below the axes, in a single row of 5 entries
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5)

plt.title('Car MPG Over Years')  # Set the title of the plot
plt.xlabel('Year')  # Set the label for the x-axis
plt.ylabel('MPG')  # Set the label for the y-axis

plt.show()

### Using Seaborn

In [None]:
# Basic plot with seaborn

# Seaborn sits on top of Matplotlib: every seaborn plotting call draws onto a
# Matplotlib figure/axes, which is why the figure is still created and shown
# through 'plt'.

# Scatter plot of miles per gallon against horsepower, colored by origin
plt.figure(figsize=(10, 6))
sns.scatterplot(
    data=mpg,
    x='horsepower',
    y='mpg',
    hue='origin',
    palette='deep',
    s=100,
    alpha=0.75,
).set(
    # Title and axis labels, set on the axes seaborn just drew on
    title='MPG vs Horsepower',
    xlabel='Horsepower',
    ylabel='Miles per Gallon',
)

# Show the plot
plt.show()


In [None]:
# Histogram of miles per gallon (20 bins); kde=True overlays a kernel density estimate line
plt.figure(figsize=(10, 6))
sns.histplot(data=mpg, x='mpg', bins=20, kde=True, color='blue').set(
    # Title and axis labels, set on the axes seaborn just drew on
    title='Distribution of Miles per Gallon',
    xlabel='Miles per Gallon',
    ylabel='Frequency',
)

# Show the plot
plt.show()


In [None]:
# Create a violin plot showing the distribution of miles per gallon by origin.
# A palette needs a `hue` variable to map colors onto (passing `palette` alone is
# deprecated in seaborn 0.13), so color by the same column that is on the x-axis
# and hide the legend, which would only repeat the x tick labels.
plt.figure(figsize=(10, 6))
sns.violinplot(data=mpg, x='origin', y='mpg', hue='origin', palette='muted', legend=False)

# Add title and labels
plt.title('MPG by Origin')
plt.xlabel('Origin')
plt.ylabel('Miles per Gallon')

# Show the plot
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Load the mpg dataset from seaborn
mpg = sns.load_dataset('mpg')

# Define the unique cylinder counts
# (to plot only some of them, filter first, e.g. mpg[(mpg['cylinders'] == 4) | (mpg['cylinders'] == 6)])
cylinder_counts = sorted(mpg['cylinders'].unique())

# Use ONE category order for every subplot. The subplots share their x-axis, so
# only the bottom one shows tick labels; if each subset used its own order (or
# lacked some origins), the upper violins would sit under the wrong label.
origin_order = list(mpg['origin'].unique())

# Set up the matplotlib figure with subplots.
# squeeze=False always returns a 2-D array, so this also works for a single cylinder count.
fig, axes_grid = plt.subplots(nrows=len(cylinder_counts), ncols=1, figsize=(10, len(cylinder_counts) * 5), sharex=True, squeeze=False)
axes = axes_grid[:, 0]

# Create a plot for each cylinder count
for i, cylinders in enumerate(cylinder_counts):
    ax = axes[i]
    # hue='origin' gives the palette something to map onto (passing `palette` alone is
    # deprecated in seaborn 0.13); hue_order keeps each origin's color the same in every subplot
    sns.violinplot(data=mpg[mpg['cylinders'] == cylinders], x='origin', y='mpg',
                   order=origin_order, hue='origin', hue_order=origin_order,
                   palette='muted', legend=False, ax=ax)
    ax.set_title(f'MPG by Origin for {cylinders} Cylinders')
    ax.set_xlabel('Origin')
    ax.set_ylabel('Miles per Gallon')

# Adjust layout
plt.tight_layout()
plt.show()


In [None]:
# Pair plot: one scatter plot per pair of the listed numeric columns, colored by origin.
# The figure-level title goes on the figure that pairplot created; y=1.02 places
# it just above the top row of axes.
sns.pairplot(
    data=mpg,
    vars=['mpg', 'horsepower', 'weight', 'acceleration'],
    hue='origin',
    palette='bright',
).figure.suptitle('Pair Plot of MPG, Horsepower, Weight, and Acceleration', y=1.02)

# Show the plot
plt.show()


In [None]:
# Make a correlation matrix for numeric variables in the mpg dataset

# Keep only the numeric columns, then compute their pairwise correlations
numeric_columns = mpg.select_dtypes(include=['number'])
corr_matrix = numeric_columns.corr()

# Draw the matrix as a heatmap: annot=True writes each value into its cell,
# center=0 puts the middle of the 'coolwarm' colormap at zero correlation
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0).set_title(
    'Correlation Matrix Heatmap (Numeric Values Only)'
)

# Show the plot
plt.show()


In [None]:
# plot acceleration as a function of cylinder count and weight

# Set the theme (no arguments = seaborn's default theme, replacing the style set earlier)
sns.set_theme()

# Catplot = categorical plot. It is a figure-level function: it creates its own
# figure and returns a grid object whose single axes is available as `.ax`.
# hue="weight" colors each point by the car's weight.
catplot = sns.catplot(
    data=mpg, x="cylinders", y="acceleration", hue="weight", s = 75, alpha = 0.5,
    native_scale=True, zorder=1, height=8, aspect=1.5  # aspect determines the width, zorder determines stacking order of plot elements
)
# native_scale=True keeps the cylinder counts at their numeric positions on the
# x-axis, which is what lets the regression line below be drawn over the same axis

# Add a regression line on top of the catplot's axes (scatter=False: draw only the fit, not the points again)
sns.regplot(
    data=mpg, x="cylinders", y="acceleration",
    scatter=False, truncate=False, order=2, color=".2", ax=catplot.ax # here, order refers to the polynomial order
)

# Show the plot
plt.show()


### More Seaborn examples

In [None]:
# Using different datasets

# NOTE: this re-uses the name `df`, which now refers to the tips dataset
df = sns.load_dataset('tips')

plt.figure(figsize=(10, 6))
# Individual bills as semi-transparent, jittered points ...
sns.stripplot(x='day', y='total_bill', data=df, color='blue', jitter=True, size=8, alpha=0.2)
# ... with a box plot per day on the same axes. A palette needs a `hue` variable
# to map colors onto (passing `palette` alone is deprecated in seaborn 0.13), so
# color by the x variable and hide the redundant legend.
sns.boxplot(x='day', y='total_bill', data=df, hue='day', palette='pastel', legend=False)


# Add title and labels
plt.title('Total Bill by Day of the Week')
plt.xlabel('Day')
plt.ylabel('Total Bill')

# Show the plot
plt.show()

In [None]:
# Switch the global seaborn style (this also affects every plot drawn after this cell)
sns.set_theme(style="darkgrid")

# Load an example dataset with long-form data
fmri = sns.load_dataset("fmri")

# Plot the responses for different events and regions:
# hue="region" gives each region its own color, style="event" gives each event its own line style
sns.lineplot(x="timepoint", y="signal",
             hue="region", style="event",
             data=fmri)

In [None]:
# Switch the global seaborn style (this also affects every plot drawn after this cell)
sns.set_theme(style="whitegrid")

# Load the example diamonds dataset
diamonds = sns.load_dataset("diamonds")

# Draw a scatter plot while assigning point colors and sizes to different
# variables in the dataset
f, ax = plt.subplots(figsize=(6.5, 6.5))
# Remove the axes spines (left and bottom in addition to the default top and right)
sns.despine(f, left=True, bottom=True)
# Explicit order for the clarity grades, used as hue_order so the colors follow this sequence
clarity_ranking = ["I1", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"]
# hue="clarity" sets the point color, size="depth" the point size (scaled into the 1-8 range given by `sizes`)
sns.scatterplot(x="carat", y="price",
                hue="clarity", size="depth",
                palette="ch:r=-.2,d=.3_r",
                hue_order=clarity_ranking,
                sizes=(1, 8), linewidth=0,
                data=diamonds, ax=ax)

In [None]:
# Switch the global seaborn style (this also affects every plot drawn after this cell)
sns.set_theme(style="white")

# NOTE: this re-uses the name `df`, which now refers to the penguins dataset
df = sns.load_dataset("penguins")

# JointGrid = a central "joint" axes plus two marginal axes; space=0 leaves no gap between them
g = sns.JointGrid(data=df, x="body_mass_g", y="bill_depth_mm", space=0)
# Joint axes: filled 2-D density estimate; `clip` limits the (x, y) ranges over which the density is evaluated
g.plot_joint(sns.kdeplot,
             fill=True, clip=((2200, 6800), (10, 25)),
             thresh=0, levels=100, cmap="rocket")
# Marginal axes: a histogram of each variable on its own
g.plot_marginals(sns.histplot, color="#03051A", alpha=1, bins=25)

In [None]:
# Back to seaborn's default theme
sns.set_theme()

# Load the brain networks example dataset
# (header=[0, 1, 2] reads three header rows into a 3-level column index; relies on `pd` imported earlier)
# NOTE: this re-uses the name `df`, which now refers to the brain networks dataset
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)

# Select a subset of the networks
used_networks = [1, 5, 6, 7, 8, 12, 13, 17]
# Boolean mask over the columns: True where the "network" level (cast to int) is one of used_networks
used_columns = (df.columns.get_level_values("network")
                          .astype(int)
                          .isin(used_networks))
df = df.loc[:, used_columns]

# Create a categorical palette to identify the networks
network_pal = sns.husl_palette(8, s=.45)
# Lookup table: network label (as a string) -> color
network_lut = dict(zip(map(str, used_networks), network_pal))

# Convert the palette to vectors that will be drawn on the side of the matrix
# (the lookup keys are strings, so the "network" level values are presumably strings too)
networks = df.columns.get_level_values("network")
network_colors = pd.Series(networks, index=df.columns).map(network_lut)

# Draw the full plot: a clustered heatmap of the column-by-column correlation matrix,
# with the network colors drawn along both the rows and the columns
g = sns.clustermap(df.corr(), center=0, cmap="vlag",
                   row_colors=network_colors, col_colors=network_colors,
                   dendrogram_ratio=(.1, .2),
                   cbar_pos=(.02, .32, .03, .2),
                   linewidths=.75, figsize=(12, 13))

# Hide the row dendrogram (the column dendrogram is kept)
g.ax_row_dendrogram.remove()


# More Involved Plots (with web-scraping)

In [None]:
# installing a module within a jupyter notebook

# in iPython 7.3 or above (magic)
# (%pip installs into the environment of the running kernel)
%pip install yfinance

# Alternative for older versions (uncomment two lines below)
# import sys
# !{sys.executable} -m pip install yfinance

import yfinance as yf
import matplotlib.pyplot as plt
import datetime

# Define the time period
start = datetime.datetime(2019, 1, 1)
end = datetime.datetime(2024, 5, 1)

# Fetch data for a specific stock (here: NVIDIA, ticker symbol 'NVDA')
# NOTE: this needs network access, and re-uses the name `df` for the price data
df = yf.download('NVDA', start=start, end=end)

# Plot the closing price (one circle marker per row of data, joined by a line)
plt.figure(figsize=(16, 8))
plt.plot(df.index, df['Close'], marker='o', linestyle='-', color='b', alpha = 0.2)

# Add title and labels
plt.title('Nvidia Stock Price (2019-2024)')
plt.xlabel('Date')
plt.ylabel('Closing Price (USD)')

# Show the plot
plt.show()


In [None]:
# Fetch NVDA stock data
nvda = yf.Ticker('NVDA')
nvda_data = nvda.history(start='2019-01-01', end='2024-05-01')

# Convert the datetime index to date only
nvda_data.index = nvda_data.index.date

# Fetch the closing price
nvda_close = nvda_data['Close']

# Get earnings dates
earnings = nvda.get_earnings_dates(limit = 50)

# Filter earnings dates for the relevant period
earnings = earnings[(earnings.index >= '2019-01-01') & (earnings.index <= '2024-05-01')]

# Convert earnings dates to date only and create a DataFrame
earnings_dates = pd.DataFrame(earnings.index.date, columns=['date'])

# Add a quarter column to the DataFrame
earnings_dates['quarter'] = earnings_dates['date'].astype('datetime64[ns]').dt.to_period('Q')

# Drop duplicate quarters, keeping the first occurrence
earnings_dates = earnings_dates.drop_duplicates(subset='quarter', keep='first')

# Extract the filtered dates as a list
filtered_earnings_dates = earnings_dates['date'].tolist()

# Plot the closing price
plt.figure(figsize=(16, 8))
plt.plot(nvda_close.index, nvda_close, marker='o', linestyle='-', color='b', label='NVDA Closing Price', alpha = 0.2)

# Mark the earnings call dates with red crosses. Only dates that have a closing
# price can be drawn. All markers go into ONE labelled scatter call so that they
# get a (single) entry in the legend.
plotted_earnings_dates = [date for date in filtered_earnings_dates if date in nvda_close.index]
if plotted_earnings_dates:
    plt.scatter(plotted_earnings_dates, [nvda_close[date] for date in plotted_earnings_dates],
                color='r', zorder=5, s = 100, marker = 'x', label='Earnings call')  # zorder to bring markers to front

# Add title and labels
plt.title('NVIDIA Stock Price with Quarterly Earnings Call Dates (2019-2024)')
plt.xlabel('Date')
plt.ylabel('Closing Price (USD)')
plt.legend()
plt.show()


In [None]:
# Plot stock moves after earnings calls

# Calculate stock moves after earnings calls: close of the NEXT TRADING DAY minus
# close of the earnings day. Looking up "calendar day + 1" would silently drop
# every earnings call that is followed by a weekend or a market holiday.
# (assumes nvda_close is ordered by date, as returned by yfinance - TODO confirm)
trading_days = list(nvda_close.index)
position_of_day = {day: position for position, day in enumerate(trading_days)}

earnings_moves = []
for date in filtered_earnings_dates:
    position = position_of_day.get(date)
    # Skip dates without a closing price, or without a following trading day in the data
    if position is None or position + 1 >= len(trading_days):
        continue
    move = nvda_close.iloc[position + 1] - nvda_close.iloc[position]
    earnings_moves.append({'date': date, 'move': move})

# Convert to DataFrame (explicit columns so an empty result still has 'date' and 'move')
earnings_moves_df = pd.DataFrame(earnings_moves, columns=['date', 'move'])

if earnings_moves_df.empty:
    print("No earnings dates with a following trading day were found - nothing to plot.")
else:
    # Plot the stock moves
    plt.figure(figsize=(12, 8))
    plt.bar(earnings_moves_df['date'], earnings_moves_df['move'], color='b', width = 10)
    plt.axhline(0, color='r', linestyle='--', linewidth = 1, alpha = 0.5)

    # Hide gridlines
    plt.grid(False)

    # Add title and labels
    plt.title('NVDA Stock Moves After Earnings Calls (Following Day)')
    plt.xlabel('Earnings Call Date')
    plt.ylabel('Stock Move (USD)')

    # Limit the y-axis to the maximum values with some padding
    y_max = earnings_moves_df['move'].max()
    y_min = earnings_moves_df['move'].min()
    padding = (y_max - y_min) * 0.1  # 10% padding
    plt.ylim(y_min - padding, y_max + padding)

    # Show the plot
    plt.show()


In [None]:
# Plot stock market volatility after major events

%pip install bs4
%pip install requests

import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup
import requests

### Scrape Wikipedia webpage for historic events

# URL for Wiki page
url = "https://en.wikipedia.org/wiki/List_of_stock_market_crashes_and_bear_markets"

# Fetch the content from the URL
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Find the table in the Wikipedia page
table = soup.find('table', {'class': 'wikitable'})

# Extract the data from the table
data = []
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    if len(cells) >= 4:
        date = cells[1].get_text(strip=True)
        event = cells[0].get_text(strip=True)
        data.append([date, event])

# Convert data to DataFrame (just get date and event title)
df = pd.DataFrame(data, columns=['Date', 'Event'])

# Convert 'Date' to datetime data type, errors='coerce' will convert invalid dates to NaT
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Remove dates before 2000
df = df[df['Date'].dt.year >= 2000]

# Drop rows with NaT values in 'Date'
df = df.dropna(subset=['Date'])


## Keep one event per quarter (for cleaner plotting)

# Sort events by date in descending order
df = df.sort_values(by='Date', ascending=False)

# filter events
filtered_events = [] # empty list for events we want to plot
last_date = None
for index, row in df.iterrows():
    if last_date is None or (last_date - row['Date']).days > 200:
        filtered_events.append(row)
        last_date = row['Date']

# Convert filtered events back to a DataFrame
df = pd.DataFrame(filtered_events)


### Volatility data

# Fetch VIX historical data
vix_data = yf.download('^VIX', start='2000-01-01', end='2024-05-01')

# Convert the datetime index to date only
vix_data.index = vix_data.index.date

# Fetch the closing price
vix_close = vix_data['Close']


### Plotting

# Plot the closing price
plt.figure(figsize=(14, 8))
plt.plot(vix_close.index, vix_close, linestyle='-', color='b', label='VIX Closing Price')

# Highlight historical events
for _, row in df.iterrows(): # df.iterrows() iterates through rows in the dataframe
    plt.axvline(x=row['Date'], color='r', linestyle='--', linewidth=1)
    plt.text(row['Date']+datetime.timedelta(days=30), plt.ylim()[1]*1.7, row['Event'], rotation=90, verticalalignment='top', fontsize=12, color='r')
# row['Date']+datetime.timedelta(days=30) sets the position for the text relative to dotted lines (moving it 30 days to the right)

# Add title and labels
plt.xlabel('Date')
plt.ylabel('VIX Close')
plt.title('CBOE Volatility Index (2000 - Present)')
plt.ylim(0, 150)

# Hide gridlines
plt.grid(False)

# Show the plot
plt.show()
