0. Setup and Imports

In [21]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import warnings

In [22]:
# Silence every Python warning for the rest of the session so cell outputs stay
# uncluttered. Note that this is a global filter: it also hides deprecation and
# library warnings.
warnings.filterwarnings('ignore')

# Plot appearance: apply the seaborn-v0_8 style sheet first, then override the
# default figure size and grid setting on top of it (order matters, since
# applying a style sheet updates rcParams).
plt.style.use('seaborn-v0_8')
plt.rcParams.update({
    'figure.figsize': (14, 7),
    'axes.grid': True,
})

1. Loading and Preprocessing Time Series Data

In [24]:
# Parameters for the price download: Apple (AAPL) over roughly ten years,
# from the start of February 2015 to the end of January 2025.
ticker = 'AAPL'
start_date, end_date = '2015-02-01', '2025-01-31'

In [25]:
# Fetch daily price/volume data for `ticker` using yfinance.
# - `end` is exclusive in yfinance, so the last row is the final trading day
#   strictly before `end_date`.
# - `auto_adjust=True` is passed explicitly because its default differs across
#   yfinance releases. With it enabled, 'Close' is already adjusted for splits
#   and dividends and no separate 'Adj Close' column is returned, which is the
#   5-column layout the rest of this notebook works with.
df = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# A failed download (bad ticker, network error, rate limit) typically comes
# back as an empty frame instead of an exception. Stop here with a clear
# message rather than letting later cells fail on missing data.
if df.empty:
    raise ValueError(
        f"No data returned for {ticker} between {start_date} and {end_date}"
    )

print(f"Downloaded {ticker} stock data from {start_date} to {end_date}")
print(f"Shape of data: {df.shape}")

# Display the first few rows
print("\nFirst 5 rows of the data:")
display(df.head())

[*********************100%***********************]  1 of 1 completed

Downloaded AAPL stock data from 2015-02-01 to 2025-01-31
Shape of data: (2515, 5)

First 5 rows of the data:





Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-02-02,26.354658,26.474624,25.788155,26.225808,250956400
2015-02-03,26.359093,26.456841,26.128048,26.325769,207662800
2015-02-04,26.561255,26.772306,26.283557,26.325767,280598800
2015-02-05,26.750841,26.815522,26.596946,26.768683,168984800
2015-02-06,26.525576,26.819982,26.418518,26.768683,174826400


In [16]:
# When the columns form a MultiIndex, keep only its first level as the column
# labels so they become a plain, single-level Index. Frames whose columns are
# already flat are left untouched, so re-running this cell is harmless.
columns_are_multiindex = isinstance(df.columns, pd.MultiIndex)
if columns_are_multiindex:
    first_level_labels = df.columns.get_level_values(0)
    df.columns = first_level_labels

# Preview the frame with its (possibly flattened) column labels.
df.head()

Price,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,24.288582,24.757336,23.848707,24.746228,212818400
2015-01-05,23.604332,24.137513,23.41772,24.057535,257142000
2015-01-06,23.606564,23.866489,23.244444,23.668768,263188400
2015-01-07,23.937567,24.037537,23.7043,23.815379,160423600
2015-01-08,24.857309,24.915071,24.148623,24.266369,237458000


In [18]:
# Inspect the column labels of df (expected to be a single flat level once the
# MultiIndex flattening step has been run).
df.columns

Index(['Close', 'High', 'Low', 'Open', 'Volume'], dtype='object', name='Price')

In [20]:
# Extract the 'Close' column as the time series to analyse.
# NOTE(review): df has no separate 'Adj Close' column; whether these closes are
# split/dividend-adjusted depends on yfinance's `auto_adjust` setting at
# download time — confirm against the download call.
ts = df['Close']
print(ts.head())

Date
2015-01-02    24.288582
2015-01-05    23.604332
2015-01-06    23.606564
2015-01-07    23.937567
2015-01-08    24.857309
Name: Close, dtype: float64
