In [None]:
import numpy as np
import yfinance as yf
# Download Google (GOOG) daily bars and save a raw CSV with deliberately
# blanked-out rows, giving the later cleaning steps something to repair.
data = yf.download('GOOG', start='2009-04-01', end='2010-01-01', interval='1d')

# Fail with a clear message instead of an obscure zero-step range() error
# further down when the download comes back empty.
if data.empty:
    raise RuntimeError('yfinance returned no rows for GOOG; cannot build the sample CSV')

# NOTE(review): newer yfinance releases appear to return two-level columns
# (price field, ticker) even for a single ticker. Keep only the field names
# so the CSV gets a plain one-row header that pd.read_csv reads back as-is.
if data.columns.nlevels > 1:
    data.columns = data.columns.get_level_values(0)

print(len(data.index))

# Blank out roughly every tenth row, starting at row 4, to simulate gaps.
# max(1, ...) keeps the step valid when fewer than 10 rows are returned
# (range() raises ValueError on a step of zero).
step = max(1, len(data.index) // 10)
for i in range(4, len(data.index), step):
    data.iloc[i] = np.nan
data.to_csv('GOOG_2009-2010_6m_RAW_1d.csv')

# 1. Libraries & Sample Data
The first step is to load our Python libraries and read in the sample data. The dataset contains Google (GOOG) daily (1d) stock price bars covering 2009-04-01 to 2010-01-01.

In [None]:
# Load Python Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from IPython.display import display, HTML
# Lift pandas' row-count display cap (None means no limit)
pd.set_option("display.max_rows", None)


def display_df(df):
    """Render ``df`` as an HTML table inside a scrollable box.

    The table is wrapped in a 200px-high div with ``overflow: auto`` whose
    width fits its content, which puts the scrollbar next to the DataFrame.
    """
    opening_tag = "<div style='height: 200px; overflow: auto; width: fit-content'>"
    closing_tag = "</div>"
    table_html = df.to_html()
    display(HTML(opening_tag + table_html + closing_tag))

In [None]:
# Load the sample data from the local raw CSV file.
# parse_dates converts the 'Date' column to real datetimes instead of plain
# strings, so that once it becomes the index the plots get a true time axis.
data = pd.read_csv('GOOG_2009-2010_6m_RAW_1d.csv', parse_dates=['Date'])

# 2. Exploratory Data Analysis
Next, we want to analyze our data. Display the data as a dataframe, and plot some relevant data so you can get an idea of what our dataset looks like.

In [None]:
# Display as Dataframe: show the freshly loaded frame in the scrollable box.
# At this point 'Date' is still an ordinary column, not the index.
display_df(data)

In [None]:
# Index data by Date.
# Rebind rather than mutate in place, and skip the step when 'Date' has
# already been moved into the index, so re-running this cell does not raise
# a KeyError.
if 'Date' in data.columns:
    data = data.set_index('Date')
display_df(data)

In [None]:
# Plot the Close Data, with a title and y-label so the figure can be read
# on its own when the notebook is skimmed.
ax = data['Close'].plot(title='GOOG Daily Close')
ax.set_ylabel('Close');

# 3. Data Cleaning
Next, we need to clean our data for training our model. This requires handling the NaN values, which we do by forward-filling each gap with the values from the previous row.

In [None]:
# Count the missing entries in each column before cleaning
print('Number of Null Values =', data.isna().sum())

In [None]:
# Forward fill: replace each missing value with the last valid value above it
data = data.ffill()
display_df(data)

In [None]:
# Re-count the missing entries in each column now that the data is cleaned
print('Number of Null Values =', data.isna().sum())

# 4. Feature Definition
Now that we have cleaned our stock data, we can define some financial indicators, or "features", to train our model on. We will calculate some popular indicators: the 5-day and 20-day Close Moving Averages, and the 20-day Close Bollinger Bands.

In [None]:
# Derive the indicator columns from Close in one chained step:
#   MA5, MA20           - rolling means over 5 and 20 rows
#   STD20               - rolling standard deviation over 20 rows
#   BB_upper, BB_lower  - MA20 plus / minus two times STD20
# Later keyword arguments may refer to columns created earlier in the call.
data = data.assign(
    MA5=lambda frame: frame['Close'].rolling(window=5).mean(),
    MA20=lambda frame: frame['Close'].rolling(window=20).mean(),
    STD20=lambda frame: frame['Close'].rolling(window=20).std(),
    BB_upper=lambda frame: frame['MA20'] + (frame['STD20'] * 2),
    BB_lower=lambda frame: frame['MA20'] - (frame['STD20'] * 2),
)
display_df(data)

In [None]:
# Drop every row that contains a NaN in any column; this removes the
# rows whose moving-average values are NaN
data = data.dropna(axis='index', how='any')
display_df(data)

In [None]:
# Plot Features: Close, MA20, BB Upper, BB Lower
# Drawing the columns through one DataFrame.plot call adds a legend, so the
# four lines can be told apart; the title and y-label make the figure
# readable on its own.
ax = data[['Close', 'MA20', 'BB_upper', 'BB_lower']].plot(
    rot=45, title='GOOG Close with 20-day Bollinger Bands'
)
ax.set_ylabel('Price');

In [None]:
# Plot Features: Close, MA20, MA5
# Drawing the columns through one DataFrame.plot call adds a legend, so the
# three lines can be told apart; the title and y-label make the figure
# readable on its own.
ax = data[['Close', 'MA20', 'MA5']].plot(
    rot=45, title='GOOG Close with 5-day and 20-day Moving Averages'
)
ax.set_ylabel('Price');

# 5. State Space Representation
Now we have a set of data with OHLC data plus some technical indicators. Using this data, construct the state space matrix, where the features of the state space are Close Price, 5-day Moving Average, 20-day Moving Average, and Bollinger Bands (upper and lower).

In [None]:
# Construct the State Space Matrix
# Take an explicit copy of the selected feature columns so `dataset` is
# independent of `data`: later writes to `dataset` cannot be mistaken for
# writes to a slice of `data` (pandas' SettingWithCopyWarning).
dataset = data[['Close', 'MA5', 'MA20', 'BB_upper', 'BB_lower']].copy()
display_df(dataset)