<a href="https://colab.research.google.com/github/wardspan/Oreilly_Stuff/blob/master/O'Reilly_Class_Ridge_and_Lasso_Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#Import Python Libraries
import numpy as np
import pandas as pd
from datetime import datetime

#pandas_datareader provides the pdr.get_data_yahoo entry point used by the download cell
import pandas_datareader.data as pdr
import fix_yahoo_finance as yf
#NOTE(review): presumably this reroutes pdr.get_data_yahoo through fix_yahoo_finance,
#so it has to run before the first download call - confirm
#NOTE(review): fix_yahoo_finance appears to have been superseded by the yfinance
#package - confirm before re-running this notebook on a fresh environment
yf.pdr_override()

import matplotlib.pyplot as plt
#The bundled seaborn style sheets were renamed with a 'seaborn-v0_8' prefix in
#newer Matplotlib releases and the bare 'seaborn' name was later removed, so use
#whichever of the two names this installation actually provides
plt.style.use(
    'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
)

In [0]:
#Import data
#Fixed sample window so every series covers the same calendar range
start, end = datetime(2017, 3, 10), datetime(2020, 5, 13)

#Yahoo Finance symbols, listed in the order they are downloaded:
#AAPL = the stock, SPY = S&P 500 index, ^VIX = volatility index,
#UUP = dollar index, JNK = junk bond index
symbols = ('AAPL', 'SPY', '^VIX', 'UUP', 'JNK')

stock, market, vix, dxy, junk = (
    pdr.get_data_yahoo(symbol, start, end) for symbol in symbols
)

In [0]:
#Create target dataframe
#Daily percentage return of the stock. Adjusted closing prices are used instead of
#raw closes so corporate actions such as dividends, splits and mergers do not
#appear as artificial price moves
stock_return_pct = stock['Adj Close'].pct_change(1) * 100
#The first row has no previous day to compare against, so it is NaN and dropped
target = stock_return_pct.to_frame('return').dropna()
target.head()

Unnamed: 0_level_0,return
Date,Unnamed: 1_level_1
2017-03-13,0.043105
2017-03-14,-0.15085
2017-03-15,1.057649
2017-03-16,0.163723
2017-03-17,-0.497511


In [0]:
#Create features dataframe
#The market return fixes the row index; the other columns are aligned to it by date
features = (
    (market['Adj Close'].pct_change(1) * 100)
    .to_frame('market')
    .assign(
        #VIX is already quoted in percentage terms, so take its day-over-day
        #difference rather than a percentage change
        vix=vix['Adj Close'].diff(),
        dxy=dxy['Adj Close'].pct_change(1) * 100,
        junk=junk['Adj Close'].pct_change(1) * 100,
    )
    #Discard any date on which at least one feature is missing
    .dropna()
)
features.tail()

Unnamed: 0_level_0,market,vix,dxy,junk
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-05-06,-0.677872,0.509998,0.481664,-0.379218
2020-05-07,1.206682,-2.679998,-0.294985,0.257202
2020-05-08,1.654619,-3.460001,-0.036982,0.974864
2020-05-11,0.020516,-0.41,0.369952,-0.426828
2020-05-12,-1.993158,5.470001,-0.184294,-0.051034


In [0]:
#Examine coefficients of Linear Regression
from sklearn.linear_model import LinearRegression

#features and target are cleaned independently of each other, so keep only the
#dates present in both before fitting. Without this, rows could be paired by
#position with the wrong day, or the fit could fail on mismatched lengths.
#When the two indexes already match this leaves the data unchanged.
features_aligned, target_aligned = features.align(target, join='inner', axis=0)

regression = LinearRegression()
model = regression.fit(features_aligned, target_aligned)
print("Bias:", model.intercept_)
print("Coefficients:", model.coef_)

Bias: [0.07456363]
Coefficients: [[ 1.31322259  0.00617595  0.14945602 -0.26799855]]


In [0]:
#Remove less informative features with Lasso Regression
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

#features and target are cleaned independently of each other, so keep only the
#dates present in both; this guarantees every feature row is paired with the
#return of the same day. It is a no-op when the indexes already match.
features_aligned, target_aligned = features.align(target, join='inner', axis=0)

#Standardize the features so the penalty treats each of them on the same scale
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features_aligned)

#alpha sets the penalty strength; coefficients shrunk to exactly zero mark
#features the model has dropped
regression = Lasso(alpha=0.5)
lasso_model = regression.fit(features_standardized, target_aligned)
lasso_model.coef_

array([ 1.08336998, -0.        ,  0.        ,  0.        ])

In [0]:
#Reduce effects of all coefficients with Ridge Regression
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

#features and target are cleaned independently of each other, so keep only the
#dates present in both; this guarantees every feature row is paired with the
#return of the same day. It is a no-op when the indexes already match.
features_aligned, target_aligned = features.align(target, join='inner', axis=0)

#Standardize the features so the penalty treats each of them on the same scale
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features_aligned)

#alpha sets the penalty strength; Ridge shrinks every coefficient toward zero
#without removing any feature outright
regression = Ridge(alpha=100)
ridge_model = regression.fit(features_standardized, target_aligned)
ridge_model.coef_

array([[ 1.11058322, -0.26239932,  0.07086022,  0.14260018]])