In [1]:
import numpy as np
import os
import datetime
import pandas as pd
import random
from scipy import stats
# from tqdm import tqdm
from matplotlib import pyplot as plt
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from sklearn import preprocessing
from sklearn import datasets

In [2]:
from sklearn import model_selection
from sklearn import pipeline
from sklearn import linear_model
from sklearn import dummy
from sklearn import ensemble
from sklearn import metrics
from sklearn import cluster
from sklearn import decomposition

### Fitting a Line

In [11]:
# Fit an ordinary least-squares model on the first two columns of the Boston data.
# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2, so this
# cell needs an older release — confirm the pinned version before re-running.
boston = datasets.load_boston()
features, target = boston.data[:, :2], boston.target

ols = linear_model.LinearRegression()
ols.fit(features, target)
model = ols  # fit() returns the estimator itself, so both names refer to one object

In [5]:
# Display the fitted intercept together with the per-feature coefficients.
(model.intercept_, model.coef_)

(22.485628113468223, array([-0.35207832,  0.11610909]))

### Interaction Features
Features whose effect on the target depends on the value of another feature.

In [13]:
# Expand `features` with interaction terms only (no bias column), then refit
# the linear model on the expanded matrix.
# NOTE(review): with only two input columns the single interaction is x0*x1,
# so degree=3 presumably adds nothing beyond degree=2 here — confirm intent.
interaction = preprocessing.PolynomialFeatures(
    degree=3,
    interaction_only=True,
    include_bias=False,
)
features_interaction = interaction.fit_transform(features)

ols = linear_model.LinearRegression()
ols.fit(features_interaction, target)
model = ols  # fit() returns the estimator itself

In [15]:
# Display the intercept and the coefficients of the interaction model.
(model.intercept_, model.coef_)

(22.07715825584366, array([-0.33715159,  0.08155747,  0.80662   ]))

### Non-Linear Relationships

In [16]:
# Cubic polynomial regression using only the first column of the Boston data.
# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2 — confirm
# the pinned version before re-running.
boston = datasets.load_boston()
features, target = boston.data[:, :1], boston.target  # the slice keeps `features` 2-D

# The variable is still called `interaction`, but interaction_only is left at
# its default here, so the expansion is not limited to interaction terms.
interaction = preprocessing.PolynomialFeatures(include_bias=False, degree=3)
features_poly = interaction.fit_transform(features)

ols = linear_model.LinearRegression()
ols.fit(features_poly, target)
model = ols  # fit() returns the estimator itself

In [17]:
# Display the intercept and the coefficients of the polynomial model.
(model.intercept_, model.coef_)

(25.190479369326752,
 array([-1.13640072e+00,  2.37848254e-02, -1.48872090e-04]))

### Regularization
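- Features need to be standardized prior to training, so the penalty treats all coefficients on the same scale
- Ridge: the penalty is a tuning hyperparameter (alpha) times the sum of the squared coefficients
- Lasso: the penalty is a tuning hyperparameter (alpha) times the sum of the absolute values of the coefficients
- Both penalize large coefficients, i.e. overly complex models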

In [18]:
# Load every Boston column and standardize the feature matrix before the
# regularized fits below.
# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2 — confirm
# the pinned version before re-running.
boston = datasets.load_boston()
features, target = boston.data, boston.target

my_scaler = preprocessing.StandardScaler()
my_scaler.fit(features)
features_standard = my_scaler.transform(features)

In [19]:
# Ridge regression with a fixed alpha (the name `ols` is kept from earlier
# cells even though the estimator is a Ridge model).
ols = linear_model.Ridge(alpha=0.5)
ols.fit(features_standard, target)
model = ols  # fit() returns the estimator itself

# RidgeCV is given three candidate alphas to select from.
ridge_cv = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
ridge_cv.fit(features_standard, target)
model_cv = ridge_cv

In [20]:
# Compare the fixed-alpha Ridge coefficients with the RidgeCV coefficients.
(model.coef_, model_cv.coef_)

(array([-0.92396151,  1.07393055,  0.12895159,  0.68346136, -2.0427575 ,
         2.67854971,  0.01627328, -3.09063352,  2.62636926, -2.04312573,
        -2.05646414,  0.8490591 , -3.73711409]),
 array([-0.91987132,  1.06646104,  0.11738487,  0.68512693, -2.02901013,
         2.68275376,  0.01315848, -3.07733968,  2.59153764, -2.0105579 ,
        -2.05238455,  0.84884839, -3.73066646]))

In [21]:
# Display the alpha_ attribute of the fitted RidgeCV model.
model_cv.alpha_

1.0

### Feature Reduction w/ Lasso Regression
- To keep the model simple, Lasso's penalty can shrink some coefficients to exactly 0, effectively removing those features

In [22]:
# Fit a Lasso model on the standardized features and display its coefficients.
my_reg = linear_model.Lasso(alpha=0.5)
my_reg.fit(features_standard, target)
las_model = my_reg  # fit() returns the estimator itself
las_model.coef_

array([-0.11526463,  0.        , -0.        ,  0.39707879, -0.        ,
        2.97425861, -0.        , -0.17056942, -0.        , -0.        ,
       -1.59844856,  0.54313871, -3.66614361])