# [Ridge and Lasso](http://statweb.stanford.edu/~tibs/sta305files/Rudyregularization.pdf)
  1. [Scikit_learn](http://scikit-learn.org/stable/) 
  2. [Scatter Plots](https://matplotlib.org/api/_as_gen/matplotlib.pyplot.scatter.html)
  3. [Mean Square Error](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html)
  4. [Cross Validation](http://scikit-learn.org/stable/modules/cross_validation.html)

In [1]:
# importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
pwd()

'/Users/rcrouch/Desktop/mlg-03-us-master/day_2/code/Solutions'

In [3]:
#importing the data
adv = pd.read_csv('/Users/rcrouch/Desktop/mlg_02_us-master/day_2/data/advertising.csv') 
adv.head(5) # top 5 few rows

Unnamed: 0,TV,Radio,Newspaper,Sales,Region
0,230.1,37.8,69.2,22.1,1
1,44.5,39.3,45.1,10.4,1
2,17.2,45.9,69.3,9.3,1
3,151.5,41.3,58.5,18.5,1
4,180.8,10.8,58.4,12.9,1


In [4]:
#Center Predictors
adv[['TV']] - np.nanmean(adv[['TV']])
adv[['Radio']] - np.nanmean(adv[['Radio']])
adv[['Newspaper']] - np.nanmean(adv[['Newspaper']])
adv.head()

Unnamed: 0,TV,Radio,Newspaper,Sales,Region
0,230.1,37.8,69.2,22.1,1
1,44.5,39.3,45.1,10.4,1
2,17.2,45.9,69.3,9.3,1
3,151.5,41.3,58.5,18.5,1
4,180.8,10.8,58.4,12.9,1


In [5]:
# Split data into train and test
train, test = train_test_split(adv, test_size=0.3, random_state=1)

In [6]:
# Convert them back into dataframes, for convenience
train = pd.DataFrame(data=train, columns=adv.columns)
test = pd.DataFrame(data=test, columns=adv.columns)

Estimating the coefficients

In [7]:
# Fit a linear regression model using OLS
from sklearn.linear_model import LinearRegression
slm = LinearRegression()
slm.fit(train[['TV','Newspaper']], train['Sales']) # obtaining fit only based on Tv and Newspaper.

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [8]:
# Evaluate the output
slm.intercept_
slm.coef_
print(slm.intercept_)
print(slm.coef_)

5.462665238271402
[0.04725676 0.05275376]


In [9]:
# Fit a linear regression model using Ridge
from sklearn.linear_model import Ridge
ridge = Ridge()
ridge.fit(train[['TV','Newspaper']], train['Sales']) # obtaining fit only based on Tv and Newspaper.

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [10]:
# Evaluate the output
ridge.intercept_
ridge.coef_
print(ridge.intercept_)
print(ridge.coef_)

5.462693992540023
[0.04725673 0.05275296]


In [11]:
# Fit a linear regression model using Lasso
from sklearn.linear_model import Lasso
lasso = Lasso()
lasso.fit(train[['TV','Newspaper']], train['Sales']) # obtaining fit only based on Tv and Newspaper.

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [12]:
# Evaluate the output
lasso.intercept_
lasso.coef_
print(lasso.intercept_)
print(lasso.coef_)

5.5408539335995854
[0.04715844 0.05061755]


In [13]:
# RIDGE vs LASSO
# Evaluate the fit of the models based off of the training set
# Evaluate the model fit based off of cross validation

In [14]:
# Ridge based on training
preds = ridge.predict(test[['TV','Newspaper']]) #  prdicting the sales of test dataset based on TV and Newspaper 
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(test['Sales'],preds)) #RMSE obtained by ridge

3.2564489770239113

In [15]:
# Cross validation
from sklearn.cross_validation import cross_val_score
scores = cross_val_score(ridge, adv[['TV','Newspaper']], adv['Sales'], cv=5, scoring='mean_squared_error')
np.mean(np.sqrt(-scores))

  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)


3.1286122746837877

In [25]:
'''# Lasso (try yourself)'''

'# Lasso (try yourself)'

In [16]:
predsl = lasso.predict(test[['TV','Newspaper']]) #  prdicting the sales of test dataset based on TV and Newspaper 
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(test['Sales'],predsl)) #RMSE obtained by ridge

3.247558995642003

In [None]:
'''# Cross Validation (try yourself)'''

In [17]:
# Cross validation
scores = cross_val_score(lasso, adv[['TV','Newspaper']], adv['Sales'], cv=5, scoring='mean_squared_error')
np.mean(np.sqrt(-scores))

  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)


3.1294126929608086

In [None]:
'''EXERCISE: (10 min)
1) Run all three types of multiple linear regressions with all of your features. 
        Which coefficients have higher values?
        What does this suggest practically?
2) Calculate the 5-fold CV RMSE. Is it better or worse than before?
'''

In [17]:
train.head()

Unnamed: 0,TV,Radio,Newspaper,Sales,Region
116,139.2,14.3,25.6,12.2,3
67,139.3,14.5,10.2,13.4,2
78,5.4,29.9,9.4,5.3,2
42,293.6,27.7,1.8,20.7,1
17,281.4,39.6,55.8,24.4,1


In [18]:
# Fit a linear regression model using OLS
from sklearn.linear_model import LinearRegression
slm2 = LinearRegression()
slm2.fit(train[['TV', 'Radio', 'Newspaper', 'Region']], train['Sales']) 
slm2.intercept_
slm2.coef_
print(slm2.intercept_)
print(slm2.coef_)

3.0381921626156743
[ 0.04695741  0.1764743   0.00156295 -0.03578489]


In [19]:
predsols = slm2.predict(test[['TV', 'Radio', 'Newspaper', 'Region']]) 
np.sqrt(mean_squared_error(test['Sales'],predsols)) 

1.3866307484195066

In [20]:
# Cross validation
scores = cross_val_score(slm2, adv[['TV', 'Radio', 'Newspaper', 'Region']], adv['Sales'], cv=5, scoring='mean_squared_error')
np.mean(np.sqrt(-scores))

  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)


1.7349202650773887

In [21]:
# Ridge Regression
ridge2 = Ridge()
ridge2.fit(train[['TV', 'Radio', 'Newspaper', 'Region']], train['Sales'])
ridge2.intercept_
ridge2.coef_
print(ridge2.intercept_)
print(ridge2.coef_)

3.0376568259986367
[ 0.04695735  0.17646848  0.00156663 -0.03555973]


In [22]:
predsr = ridge2.predict(test[['TV', 'Radio', 'Newspaper', 'Region']]) 
np.sqrt(mean_squared_error(test['Sales'],predsr)) 

1.3866749565326966

In [23]:
# Cross validation
scores = cross_val_score(ridge2, adv[['TV', 'Radio', 'Newspaper', 'Region']], adv['Sales'], cv=5, scoring='mean_squared_error')
np.mean(np.sqrt(-scores))

  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)


1.7347292152708644

In [24]:
# Lasso
lasso2 = Lasso()
lasso2.fit(train[['TV', 'Radio', 'Newspaper', 'Region']], train['Sales']) 
lasso2.intercept_
lasso2.coef_
print(lasso2.intercept_)
print(lasso2.coef_)

3.06810140706013
[ 0.04686006  0.17290195  0.00077704 -0.        ]


In [25]:
predsl = lasso2.predict(test[['TV', 'Radio', 'Newspaper', 'Region']]) 
np.sqrt(mean_squared_error(test['Sales'],predsl)) 

1.4075592889362203

In [26]:
# Cross validation
scores = cross_val_score(lasso2, adv[['TV', 'Radio', 'Newspaper', 'Region']], adv['Sales'], cv=5, scoring='mean_squared_error')
np.mean(np.sqrt(-scores))

  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)
  sample_weight=sample_weight)


1.7108083257624553

In [None]:
'''EXERCISE: (20 min)
1) Perform an EDA of a new dataset: credit.csv
2) Determine your target variable and features
3) Select a model: Ridge, Lasso, OLS
4) Support your selections to your client
'''