In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn import datasets

In [2]:
data = datasets.load_boston()
print(dir(data))

['DESCR', 'data', 'feature_names', 'filename', 'target']


In [3]:
print(data.data.shape)

(506, 13)


In [4]:
#Regressors 
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet

### Linear Regression

In [5]:
#X,y
X_LR = data.data
y_LR = data.target

In [6]:
#Train_Test Split
X_LR_train,X_LR_test,y_LR_train,y_LR_test = train_test_split(X_LR,y_LR,test_size=0.3,random_state=42)

In [7]:
#Initiate
reg = LinearRegression()

In [8]:
#Fit
reg.fit(X_LR_train,y_LR_train)
#Predict 
y_LR_pred = reg.predict(X_LR_test)

In [32]:
print(pd.DataFrame({'Actual': y_LR_test, 'Predicted': y_LR_pred}))

     Actual  Predicted
0      23.6  28.648960
1      32.4  36.495014
2      13.6  15.411193
3      22.8  25.403213
4      16.1  18.855280
..      ...        ...
147    17.1  17.403672
148    14.5  13.385941
149    50.0  39.983425
150    14.3  16.682863
151    12.6  18.285618

[152 rows x 2 columns]


In [9]:
from sklearn import metrics
# Regression metrics
explained_variance=metrics.explained_variance_score(y_LR_test, y_LR_pred) # The best possible score is 1.0, lower values are worse.
mean_absolute_error=metrics.mean_absolute_error(y_LR_test, y_LR_pred) #a risk metric corresponding to the expected 
# value of the absolute error loss
mse=metrics.mean_squared_error(y_LR_test, y_LR_pred) # a risk metric corresponding to the expected 
#value of the squared (quadratic) error or loss
median_absolute_error=metrics.median_absolute_error(y_LR_test, y_LR_pred)
r2=metrics.r2_score(y_LR_test, y_LR_pred)

In [10]:
print('explained_variance: ', round(explained_variance,4))    
print('r2: ', round(r2,4))
print('MAE: ', round(mean_absolute_error,4))
print('MSE: ', round(mse,4))
print('RMSE: ', round(np.sqrt(mse),4))

explained_variance:  0.7113
r2:  0.7112
MAE:  3.1627
MSE:  21.5174
RMSE:  4.6387


### Ridge Regression

In [11]:
#X,y
X_RR = data.data
y_RR = data.target

In [12]:
#train_test
X_RR_train, X_RR_test,y_RR_train,y_RR_test = train_test_split(X_RR, y_RR,test_size=0.3, random_state = 42)

In [13]:
#Initiate
ridge =Ridge()
param_grid={'alpha': [0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 ,
       0.65, 0.7 , 0.75]}
ridge_cv = GridSearchCV(ridge, param_grid, cv=4)

In [14]:
#fit 
ridge_cv.fit(X_RR_train,y_RR_train)

#predict
y_RR_pred = ridge_cv.predict(X_RR_test)

In [33]:
print(pd.DataFrame({'Actual': y_RR_test, 'Predicted': y_RR_pred}))

     Actual  Predicted
0      23.6  28.593002
1      32.4  36.452759
2      13.6  15.304163
3      22.8  25.371201
4      16.1  18.911681
..      ...        ...
147    17.1  17.482869
148    14.5  13.393860
149    50.0  39.935814
150    14.3  16.737009
151    12.6  18.376088

[152 rows x 2 columns]


In [15]:
print(ridge_cv.best_score_)
print(ridge_cv.best_estimator_.alpha)

0.7130103046471367
0.1


In [16]:
#metric
from sklearn import metrics
# Regression metrics
explained_variance=metrics.explained_variance_score(y_RR_test, y_RR_pred) # The best possible score is 1.0, lower values are worse.
mean_absolute_error=metrics.mean_absolute_error(y_RR_test, y_RR_pred) #a risk metric corresponding to the expected 
# value of the absolute error loss
mse=metrics.mean_squared_error(y_RR_test, y_RR_pred) # a risk metric corresponding to the expected 
#value of the squared (quadratic) error or loss
median_absolute_error=metrics.median_absolute_error(y_RR_test, y_RR_pred)
r2=metrics.r2_score(y_RR_test, y_RR_pred)

In [17]:
print('explained_variance: ', round(explained_variance,4))    
print('r2: ', round(r2,4))
print('MAE: ', round(mean_absolute_error,4))
print('MSE: ', round(mse,4))
print('RMSE: ', round(np.sqrt(mse),4))

explained_variance:  0.7104
r2:  0.7103
MAE:  3.1624
MSE:  21.5851
RMSE:  4.646


### LASSO

In [18]:
#X,y
X_LA = data.data
y_LA = data.target

#train_test
X_LA_train, X_LA_test,y_LA_train,y_LA_test = train_test_split(X_LA, y_LA,test_size=0.3, random_state = 42)

#Initiate
lasso = Lasso()
param_grid={'alpha': [0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 ,
       0.65, 0.7 , 0.75]}
lasso_cv = GridSearchCV(lasso, param_grid, cv=4)

#fit 
lasso_cv.fit(X_LA_train,y_LA_train)

#predict
y_LA_pred = lasso_cv.predict(X_LA_test)

In [19]:
print(lasso_cv.best_score_)
print(lasso_cv.best_estimator_.alpha)

0.7015959632760466
0.1


In [20]:
#metric
from sklearn import metrics
# Regression metrics
explained_variance=metrics.explained_variance_score(y_LA_test, y_LA_pred) # The best possible score is 1.0, lower values are worse.
mean_absolute_error=metrics.mean_absolute_error(y_LA_test, y_LA_pred) #a risk metric corresponding to the expected 
# value of the absolute error loss
mse=metrics.mean_squared_error(y_LA_test, y_LA_pred) # a risk metric corresponding to the expected 
#value of the squared (quadratic) error or loss
median_absolute_error=metrics.median_absolute_error(y_LA_test, y_LA_pred)
r2=metrics.r2_score(y_LA_test, y_LA_pred)

print('explained_variance: ', round(explained_variance,4))    
print('r2: ', round(r2,4))
print('MAE: ', round(mean_absolute_error,4))
print('MSE: ', round(mse,4))
print('RMSE: ', round(np.sqrt(mse),4))

explained_variance:  0.6919
r2:  0.6918
MAE:  3.2674
MSE:  22.9638
RMSE:  4.7921


### Elastic Net

In [21]:
#X,y
X_ER = data.data
y_ER = data.target

#train_test
X_ER_train, X_ER_test,y_ER_train,y_ER_test = train_test_split(X_ER, y_ER,test_size=0.3, random_state = 42)

#Initiate
elastic = ElasticNet()
param_grid={'alpha': [0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 , 0.55, 0.6 ,
       0.65, 0.7 , 0.75]}
elastic_cv = GridSearchCV(elastic, param_grid, cv=4)

#fit 
elastic_cv.fit(X_ER_train,y_ER_train)

#predict
y_ER_pred = elastic_cv.predict(X_ER_test)

In [22]:
print(elastic_cv.best_score_)
print(elastic_cv.best_estimator_.alpha)

0.7026117471583591
0.1


In [23]:
#metric
from sklearn import metrics
# Regression metrics
explained_variance=metrics.explained_variance_score(y_ER_test, y_ER_pred) # The best possible score is 1.0, lower values are worse.
mean_absolute_error=metrics.mean_absolute_error(y_ER_test, y_ER_pred) #a risk metric corresponding to the expected 
# value of the absolute error loss
mse=metrics.mean_squared_error(y_ER_test, y_ER_pred) # a risk metric corresponding to the expected 
#value of the squared (quadratic) error or loss
median_absolute_error=metrics.median_absolute_error(y_ER_test, y_ER_pred)
r2=metrics.r2_score(y_ER_test, y_ER_pred)

print('explained_variance: ', round(explained_variance,4))    
print('r2: ', round(r2,4))
print('MAE: ', round(mean_absolute_error,4))
print('MSE: ', round(mse,4))
print('RMSE: ', round(np.sqrt(mse),4))

explained_variance:  0.6924
r2:  0.6923
MAE:  3.2952
MSE:  22.9266
RMSE:  4.7882


In [26]:
print(reg.coef_)

[-1.33470103e-01  3.58089136e-02  4.95226452e-02  3.11983512e+00
 -1.54170609e+01  4.05719923e+00 -1.08208352e-02 -1.38599824e+00
  2.42727340e-01 -8.70223437e-03 -9.10685208e-01  1.17941159e-02
 -5.47113313e-01]


In [29]:
print(elastic_cv)

GridSearchCV(cv=4, error_score='raise-deprecating',
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=None, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45,
                                   0.5, 0.55, 0.6, 0.65, 0.7, 0.75]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)
