##### IMPORT REQUIRED LIBRARY

In [4]:
# importing required libraries
import numpy as np
import pandas as pd
from pandas import read_csv
from pandas import Series, DataFrame
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso, Ridge

##### LOAD DATASET   |   IMPORT DATA   |   EXPLORE THE DATA

In [6]:
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/housing.csv'

# Import Data
dataframe = read_csv(url, header = None)

print('\n\n---------DATA---------------\n\n')
print(dataframe.shape)
print(dataframe.head())



---------DATA---------------


(506, 14)
        0     1     2   3      4      5     6       7   8      9     10  \
0  0.00632  18.0  2.31   0  0.538  6.575  65.2  4.0900   1  296.0  15.3   
1  0.02731   0.0  7.07   0  0.469  6.421  78.9  4.9671   2  242.0  17.8   
2  0.02729   0.0  7.07   0  0.469  7.185  61.1  4.9671   2  242.0  17.8   
3  0.03237   0.0  2.18   0  0.458  6.998  45.8  6.0622   3  222.0  18.7   
4  0.06905   0.0  2.18   0  0.458  7.147  54.2  6.0622   3  222.0  18.7   

       11    12    13  
0  396.90  4.98  24.0  
1  396.90  9.14  21.6  
2  392.83  4.03  34.7  
3  394.63  2.94  33.4  
4  396.90  5.33  36.2  


##### DEFINE X & y VARIABLE

In [15]:
X = dataframe.iloc[:, :-1]
y = dataframe.iloc[:, -1]

##### SPLIT DATA INTO TRAINING AND TEST SET

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 2)

##### CREATE LASSO INSTANCE   |   FIT IT ON TRAINING SET 

In [24]:
lasso = Lasso()
lasso.fit(X_train, y_train)

Lasso()

##### CREATE PREDICTION FROM TEST SET   |   FIND MEAN SQUARED ERROR

In [25]:
pred2 = lasso.predict(X_test)
mse_2 = np.mean((pred2 - y_test) ** 2)
print('\n\nMean Squared Error = ',mse_2)



Mean Squared Error =  26.16001377149239


##### CALCUATE COEFFICIENTS

In [21]:
# calculating coefficients
coeff = DataFrame(X_train.columns)
coeff['Coefficient Estimate'] = Series(lasso.coef_)
print(coeff)

print('\n\nModel performance on Test data = ')
print(lasso.score(X_test, y_test))

     0  Coefficient Estimate
0    0             -0.053621
1    1              0.037468
2    2             -0.000000
3    3              0.000000
4    4             -0.000000
5    5              1.005373
6    6              0.023177
7    7             -0.660082
8    8              0.269621
9    9             -0.015069
10  10             -0.780676
11  11              0.010144
12  12             -0.783601


Model performance on Test data = 
0.705243994830103


##### CREATE LASSO INSTANCE WITH ALPHA = 0.1   |   FIT IT ON TRAINING SET

In [27]:
modelLasso = Lasso(alpha = 0.1)
modelLasso.fit(X_train, y_train)

Lasso(alpha=0.1)

##### CREATE PREDICTION ALPHA = 0.1 FROM TEST SET | FIND MEAN SQUARED ERROR

In [28]:
y_pred = modelLasso.predict(X_test)
mse_2 = np.mean((y_pred - y_test) ** 2)
print('\n\nMean Squared Error = ',mse_2)



Mean Squared Error =  23.217526268488566


##### CALCUATE COEFFICIENTS - ALPHA = 0.1

In [29]:
# calculating coefficients
coeff = DataFrame(X_train.columns)
coeff['Coefficient Estimate'] = Series(modelLasso.coef_)
print(coeff)

print('\n\nModel performance on Test data = ')
print(modelLasso.score(X_test, y_test))

     0  Coefficient Estimate
0    0             -0.091559
1    1              0.038665
2    2             -0.064131
3    3              1.691652
4    4             -0.000000
5    5              3.478853
6    6             -0.004477
7    7             -1.120858
8    8              0.288189
9    9             -0.014272
10  10             -0.853747
11  11              0.012329
12  12             -0.597909


Model performance on Test data = 
0.7383982534334721


##### APPLY GRID_SEARCH_CV

In [31]:
from numpy import arange
from pandas import read_csv
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Lasso

In [32]:
# Define model
gridcv_model = Lasso()

# Define model evaluation method
cv = RepeatedKFold(n_splits = 10, n_repeats = 6, random_state = 1)

# Define grid
grid = dict()
grid['alpha'] = arange(0, 1, 0.01)

# Define search
search = GridSearchCV(gridcv_model, grid, scoring = 'neg_mean_absolute_error', cv = cv, n_jobs = -1)

# Perform the search
results = search.fit(X_train, y_train)

# Summarize
print('MAE: %.3f' % results.best_score_)
print('Config: %s' % results.best_params_)

MAE: -3.512
Config: {'alpha': 0.02}


##### USE AUTOMATICALLY CONFIGURED LASOO REGRESSION ALGORITHM

In [33]:
from numpy import arange
from pandas import read_csv
from sklearn.linear_model import LassoCV
from sklearn.model_selection import RepeatedKFold

In [36]:
# Define model Evaluation Method
cvLasso = RepeatedKFold(n_splits = 10, n_repeats = 3, random_state = 1)

# Define model
lassoCVmodel = LassoCV(alphas = arange(0, 1, 0.01), cv = cvLasso, n_jobs = -1)

# Fit model
lassoCVmodel.fit(X_train, y_train)

# Summarize chosen configuration
print('alpha: %f' % lassoCVmodel.alpha_)

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


alpha: 0.000000


  model.fit(X, y)
  positive)
  positive)


In [44]:
# CREATE LASSO INSTANCE WITH ALPHA = 0.1 | FIT IT ON TRAINING SET
modelLasso_0point02 = Lasso(alpha = 0.02)
modelLasso_0point02.fit(X_train, y_train)

# CREATE PREDICTION ALPHA = 0.1 FROM TEST SET | FIND MEAN SQUARED ERROR
y_pred0point02 = modelLasso_0point02.predict(X_test)
mse_2 = np.mean((y_pred0point02 - y_test) ** 2)
print('\n\nMean Squared Error = ', mse_2)

# Mean Squared Error =  23.217526268488566



Mean Squared Error =  22.44802447695047
