In [1]:
import numpy as np
import pandas as pd
import os

from sklearn.linear_model import Lasso,LassoCV,LassoLarsCV

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

os.getcwd()
data = pd.read_csv('ox.csv')

X = data.iloc[:,2:]
Y = data.iloc[:,1]

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,Y,random_state=0)

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

linlasso = Lasso(alpha=2.0).fit(X_train_scaled, y_train)

print('lasso regression linear model intercept: {}'.format(linlasso.intercept_))

print('lasso regression linear model coeff:\n{}'.format(linlasso.coef_))

print('Non-zero features: {}'.format(np.sum(linlasso.coef_ != 0)))

print('R-squared score (training): {:.3f}'.format(linlasso.score(X_train, y_train)))

print('R_squared score (test): {:.3f}\n'.format(linlasso.score(X_test, y_test)))

print('Features with non-zero weight (sorted by absolute magnitude):')

for e in sorted (list(zip(list(X),linlasso.coef_)), key = lambda e: -abs(e[1])):
    if e[1] != 0:
        print('\t{}, {:.3f}'.format(e[0], e[1]))

lasso regression linear model intercept: -0.3355575209090909
lasso regression linear model coeff:
[ 0. -0.  0. -0. -0.  0.  0. -0. -0.  0.  0.  0. -0.  0. -0.  0.  0. -0.
 -0.  0.  0. -0. -0.  0.  0. -0. -0. -0. -0.  0. -0. -0. -0.  0. -0. -0.
 -0. -0.  0. -0. -0.  0.  0. -0.  0.  0. -0. -0. -0.  0.]
Non-zero features: 0
R-squared score (training): 0.000
R_squared score (test): -0.020

Features with non-zero weight (sorted by absolute magnitude):


  return self.partial_fit(X, y)
  return self.partial_fit(X, y)


In [2]:
print('Lasso regression: effect of alpha regularization\n\
      parameter on number of features kept in final model\n')

for alpha in [0.0001, 0.0005, 0.001, 0.01, 0.1, 1, 5, 10]:
    linlasso = Lasso(alpha).fit(X_train_scaled, y_train)
    r2_train = linlasso.score(X_train_scaled, y_train)
    r2_test = linlasso.score(X_test_scaled, y_test)
    
    print('Alpha = {:.4f}\nFeatures kept: {}, r-squared training: {:.2f}, \
          r_squared test: {:.2f}'
          .format(alpha, np.sum(linlasso.coef_ != 0), r2_train, r2_test))

Lasso regression: effect of alpha regularization
      parameter on number of features kept in final model

Alpha = 0.0001
Features kept: 49, r-squared training: 0.71,           r_squared test: 0.66
Alpha = 0.0005
Features kept: 47, r-squared training: 0.71,           r_squared test: 0.65
Alpha = 0.0010
Features kept: 43, r-squared training: 0.71,           r_squared test: 0.66
Alpha = 0.0100
Features kept: 18, r-squared training: 0.61,           r_squared test: 0.69
Alpha = 0.1000
Features kept: 2, r-squared training: 0.16,           r_squared test: 0.20
Alpha = 1.0000
Features kept: 0, r-squared training: 0.00,           r_squared test: -0.02
Alpha = 5.0000
Features kept: 0, r-squared training: 0.00,           r_squared test: -0.02
Alpha = 10.0000
Features kept: 0, r-squared training: 0.00,           r_squared test: -0.02


In [3]:
linlasso = Lasso(0.001).fit(X_train_scaled, y_train)
r2_train = linlasso.score(X_train_scaled, y_train)
r2_test = linlasso.score(X_test_scaled, y_test)
print('Alpha = {:.4f}\nFeatures kept: {}, r-squared training: {:.2f}, \
     r_squared test: {:.2f}'
     .format(alpha, np.sum(linlasso.coef_ != 0), r2_train, r2_test))
print('Features with non-zero weight (sorted by absolute magnitude):')
for e in sorted (list(zip(list(X),linlasso.coef_)), key = lambda e: -abs(e[1])):
    if e[1] != 0:
        print('\t{}, {:.4f}'.format(e[0], e[1]))

Alpha = 10.0000
Features kept: 43, r-squared training: 0.71,      r_squared test: 0.66
Features with non-zero weight (sorted by absolute magnitude):
	XY * X2, -1.3200
	y, -1.2179
	XY * Y2, 1.1202
	xy * x2, -1.1078
	x * x2, 1.0202
	XY * x2, -0.8803
	xy, 0.8333
	X2, -0.8196
	X * XY, 0.7324
	X * y, -0.7219
	X * Y2, 0.6525
	Y * Y2, -0.6508
	Y * x2, 0.5947
	x2 * y2, 0.5812
	XY * x, -0.5754
	X2 * y, 0.5682
	XY, -0.5452
	X * xy, 0.5362
	x, 0.4404
	X2 * y2, -0.4117
	x * xy, 0.3215
	Y2, -0.3091
	Y2 * x, 0.2661
	X2 * x, -0.2473
	Y * y2, 0.2412
	Y * xy, -0.2341
	x * y2, -0.2282
	X * x, -0.2198
	Y2 * x2, 0.2122
	xy * y2, -0.2112
	X * y2, -0.2082
	y2, 0.2005
	y * y2, -0.1303
	x2, -0.1291
	XY * y2, -0.1278
	X * x2, 0.1266
	XY * y, -0.1256
	Y2 * y2, -0.1218
	Y * x, 0.0506
	X2 * Y2, 0.0506
	Y, 0.0475
	XY * xy, 0.0170
	X, -0.0049
