In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split,KFold,GridSearchCV
from sklearn.datasets import load_boston
from sklearn.linear_model import Lasso

In [2]:
df = load_boston()
X = df.data
y = df.target
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.7,random_state=42)

In [3]:
llr = Lasso()
'''
# Params

# alpha(default --> 1.0) ---> Regularization strength; must be a positive float
# precompute ---> default -- 'auto'  Whether to use a precomputed Gram matrix to speed up calculations.
# fit_intercept --> defalut  is true,if false..Line will pass thru Origin
# normalize -->Input data will be normalized before fitting...Usually we use standard orMinMax scalreto do our work
# copy_X (default --> True) --> Make acopy of X for fitting.Could impact if normalize is True
# max_iter ---> Maximum number of iterations for conjugate gradient solve
# tol --> Precesion of solution defaultis 0.001
# positive --> When set to True, forces the coefficients to be positive.
# selection{‘cyclic’, ‘random’}, default=’cyclic’  If set to ‘random’, a random coefficient is updated every iteration rather than
                               looping over features sequentially by default. 
                               This (setting to ‘random’) often leads to significantly faster convergence especially 
                               when tol is higher than 1e-4.
'''

In [4]:
hyper_params = {'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1, 
 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 3.0, 
 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 20, 50, 100, 500, 1000 ],
         'selection':['cyclic','random'],
               'tol':[0.0001, 0.001]}
folds = KFold(n_splits=5,shuffle=True,random_state=42)
model_cv = GridSearchCV(estimator=llr,
                       param_grid = hyper_params,
                       scoring ='neg_mean_absolute_error',
                       cv = folds,
                       verbose=1,
                       return_train_score=True,
                       n_jobs=30)
model_cv.fit(X_train,y_train)
'''
By default joblib.Parallel uses the 'loky' backend module to start separate Python worker processes to execute tasks 
concurrently on separate CPUs. This is a reasonable default for generic Python programs but can induce a significant 
overhead as the input and output data need to be serialized in a queue for communication with the worker processes 
'''

Fitting 5 folds for each of 112 candidates, totalling 560 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done 140 tasks      | elapsed:   28.5s
[Parallel(n_jobs=30)]: Done 390 tasks      | elapsed:   30.3s
[Parallel(n_jobs=30)]: Done 560 out of 560 | elapsed:   35.8s finished


"\nBy default joblib.Parallel uses the 'loky' backend module to start separate Python worker processes to execute tasks \nconcurrently on separate CPUs. This is a reasonable default for generic Python programs but can induce a significant \noverhead as the input and output data need to be serialized in a queue for communication with the worker processes \n"

In [5]:
print(model_cv.best_score_)
print(model_cv.best_params_)
print(model_cv.best_estimator_)

-3.534631283402708
{'alpha': 0.05, 'selection': 'cyclic', 'tol': 0.001}
Lasso(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.001, warm_start=False)


In [6]:
llr = Lasso(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.001, warm_start=False)
llr.fit(X_train,y_train)

Lasso(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.001, warm_start=False)

In [8]:
# Attributes
print(llr.coef_)
print(llr.intercept_)
print(llr.n_iter_) # Actual number of iterations for each target. Available only for sag and lsqr solvers. Other solvers will return None
print(llr.sparse_coef_) #sparse representation of the fitted coef_

[-0.12251843  0.03812112 -0.00804914  2.24158638 -0.          4.02034063
 -0.02217066 -1.12842383  0.20942077 -0.01023412 -0.73940918  0.01227677
 -0.58787165]
21.864854256801557
41
  (0, 0)	-0.12251842617663834
  (0, 1)	0.03812112072647257
  (0, 2)	-0.008049136988335745
  (0, 3)	2.2415863845707573
  (0, 5)	4.020340628368363
  (0, 6)	-0.02217066013733395
  (0, 7)	-1.1284238272128182
  (0, 8)	0.20942077417860552
  (0, 9)	-0.0102341204165126
  (0, 10)	-0.7394091787261229
  (0, 11)	0.012276773355356253
  (0, 12)	-0.5878716499071909


In [11]:
print(llr.score(X_test,y_test))
print(llr.get_params())
#print(llr.path(X_test,y_test)) #Compute elastic net path with coordinate descent.
print(llr.predict(X_test))

0.6916523642145405
{'alpha': 0.05, 'copy_X': True, 'fit_intercept': True, 'max_iter': 1000, 'normalize': False, 'positive': False, 'precompute': False, 'random_state': None, 'selection': 'cyclic', 'tol': 0.001, 'warm_start': False}
(array([16065.79342105, 14983.01271481, 13973.20780422, 13031.46036491,
       12153.18355109, 11334.09965504, 10570.2192722 ,  9857.82187054,
        9193.43766944,  8573.83073989,  7995.98324363,  7457.08073463,
        6954.49845109,  6485.78853138,  6048.66809155,  5641.00810636,
        5260.82303979,  4906.26117427,  4575.5955918 ,  4267.2157629 ,
        3979.61970235,  3711.40665374,  3461.27026692,  3227.9922354 ,
        3010.43636244,  2807.54302719,  2618.32402368,  2441.85774771,
        2277.28470813,  2123.8033406 ,  1980.66610357,  1847.17583725,
        1722.6823681 ,  1606.57934211,  1498.30127148,  1397.32078042,
        1303.14603649,  1215.31835511,  1133.4099655 ,  1057.02192722,
         985.78218705,   919.34376694,   857.38307399,   