In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = sns.load_dataset('mpg')

In [3]:
df  #miles per gallon data, given mileage,  1 gallon= 3.7 km

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger


In [4]:
df.drop("name", axis=1, inplace=True)

In [5]:
df.sample(5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
42,12.0,8,383.0,180.0,4955,11.5,71,usa
28,9.0,8,304.0,193.0,4732,18.5,70,usa
267,27.5,4,134.0,95.0,2560,14.2,78,japan
121,15.0,8,318.0,150.0,3399,11.0,73,usa
350,34.7,4,105.0,63.0,2215,14.9,81,usa


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    392 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 25.0+ KB


In [7]:
df.isna().sum()

mpg             0
cylinders       0
displacement    0
horsepower      6
weight          0
acceleration    0
model_year      0
origin          0
dtype: int64

In [8]:
#since we have not done outlier treatment, we will replace the value with median
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median())

In [9]:
df['horsepower']

0      130.0
1      165.0
2      150.0
3      150.0
4      140.0
       ...  
393     86.0
394     52.0
395     84.0
396     79.0
397     82.0
Name: horsepower, Length: 398, dtype: float64

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   horsepower    398 non-null    float64
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    object 
dtypes: float64(4), int64(3), object(1)
memory usage: 25.0+ KB


In [11]:
df.origin.value_counts()

origin
usa       249
japan      79
europe     70
Name: count, dtype: int64

In [12]:
#since categorical data, we will encode

In [13]:
df['origin'] = df['origin'].map({'usa':1,'japan':2,'europe':3})

In [14]:
df['origin']

0      1
1      1
2      1
3      1
4      1
      ..
393    1
394    3
395    1
396    1
397    1
Name: origin, Length: 398, dtype: int64

In [15]:
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower      float64
weight            int64
acceleration    float64
model_year        int64
origin            int64
dtype: object

In [16]:
X = df.drop('mpg', axis=1)
y = df['mpg']

In [17]:
X

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,8,307.0,130.0,3504,12.0,70,1
1,8,350.0,165.0,3693,11.5,70,1
2,8,318.0,150.0,3436,11.0,70,1
3,8,304.0,150.0,3433,12.0,70,1
4,8,302.0,140.0,3449,10.5,70,1
...,...,...,...,...,...,...,...
393,4,140.0,86.0,2790,15.6,82,1
394,4,97.0,52.0,2130,24.6,82,3
395,4,135.0,84.0,2295,11.6,82,1
396,4,120.0,79.0,2625,18.6,82,1


In [18]:
y

0      18.0
1      15.0
2      18.0
3      16.0
4      17.0
       ... 
393    27.0
394    44.0
395    32.0
396    28.0
397    31.0
Name: mpg, Length: 398, dtype: float64

In [19]:
#train test split
from sklearn.model_selection import train_test_split

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state = 1)

In [21]:
X_train

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
350,4,105.0,63.0,2215,14.9,81,1
59,4,97.0,54.0,2254,23.5,72,3
120,4,121.0,112.0,2868,15.5,73,3
12,8,400.0,150.0,3761,9.5,70,1
349,4,91.0,68.0,1985,16.0,81,2
...,...,...,...,...,...,...,...
393,4,140.0,86.0,2790,15.6,82,1
255,4,140.0,88.0,2720,15.4,78,1
72,8,304.0,150.0,3892,12.5,72,1
235,4,97.0,75.0,2265,18.2,77,2


In [22]:
X_test

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
174,6,171.0,97.0,2984,14.5,75,1
359,4,141.0,80.0,3230,20.4,81,3
250,8,318.0,140.0,3735,13.2,78,1
274,5,131.0,103.0,2830,15.9,78,3
283,6,232.0,90.0,3265,18.2,79,1
...,...,...,...,...,...,...,...
382,4,108.0,70.0,2245,16.9,82,2
39,8,400.0,175.0,4464,11.5,71,1
171,4,134.0,96.0,2702,13.5,75,2
271,4,156.0,105.0,2745,16.7,78,1


In [23]:
y_train

350    34.7
59     23.0
120    19.0
12     15.0
349    34.1
       ... 
393    27.0
255    25.1
72     15.0
235    26.0
37     18.0
Name: mpg, Length: 278, dtype: float64

In [24]:
y_test

174    18.0
359    28.1
250    19.4
274    20.3
283    20.2
       ... 
382    34.0
39     14.0
171    24.0
271    23.2
247    39.4
Name: mpg, Length: 120, dtype: float64

In [25]:
from sklearn.linear_model import LinearRegression
regression_model = LinearRegression()

In [26]:
regression_model

In [27]:
regression_model.fit(X_train, y_train)

In [28]:
regression_model.coef_

array([-0.31761423,  0.02623748, -0.01827076, -0.00748775,  0.05040673,
        0.84709514,  1.51909584])

In [29]:
regression_model.intercept_

-23.08538074231676

In [30]:
for i, col_name in enumerate(X_train.columns):  #enumerate clubs the value with the index
    print(f"The coefficient for {col_name} is {regression_model.coef_[i]}")

The coefficient for cylinders is -0.3176142302799276
The coefficient for displacement is 0.02623748259907893
The coefficient for horsepower is -0.01827076491312441
The coefficient for weight is -0.007487750398361907
The coefficient for acceleration is 0.050406734619714726
The coefficient for model_year is 0.8470951427061371
The coefficient for origin is 1.5190958387975058


In [31]:
#observation:
#coeff are relatively smaller, if one independent value changes
#there will be no much difference iin prediction
#this is sometimes called as smoother model
# these feature might not be contributing


In [32]:
from sklearn.metrics import r2_score
y_pred_linear = regression_model.predict(X_test)

In [33]:
y_pred_linear

array([21.16196121, 27.89684387, 20.45045592, 27.12361164, 24.36117063,
       15.87763934, 29.93157794, 34.02155729, 17.08992155, 10.56782304,
       30.53231377, 16.48854992, 22.4061424 , 27.76978226, 36.0209892 ,
       23.79725872, 10.82747269, 20.27707855,  8.86935273, 32.48801009,
       25.36507567, 32.75235387, 20.95486868, 24.54530695, 25.77582154,
       30.20140405, 32.01102103, 31.96692512, 15.25929349, 30.41225966,
       27.50427715, 10.93370544, 21.42816438, 28.08300976, 25.03368839,
       13.67199264, 26.67769394,  9.04050101, 32.03270673, 23.97429191,
       24.18855895, 24.60440771, 21.16368861, 34.53665774, 26.31981331,
       22.23170907, 21.0865992 , 11.65432984, 27.9398198 , 18.98058597,
       23.69821181, 26.86564242, 17.04794305, 12.03955477, 28.70876897,
       24.26227131, 10.20293895, 13.03594704, 29.96910853, 35.35029687,
       37.01162788, 35.38558158, 18.04991116, 27.90304164, 20.67174751,
       33.83899858, 27.02537633, 26.73184442, 29.93216787, 12.33

In [34]:
r2_linear = r2_score(y_test, y_pred_linear)

In [35]:
print(f"the r2 score is{r2_linear}")

the r2 score is0.8348001123742285


In [36]:
#regularise model
#ridge regression
from sklearn.linear_model import Ridge

In [37]:
ridge_reg_model = Ridge(alpha = 0.1)
ridge_reg_model

In [38]:
ridge_reg_model.fit(X_train, y_train)

In [39]:
for i, col_name in enumerate(X_train.columns):  #enumerate clubs the value with the index
    print(f"The coefficient for {col_name} is {ridge_reg_model.coef_[i]}")

The coefficient for cylinders is -0.317003210100651
The coefficient for displacement is 0.026213249757982896
The coefficient for horsepower is -0.01826325248144899
The coefficient for weight is -0.007487326050213115
The coefficient for acceleration is 0.05036896947442574
The coefficient for model_year is 0.8470062938903152
The coefficient for origin is 1.5174528285653759


In [40]:
y_pred_ridge = ridge_reg_model.predict(X_test)
r2_ridge = r2_score(y_test, y_pred_ridge)

In [41]:
print(f"the r2 score is{r2_ridge}")

the r2 score is0.8348084889168358


In [42]:
#we didn't see much difference in ridge regression and linear regression

In [43]:
from sklearn.linear_model import Lasso

In [44]:
lasso_reg_model = Lasso(alpha = 0.5)

In [45]:
lasso_reg_model

In [46]:
lasso_reg_model.fit(X_train, y_train)

In [47]:
for i, col_name in enumerate(X_train.columns):  #enumerate clubs the value with the index
    print(f"The coefficient for {col_name} is {lasso_reg_model.coef_[i]}")

The coefficient for cylinders is -0.0
The coefficient for displacement is 0.006208198888300375
The coefficient for horsepower is -0.011058382987169579
The coefficient for weight is -0.00698267316802309
The coefficient for acceleration is 0.0
The coefficient for model_year is 0.7446549520038191
The coefficient for origin is 0.0


In [48]:
#three freatures' coefficient are 0, Lasso helps in feature selection

In [49]:
y_pred_lasso = lasso_reg_model.predict(X_test)

In [50]:
r2_lasso = r2_score(y_test, y_pred_lasso)
print(f"the r2 score is{r2_lasso}")

the r2 score is0.8277934716635554


In [51]:
#Elastic net
from sklearn.linear_model import ElasticNet

In [52]:
elastic_net_model =ElasticNet(alpha = 1, l1_ratio=0.5)
elastic_net_model

In [53]:
elastic_net_model.fit(X_train, y_train)

In [54]:
for i, col_name in enumerate(X_train.columns):  #enumerate clubs the value with the index
    print(f"The coefficient for {col_name} is {elastic_net_model.coef_[i]}")

The coefficient for cylinders is -0.0
The coefficient for displacement is 0.0058888699536675535
The coefficient for horsepower is -0.012403874933570123
The coefficient for weight is -0.00693455051625763
The coefficient for acceleration is 0.0
The coefficient for model_year is 0.7133150744603872
The coefficient for origin is 0.0


In [55]:
y_pred_elastic = elastic_net_model.predict(X_test)
y_pred_elastic

array([22.73514986, 25.34334066, 19.99954491, 25.63303782, 24.08584966,
       15.20819736, 29.04324503, 33.44315878, 17.16440511, 10.82726179,
       30.86056513, 17.06306271, 22.14801913, 26.36895354, 35.47957798,
       22.69874996, 10.02492652, 21.38736507,  7.92786138, 32.16379804,
       25.4731963 , 31.201348  , 22.24121477, 25.47985421, 26.1390185 ,
       28.67566619, 31.31343178, 31.81322052, 16.18208384, 30.46892737,
       27.89644118,  9.80911386, 20.45358378, 27.62226407, 25.84766781,
       13.65616682, 28.29713451,  7.75300193, 32.45551175, 25.1772364 ,
       26.32308292, 25.66646811, 20.8244262 , 32.41769713, 27.3463305 ,
       22.38311428, 20.86300504, 11.8602787 , 28.21004831, 19.1686974 ,
       24.76653913, 27.39011848, 16.48028306, 12.04913477, 30.20905317,
       25.0478948 ,  9.30262917, 13.0205239 , 30.16986996, 34.69566901,
       34.58726781, 34.69566901, 18.14214533, 28.66161626, 19.41457092,
       31.95734154, 28.0744583 , 26.77042657, 30.38995932, 12.82

In [56]:
r2_elastic = r2_score(y_test, y_pred_elastic)
print(f"the r2 score is{r2_elastic}")

the r2 score is0.8284840073256805


In [57]:
#regularisation with cross validation
from sklearn.linear_model import LassoCV
lasso_cv = LassoCV(cv=5, verbose=2)  
lasso_cv

In [58]:
lasso_cv.fit(X_train, y_train)

Path: 000 out of 100
Path: 001 out of 100
Path: 002 out of 100
Path: 003 out of 100
Path: 004 out of 100
Path: 005 out of 100
Path: 006 out of 100
Path: 007 out of 100
Path: 008 out of 100
Path: 009 out of 100
Path: 010 out of 100
Path: 011 out of 100
Path: 012 out of 100
Path: 013 out of 100
Path: 014 out of 100
Path: 015 out of 100
Path: 016 out of 100
Path: 017 out of 100
Path: 018 out of 100
Path: 019 out of 100
Path: 020 out of 100
Path: 021 out of 100
Path: 022 out of 100
Path: 023 out of 100
Path: 024 out of 100
Path: 025 out of 100
Path: 026 out of 100
Path: 027 out of 100
Path: 028 out of 100
Path: 029 out of 100
Path: 030 out of 100
Path: 031 out of 100
Path: 032 out of 100
Path: 033 out of 100
Path: 034 out of 100
Path: 035 out of 100
Path: 036 out of 100
Path: 037 out of 100
Path: 038 out of 100
Path: 039 out of 100
Path: 040 out of 100
Path: 041 out of 100
Path: 042 out of 100
Path: 043 out of 100
Path: 044 out of 100
Path: 045 out of 100
Path: 046 out of 100
Path: 047 out

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s finished


In [59]:
y_pred_lassocv = lasso_cv.predict(X_test)

In [60]:
r2_lassocv = r2_score(y_test, y_pred_lassocv)
print(f"the r2 score is{r2_lassocv}")

the r2 score is0.808280598384475


In [61]:
from sklearn.linear_model import RidgeCV
ridge_cv = RidgeCV(cv=5)  

In [62]:
ridge_cv.fit(X_train, y_train)

In [63]:
y_pred_ridgecv = ridge_cv.predict(X_test)

In [64]:
r2_ridgecv = r2_score(y_test, y_pred_ridgecv)
print(f"the r2 score is{r2_ridgecv}")

the r2 score is0.8354145247502054


In [65]:
ridge_cv.get_params()

{'alpha_per_target': False,
 'alphas': (0.1, 1.0, 10.0),
 'cv': 5,
 'fit_intercept': True,
 'gcv_mode': None,
 'scoring': None,
 'store_cv_values': False}

In [66]:
from sklearn.linear_model import ElasticNetCV
elastic_cv = ElasticNetCV(cv=5)
elastic_cv

In [67]:
elastic_cv.fit(X_train, y_train)

In [68]:
y_pred_elasticcv = elastic_cv.predict(X_test)

In [69]:
r2_elasticcv = r2_score(y_test, y_pred_elasticcv)
print(f"the r2 score is{r2_elasticcv}")

the r2 score is0.792863401804916


# CV with Hyperparameter Tuning
## Grid Search CV
## Random Search CV

In [71]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [72]:
#define model 
lasso =Lasso()

In [73]:
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}   #for hit and trial method
param_grid

{'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

In [74]:
grid_search = GridSearchCV(estimator=lasso, param_grid=param_grid, cv=5, scoring='r2',verbose=2)
grid_search

In [75]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ........................................

In [76]:
grid_search.best_params_

{'alpha': 0.1}

In [77]:
grid_search.best_params_['alpha']

0.1

In [78]:
grid_search.best_score_

0.7964209726696481

In [79]:
grid_search.best_estimator_

In [80]:
y_pred_grid = grid_search.best_estimator_.predict(X_test)

In [81]:

y_pred_grid

array([21.77593691, 27.18472492, 20.54663855, 26.94962916, 24.2913322 ,
       15.77599569, 29.80110905, 33.97933837, 17.3866017 , 10.63921645,
       30.52949993, 16.9674299 , 22.25675614, 27.45218408, 36.02384988,
       23.53768364, 10.33981014, 20.63068389,  8.38249708, 32.43857659,
       25.27144642, 32.47152734, 21.30251176, 24.74699463, 25.60932885,
       29.78589706, 31.87433617, 31.99672602, 15.70956036, 30.29392131,
       27.78858215, 10.25294808, 21.06303658, 27.91321663, 25.04077924,
       13.64832586, 27.00042689,  8.32443756, 32.15601595, 24.07278681,
       24.60098688, 24.71467915, 21.3759044 , 34.16928519, 26.81842861,
       22.29346332, 20.91943465, 11.73669659, 28.01933257, 18.95634076,
       23.92117407, 26.82374406, 17.02169335, 12.07817727, 29.04031487,
       24.37239653,  9.68213859, 13.05374136, 30.05855282, 35.33769464,
       36.537393  , 35.34551995, 18.32925695, 28.38111261, 20.42916726,
       33.5213158 , 27.56553734, 26.89479085, 29.95532405, 12.58

In [82]:


r2_grid = r2_score(y_test, y_pred_grid)
print(f"the r2 score is{r2_grid}")

the r2 score is0.8345318641232303


In [83]:
#RandomizedSearch CV

In [84]:
randomized_search = RandomizedSearchCV(estimator=lasso, param_distributions=param_grid, cv=5, scoring='r2',verbose=2, n_iter=3)
randomized_search

In [85]:
randomized_search.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ............................................alpha=1; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................

In [86]:
randomized_search.best_params_

{'alpha': 0.001}

In [87]:
randomized_search.best_estimator_

In [88]:
y_pred_rand = randomized_search.best_estimator_.predict(X_test)


In [89]:
r2_rand = r2_score(y_test, y_pred_rand)
print(f"the r2 score is{r2_rand}")

the r2 score is0.8348101865598603


In [90]:
#checking for Ridge in randomizedCV

In [91]:
ridge = Ridge()

In [92]:
ridge

In [93]:
randomized_search_r = RandomizedSearchCV(estimator=ridge, param_distributions=param_grid, cv=5, scoring='r2',verbose=2, n_iter=2)
randomized_search_r

In [94]:
randomized_search_r.fit(X_train, y_train)

Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END ..........................................alpha=100; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s


In [95]:
randomized_search_r.best_params_

{'alpha': 0.01}

In [96]:
randomized_search_r.best_estimator_

In [97]:
y_pred_rand_r = randomized_search_r.best_estimator_.predict(X_test)

In [98]:
r2_rand_r = r2_score(y_test, y_pred_rand_r)
print(f"the r2 score is{r2_rand_r}")

the r2 score is0.834800952459415


In [99]:
#now we can check grid search for ridge as well

In [100]:
grid_search_r = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=5, scoring='r2',verbose=2)
grid_search_r

In [101]:
grid_search_r.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ..........................................alpha=0.1; total time=   0.0s
[CV] END ........................................

In [102]:
grid_search_r.best_params_

{'alpha': 10}

In [103]:
grid_search_r.best_estimator_

In [104]:
grid_search_r.best_score_

0.7943589846334791

In [105]:
y_pred_grid_r = grid_search_r.best_estimator_.predict(X_test)

In [106]:
r2_grid_r = r2_score(y_test, y_pred_grid_r)
print(f"the r2 score is{r2_grid_r}")

the r2 score is0.8354145247502054


In [213]:
#Now checking for elastic net

In [215]:
model = ElasticNet()
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100], 'l1_ratio': [0.1,0.4,0.9]}
grid_search_ec = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='r2',verbose=2)


In [217]:
grid_search_ec.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.1; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.4; total time=   0.0s
[CV] END ..........................alpha=0.001, l1_ratio=0.9; total time=   0.0s
[CV] END ..........................alpha=0.001, 

In [219]:
grid_search_ec.best_estimator_

In [221]:
grid_search_ec.best_params_

{'alpha': 0.1, 'l1_ratio': 0.9}

In [225]:
y_pred_grid_ec = grid_search_ec.best_estimator_.predict(X_test)
r2_grid_ec = r2_score(y_test, y_pred_grid_ec)
print(f"the r2 score is{r2_grid_ec}")

the r2 score is0.8345726087080976


In [231]:
model = ElasticNet()
param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100], 'l1_ratio': [0.1,0.4,0.9]}
randomized_search_ec = RandomizedSearchCV(estimator=model, param_distributions=param_grid, cv=5, scoring='r2',n_iter = 2, verbose=2)


In [233]:
randomized_search_ec.fit(X_train, y_train)

Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] END ..............................alpha=1, l1_ratio=0.1; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.1; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.1; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.1; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.1; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.9; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.9; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.9; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.9; total time=   0.0s
[CV] END ..............................alpha=1, l1_ratio=0.9; total time=   0.0s


In [235]:
randomized_search_ec.best_estimator_

In [237]:
randomized_search_ec.best_params_

{'l1_ratio': 0.1, 'alpha': 1}

In [239]:
y_pred_rand_ec = randomized_search_ec.best_estimator_.predict(X_test)
r2_rand_ec = r2_score(y_test, y_pred_rand_ec)
print(f"the r2 score is{r2_rand_ec}")

the r2 score is0.8320839106026104
