In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split,KFold,GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.datasets import load_boston

In [2]:
df = load_boston()
X = df.data
y = df.target
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.7,random_state=42)

In [3]:
rlr = Ridge()
'''
# Params

# alpha(default --> 1.0) ---> Regularization strength; must be a positive float
# fit_intercept --> defalut  is true,if false..Line will pass thru Origin
# normalize -->Input data will be normalized before fitting...Usually we use standard orMinMax scalreto do our work
# copy_X (default --> True) --> Make acopy of X for fitting.Could impact if normalize is True
# max_iter ---> Maximum number of iterations for conjugate gradient solve
# tol --> Precesion of solution defaultis 0.001
# solver ---> ‘auto’, ‘svd’, ‘cholesky’, ‘lsqr’, ‘sparse_cg’, ‘sag’, ‘saga’ ... default is auto

‘auto’ chooses the solver automatically based on the type of data.

‘svd’ uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than ‘cholesky’.

‘cholesky’ uses the standard scipy.linalg.solve function to obtain a closed-form solution.

‘sparse_cg’ uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than ‘cholesky’ for large-scale data (possibility to set tol and max_iter).

‘lsqr’ uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure.

‘sag’ uses a Stochastic Average Gradient descent, and ‘saga’ uses its improved, unbiased version named SAGA. Both methods also use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that ‘sag’ and ‘saga’ fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing.

'''

In [5]:
hyper_params = {'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1, 
 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 2.0, 3.0, 
 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 20, 50, 100, 500, 1000 ],
         'solver':['auto','svd','cholesky','lsqr','sparse_cg','sag','saga']}
folds = KFold(n_splits=5,shuffle=True,random_state=42)
model_cv = GridSearchCV(estimator=rlr,
                       param_grid = hyper_params,
                       scoring ='neg_mean_absolute_error',
                       cv = folds,
                       verbose=1,
                       return_train_score=True,
                       n_jobs=30)
model_cv.fit(X_train,y_train)
'''
By default joblib.Parallel uses the 'loky' backend module to start separate Python worker processes to execute tasks 
concurrently on separate CPUs. This is a reasonable default for generic Python programs but can induce a significant 
overhead as the input and output data need to be serialized in a queue for communication with the worker processes 
'''

Fitting 5 folds for each of 196 candidates, totalling 980 fits


[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.
[Parallel(n_jobs=30)]: Done 140 tasks      | elapsed:  1.1min
[Parallel(n_jobs=30)]: Done 390 tasks      | elapsed:  1.1min
[Parallel(n_jobs=30)]: Done 740 tasks      | elapsed:  1.2min
[Parallel(n_jobs=30)]: Done 980 out of 980 | elapsed:  2.2min finished


"\nBy default joblib.Parallel uses the 'loky' backend module to start separate Python worker processes to execute tasks \nconcurrently on separate CPUs. This is a reasonable default for generic Python programs but can induce a significant \noverhead as the input and output data need to be serialized in a queue for communication with the worker processes \n"

In [6]:
print(model_cv.best_score_)
print(model_cv.best_params_)
print(model_cv.best_estimator_)

-3.526471400593312
{'alpha': 1.0, 'solver': 'auto'}
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [7]:
rlr = Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)
rlr.fit(X_train,y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [8]:
# attributes
print(rlr.coef_)
print(rlr.intercept_)
print(rlr.n_iter_) # Actual number of iterations for each target. Available only for sag and lsqr solvers. Other solvers will return None

[-0.1284272   0.03695233  0.01791436  2.93269454 -7.84806046  4.06357438
 -0.01724174 -1.27176091  0.22549398 -0.00938149 -0.82710453  0.01198771
 -0.56347377]
26.764544089344003
None


In [9]:
# Methods
print(rlr.score(X_test,y_test))
print(rlr.get_params())
print(rlr.predict(X_test))

0.7041586727559436
{'alpha': 1.0, 'copy_X': True, 'fit_intercept': True, 'max_iter': None, 'normalize': False, 'random_state': None, 'solver': 'auto', 'tol': 0.001}
[28.34715071 36.21437831 14.80303493 25.22804634 19.17458952 22.29031648
 17.33028278 13.95318273 22.14506024 20.74582213 24.14011309 18.40447565
 -7.01750315 21.1846762  19.19820978 26.02798498 19.86973569  5.51208138
 40.33260217 17.84866562 27.65388459 30.02887972 10.98804148 24.46625789
 18.37620309 15.25046005 22.61319214 14.80791284 21.62917267 19.57814764
 21.73254632 25.36726035 25.30044412 19.30763226 15.88564352 18.87116841
 30.92984029 20.76386724 23.49286646 24.67082407 14.0468997  31.67610639
 42.34382119 17.35629744 26.91818696 17.33727443 13.86600737 25.91730843
 19.76761959 30.38603367 21.22823089 33.9789751  15.71192385 26.17445031
 39.58477186 22.87290593 19.40274082 33.07593414 24.76614616 12.67254167
 23.02338367 31.20498451 31.79965924 16.82657749 21.53203533 15.72325795
 20.43639171 26.13467468 31.2656