In [8]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split,KFold,cross_val_score,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
import sklearn.metrics as mx

In [4]:
df = load_iris()
X = df.data
y = df.target
X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.7,random_state=42)

In [5]:
lr = LogisticRegression()
'''
# Penalty is for regularization..there are different solvers for LogisticRegression
   a) Newton-cg,sag,lbfgs ---> support l2 regularization, none as well
   b) saga ---> support 'elasticnet',l2 and in latest version it supports the L1 regularization as well none as well
   c) liblinear ---> Doesn't support none
# dual --> 'False'. dual or primal formulation. If # rows> #cols keepdualas false
# tol -->Tolerance for stopping criteria default 10e-4
# C ----> Inverse of regularization strength; must be a positive float. Like in support vector machines,
          smaller values specify stronger regularization.
# fit_intercept: Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function
# solver{‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default=’lbfgs’
# max_iterint, default=100 Maximum number of iterations taken for the solvers to converge.
# n_jobsint, default=None
'''

"\n# Penalty is for regularization..there are different solvers for LogisticRegression\n   a) Newton-cg,sag,lbfgs ---> support l2 regularization, none as well\n   b) saga ---> support 'elasticnet',l2 and in latest version it supports the L1 regularization as well none as well\n   c) liblinear ---> Doesn't support none\n# dual --> 'False'. dual or primal formulation. If # rows> #cols keepdualas false\n# tol -->Tolerance for stopping criteria default 10e-4\n# fit_intercept: Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function\n# solver{‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default=’lbfgs’\n# max_iterint, default=100 Maximum number of iterations taken for the solvers to converge.\n# n_jobsint, default=None\n"

In [10]:
hyper_params = [{
    'solver': ['newton-cg','lbfgs', 'liblinear', 'sag', 'saga'],
    'C':[1,10,100,1000]
}]
folds = KFold(n_splits=5,shuffle=True,random_state=42)
model_cv = GridSearchCV(estimator=lr,
                       param_grid = hyper_params,
                       scoring ='accuracy',
                       cv = folds,
                       verbose=1,
                       return_train_score=True,
                       n_jobs=12)
model_cv.fit(X_train,y_train)
'''
By default joblib.Parallel uses the 'loky' backend module to start separate Python worker processes to execute tasks 
concurrently on separate CPUs. This is a reasonable default for generic Python programs but can induce a significant 
overhead as the input and output data need to be serialized in a queue for communication with the worker processes 
'''

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:   17.9s
[Parallel(n_jobs=12)]: Done 100 out of 100 | elapsed:   19.3s finished


GridSearchCV(cv=KFold(n_splits=5, random_state=42, shuffle=True),
             error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=12,
             param_grid=[{'C': [1, 10, 100, 1000],
                          'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag',
                                     'saga']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='accuracy', verbose=

In [12]:
print(model_cv.best_score_)
print(model_cv.best_params_)
print(model_cv.best_estimator_)

0.9619047619047618
{'C': 1, 'solver': 'newton-cg'}
LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
                   warm_start=False)


In [23]:
lr1 = LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
                   warm_start=False)
lr1.fit(X_train,y_train)
print(lr1.coef_)
print(lr1.classes_)
print(lr1.intercept_)
print(lr1.n_iter_)

[[-0.40471739  0.86855295 -2.27790762 -0.95758355]
 [ 0.46656108 -0.37541401 -0.18769284 -0.72035824]
 [-0.06184369 -0.49313893  2.46560046  1.67794179]]
[0 1 2]
[  8.86221326   2.21020683 -11.07242008]
[16]


In [17]:
'''
# The tol parameter tells the optimization algorithm when to stop. If the value of tol is too big, the algorithm stops before
  it can converge. The Best tolerance is where your fetures cofficent doesn't flactuate much.
  
# Inverse regularization parameter - A control variable that retains strength modification of Regularization by being inversely positioned to the Lambda regulator.
Given how Scikit cites it as being:
C = 1/λ

newton-cg — A newton method. Newton methods use an exact Hessian matrix. It's slow for large datasets, because it computes
            the second derivatives.

lbfgs — Stands for Limited-memory Broyden–Fletcher–Goldfarb–Shanno. It approximates the second derivative matrix updates 
        with gradient evaluations. It stores only the last few updates, so it saves memory. It isn't super fast with large data sets.
        It will be the default solver as of Scikit-learn version 0.22.0.

liblinear — Library for Large Linear Classification. Uses a coordinate descent algorithm. Coordinate descent is based on
             minimizing a multivariate function by solving univariate optimization problems in a loop. In other words, it moves 
            toward the minimum in one direction at a time. It is the default solver for Scikit-learn versions earlier than 0.22.0.
            It performs pretty well with high dimensionality. It does have a number of drawbacks. It can get stuck, is unable to run 
            in parallel, and can only solve multi-class logistic regression with one-vs.-rest.

sag — Stochastic Average Gradient descent. A variation of gradient descent and incremental aggregated gradient approaches that uses
      a random sample of previous gradient values. Fast for big datasets.

saga — Extension of sag that also allows for L1 regularization. Should generally train faster than sag.

'''

array([[-0.40471739,  0.86855295, -2.27790762, -0.95758355],
       [ 0.46656108, -0.37541401, -0.18769284, -0.72035824],
       [-0.06184369, -0.49313893,  2.46560046,  1.67794179]])

In [25]:
# Prediction Part
y_pred = lr1.predict(X_train)
y_pred

array([1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1,
       2, 0, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 0, 2, 0, 0,
       2, 1, 2, 2, 2, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1,
       2, 1, 2, 0, 2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0,
       2, 0, 1, 2, 2, 1, 2, 1, 1, 2, 2, 0, 1, 1, 0, 1, 2])

In [28]:
y_pred_prob =  lr1.predict_proba(X_train)
y_pred_prob
# Since there are three classes so it will give prob of every class

array([[4.15028286e-02, 9.45095515e-01, 1.34016561e-02],
       [7.75324069e-04, 4.52776923e-01, 5.46447753e-01],
       [1.21718958e-04, 1.32293705e-01, 8.67584576e-01],
       [6.61302910e-03, 8.84155388e-01, 1.09231582e-01],
       [1.91554057e-06, 9.28625608e-03, 9.90711828e-01],
       [2.01722394e-02, 8.96393042e-01, 8.34347186e-02],
       [1.97933875e-07, 7.41735231e-03, 9.92582450e-01],
       [1.04035426e-02, 7.34509510e-01, 2.55086947e-01],
       [9.73106785e-01, 2.68930789e-02, 1.35927506e-07],
       [1.17975958e-07, 7.44535388e-03, 9.92554528e-01],
       [2.01734531e-02, 9.47826351e-01, 3.20001956e-02],
       [9.66980993e-01, 3.30188470e-02, 1.60174408e-07],
       [9.79387839e-01, 2.06120543e-02, 1.07068621e-07],
       [9.46304378e-01, 5.36940272e-02, 1.59486390e-06],
       [5.83732204e-02, 9.31025102e-01, 1.06016772e-02],
       [8.63522657e-04, 4.03695118e-01, 5.95441360e-01],
       [9.35865044e-01, 6.41335075e-02, 1.44832106e-06],
       [9.33125169e-01, 6.68741