In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
## Sklearn library allows to create datasets that are already standardized
from sklearn.datasets import make_classification

In [5]:
## Create the dataset
X,Y = make_classification(n_samples=1000,n_features=10,n_classes=2,random_state=42)

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.30,random_state=42)

In [9]:
## Model Traning 
from sklearn.linear_model import LogisticRegression
logic_reg = LogisticRegression()

In [10]:
logic_reg.fit(X_train,Y_train)

In [14]:
Y_pred = logic_reg.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, classification_report

In [18]:
score = accuracy_score(Y_test,Y_pred)
print('Accuracy Score : ', score)

Accuracy Score :  0.8466666666666667


In [19]:
cm = confusion_matrix(Y_test,Y_pred)
print("confusion matrix : ", cm)
print(classification_report(Y_test,Y_pred))

confusion matrix :  [[118  17]
 [ 29 136]]
              precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



## Hyperparameter Tuning and cross validation

### GridSearch CV

In [21]:
model = LogisticRegression()
penalty = ['l1','l2','elasticnet']
c_values = [100,10,1.0,0.1,0.01]
solver = ['newton-cg','lbfgs','liblinear','sag','saga']

In [22]:
dict(penalty=penalty,C=c_values,solver=solver)

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100, 10, 1.0, 0.1, 0.01],
 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}

In [23]:
params = dict(penalty=penalty,C=c_values,solver=solver)

In [25]:
## GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold()
grid = GridSearchCV(estimator=model,param_grid=params,scoring='accuracy',cv=cv,n_jobs=-1)


In [26]:
grid

In [27]:
grid.fit(X_train, Y_train)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/vaibhavkale/ML Learnings/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/vaibhavkale/ML Learnings/venv/lib/python3.10/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/vaibhavkale/ML Learnings/venv/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/vaibhavkale/ML Lear

In [28]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}

In [29]:
grid.best_score_

np.float64(0.8785714285714287)

In [31]:
y_pred = grid.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, classification_report
score = accuracy_score(Y_test,y_pred)
print('Accuracy Score : ', score)
cm = confusion_matrix(Y_test,y_pred)
print("confusion matrix : ", cm)
print(classification_report(Y_test,y_pred))

Accuracy Score :  0.8533333333333334
confusion matrix :  [[124  11]
 [ 33 132]]
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



### RandomizedSearchCV

In [36]:
from sklearn.model_selection import RandomizedSearchCV
model = LogisticRegression()
randomcv = RandomizedSearchCV(estimator=model,param_distributions=params,scoring='accuracy',cv=5)

In [37]:
randomcv.fit(X_train,Y_train)


35 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/vaibhavkale/ML Learnings/venv/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/vaibhavkale/ML Learnings/venv/lib/python3.10/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/vaibhavkale/ML Learnings/venv/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/vaibhavkale/ML Learni

In [38]:
randomcv.best_score_

np.float64(0.8785714285714287)

In [39]:
randomcv.best_params_

{'solver': 'sag', 'penalty': 'l2', 'C': 0.01}

In [40]:
y_pred = randomcv.predict(X_test)

In [41]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, classification_report
score = accuracy_score(Y_test,y_pred)
print('Accuracy Score : ', score)
cm = confusion_matrix(Y_test,y_pred)
print("confusion matrix : ", cm)
print(classification_report(Y_test,y_pred))

Accuracy Score :  0.8533333333333334
confusion matrix :  [[124  11]
 [ 33 132]]
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300

