In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [31]:
from sklearn.datasets import make_classification

In [32]:
# Create the dataset
X,y=make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

In [33]:
print(X.shape, y.shape)
print(X[:1])
print(y[:5])

(1000, 10) (1000,)
[[ 0.96479937 -0.06644898  0.98676805 -0.35807945  0.99726557  1.18189004
  -1.61567885 -1.2101605  -0.62807677  1.22727382]]
[0 1 1 0 1]


In [34]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)  

In [35]:
## Model training
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

In [36]:
model.fit(X_train, y_train)

In [37]:
y_pred = model.predict(X_test)

In [38]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [39]:
print("The accuracy of the model is ",accuracy_score(y_test, y_pred))

The accuracy of the model is  0.8466666666666667


In [40]:
cm=confusion_matrix(y_test, y_pred)
print("The confusion matrix is ")
print(cm)

The confusion matrix is 
[[118  17]
 [ 29 136]]


In [41]:
print("The classification report is ")
print(classification_report(y_test, y_pred))

The classification report is 
              precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



Hyperparameter Turning and Cross Validation

In [42]:
model=LogisticRegression()
penalty = ['l1', 'l2','elasticnet']
c=[0.001,0.01,0.1,1,10,100]
solver=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

In [43]:
params=dict(penalty=penalty, C=c, solver=solver)

In [44]:
from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()

In [45]:
from sklearn.model_selection import GridSearchCV

grid=GridSearchCV(estimator=model, param_grid=params,scoring='accuracy',cv=cv,n_jobs=-1)

In [46]:
grid

In [47]:
grid.fit(X_train, y_train)

240 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\skle

In [48]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}

In [49]:
grid.best_score_

np.float64(0.8785714285714287)

In [50]:
y_pred=grid.predict(X_test)

In [51]:
print("The accuracy of the model is ",accuracy_score(y_test, y_pred))
cm=confusion_matrix(y_test, y_pred)
print("The confusion matrix is ")
print(cm)
print("The classification report is ")
print(classification_report(y_test, y_pred))

The accuracy of the model is  0.8533333333333334
The confusion matrix is 
[[124  11]
 [ 33 132]]
The classification report is 
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



Randomised Search CV

In [55]:
from sklearn.model_selection import RandomizedSearchCV
randomizedSearchCV=RandomizedSearchCV(estimator=model, param_distributions=params,scoring='accuracy',cv=5)

In [56]:
randomizedSearchCV.fit(X_train, y_train)

20 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\17viv\Documents\MLPrac\venv\lib\site-packages\sklearn

In [57]:
randomizedSearchCV.best_score_ 

np.float64(0.8785714285714287)

In [58]:
randomizedSearchCV.best_params_

{'solver': 'lbfgs', 'penalty': 'l2', 'C': 0.01}

In [59]:
y_pred=randomizedSearchCV.predict(X_test)

In [60]:
print("The accuracy of the model is ",accuracy_score(y_test, y_pred))
cm=confusion_matrix(y_test, y_pred)
print("The confusion matrix is ")
print(cm)
print("The classification report is ")
print(classification_report(y_test, y_pred))


The accuracy of the model is  0.8533333333333334
The confusion matrix is 
[[124  11]
 [ 33 132]]
The classification report is 
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300

