## Logistic Regression One Versus Rest (Multi Class Classification)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

In [3]:
## create the dataset
X, y = make_classification(n_samples=1000, n_features=10, n_classes=3, n_informative=3, random_state=42)

In [4]:
pd.DataFrame(X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.771533,-1.474336,2.196646,-0.678992,0.767479,-1.226179,-0.237566,0.681743,1.089962,0.962503
1,1.858383,-3.680880,0.227496,-3.818368,0.635968,1.609521,-0.153135,-0.474178,1.341139,-0.771772
2,-0.987248,1.539168,0.585904,1.185281,-0.736770,-1.406815,0.684783,-0.322028,-1.451034,1.325432
3,-1.022420,1.381787,2.015275,0.861434,1.290644,-1.889649,1.009138,0.363116,-1.844238,0.311110
4,-0.131161,-2.079008,1.483744,1.636858,-0.734811,-0.640154,2.666422,-1.085748,-0.892851,0.029796
...,...,...,...,...,...,...,...,...,...,...
995,0.982677,-1.908917,-0.385673,-2.565989,0.434503,-1.334611,-0.060318,1.040062,0.433730,-0.739771
996,-1.144949,1.434090,-0.459657,0.358606,-1.079842,0.918915,1.339638,1.193113,-2.473007,1.021515
997,-1.227082,1.656028,0.472630,1.718125,-1.366858,0.018017,1.131626,-0.863494,-1.839323,-0.031203
998,1.285276,-0.297152,-0.671721,-1.843337,-1.254161,-0.276889,-2.913304,-1.315441,2.854465,1.309444


In [5]:
pd.DataFrame(y)

Unnamed: 0,0
0,1
1,2
2,1
3,1
4,2
...,...
995,2
996,1
997,1
998,1


In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.30, random_state=42)

In [8]:
from sklearn.linear_model import LogisticRegression

In [9]:
logistic_reg = LogisticRegression(multi_class='ovr')

In [10]:
logistic_reg.fit(X_train, y_train)



In [11]:
y_pred = logistic_reg.predict(X_test)

In [12]:
y_pred

array([2, 0, 1, 2, 0, 1, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0,
       2, 0, 1, 0, 2, 1, 0, 2, 2, 0, 1, 0, 0, 2, 2, 2, 1, 0, 0, 1, 2, 0,
       1, 2, 1, 0, 1, 1, 2, 0, 1, 0, 2, 2, 2, 2, 1, 2, 0, 2, 2, 2, 1, 1,
       0, 1, 0, 1, 0, 2, 2, 0, 0, 0, 2, 1, 1, 2, 2, 0, 2, 1, 0, 1, 1, 2,
       1, 1, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 2, 1, 0, 2, 1, 1, 0, 0, 2,
       0, 1, 2, 0, 0, 0, 1, 1, 2, 2, 1, 0, 0, 1, 2, 0, 0, 1, 0, 2, 1, 0,
       2, 2, 2, 0, 0, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 0, 0, 1, 2, 0, 0, 2,
       1, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0, 2, 1, 0, 0, 2,
       0, 1, 1, 2, 2, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 1, 2, 2, 1, 2,
       0, 1, 2, 2, 1, 1, 0, 2, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 1, 0, 1, 2, 0, 0, 2,
       0, 2, 0, 0, 2, 2, 0, 2, 2, 0, 1, 2, 2, 0, 1, 1, 0, 1, 2, 0, 2, 2,
       0, 0, 0, 2, 1, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0, 1, 0, 2, 2, 2,
       0, 2, 2, 0, 2, 2, 0, 2, 0, 0, 2, 2, 0, 1])

In [13]:
y_proba = logistic_reg.predict_proba(X_test)

In [14]:
print(y_proba)

[[9.22522618e-03 2.44034837e-01 7.46739937e-01]
 [4.89544732e-01 2.45683892e-01 2.64771376e-01]
 [3.54058728e-01 5.90982894e-01 5.49583777e-02]
 [3.61503652e-02 1.82525799e-01 7.81323836e-01]
 [4.01755652e-01 3.69771801e-01 2.28472547e-01]
 [3.83231093e-01 5.21315790e-01 9.54531172e-02]
 [2.80650254e-01 2.18835528e-01 5.00514218e-01]
 [6.86748730e-01 3.10472372e-01 2.77889720e-03]
 [5.72702266e-01 4.24212176e-01 3.08555794e-03]
 [2.69546511e-02 1.49732219e-01 8.23313130e-01]
 [7.81732426e-02 2.40515273e-01 6.81311484e-01]
 [1.25869742e-02 1.72900197e-01 8.14512828e-01]
 [4.99079983e-02 1.76510922e-01 7.73581080e-01]
 [3.95794074e-02 3.56708621e-01 6.03711972e-01]
 [2.01209577e-02 2.98735180e-01 6.81143862e-01]
 [7.35993162e-03 2.18049789e-01 7.74590279e-01]
 [2.71868855e-02 2.81421225e-01 6.91391890e-01]
 [2.36865580e-01 4.86991105e-01 2.76143315e-01]
 [5.86578157e-01 3.56583187e-01 5.68386559e-02]
 [6.45857344e-01 3.06957260e-01 4.71853953e-02]
 [6.93031625e-01 2.47770491e-01 5.919788

In [15]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [16]:
score = accuracy_score(y_test, y_pred)
score

0.68

In [17]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[79, 16,  8],
       [31, 38, 27],
       [ 3, 11, 87]])

In [18]:
cr = classification_report(y_test, y_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.70      0.77      0.73       103
           1       0.58      0.40      0.47        96
           2       0.71      0.86      0.78       101

    accuracy                           0.68       300
   macro avg       0.67      0.67      0.66       300
weighted avg       0.67      0.68      0.66       300



### Grid Search CV

In [19]:
logistic_reg = LogisticRegression()

In [20]:
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
multi_class = ['ovr', 'multinomial']
c_vals = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
penalty = ['l1', 'l2', 'elasticnet']
max_iter = [100, 200, 500, 1000]
warm_start = [True, False]

In [21]:
hp = dict(solver = solvers, multi_class = multi_class, C = c_vals, penalty = penalty, max_iter = max_iter, warm_start = warm_start)

In [22]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold

In [23]:
cv = StratifiedKFold()

In [24]:
grid = GridSearchCV(estimator=logistic_reg, param_grid=hp, scoring='accuracy', cv=cv, n_jobs=-1)

In [25]:
grid.fit(X_train, y_train)

6160 fits failed out of a total of 10080.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
560 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/dhruvsmac/Desktop/development/machine learning/.venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/dhruvsmac/Desktop/development/machine learning/.venv/lib/python3.12/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/dhruvsmac/Desktop/development/machine learning/.venv/lib/python3.12/site-packages/sklearn/linear_model/_logistic

In [26]:
y_pred = grid.predict(X_test)

In [27]:
grid.best_params_

{'C': 0.01,
 'max_iter': 100,
 'multi_class': 'multinomial',
 'penalty': 'l1',
 'solver': 'saga',
 'warm_start': True}

In [28]:
score = accuracy_score(y_test, y_pred)
score

0.7033333333333334

In [29]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[78, 17,  8],
       [28, 43, 25],
       [ 3,  8, 90]])

In [30]:
cr = classification_report(y_test, y_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.72      0.76      0.74       103
           1       0.63      0.45      0.52        96
           2       0.73      0.89      0.80       101

    accuracy                           0.70       300
   macro avg       0.69      0.70      0.69       300
weighted avg       0.69      0.70      0.69       300

