## Logistic Multiclass Classification

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load IRIS dataset
from sklearn.datasets import load_iris
dataset = load_iris()
dataset.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [3]:
dataset.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [4]:
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
df['class'] = dataset.target
df.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2
149,5.9,3.0,5.1,1.8,2


In [5]:
df.shape

(150, 5)

In [6]:
df['class'].value_counts()

0    50
1    50
2    50
Name: class, dtype: int64

In [7]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [8]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [9]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: class, dtype: int64

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, shuffle=True)

In [11]:
# Hyperparameter Tuning using GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings('ignore')

# Define the parameters
parameters = {'multi_class':('ovr', 'multinomial'), 'penalty':('l1', 'l2', 'elasticnet'), 'C':[1,10,20,30], 'solver':('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga')} 
clf = GridSearchCV(LogisticRegression(), param_grid=parameters, cv=5)

In [12]:
clf.fit(X_train, y_train)

In [13]:
clf.best_params_

{'C': 1, 'multi_class': 'multinomial', 'penalty': 'l1', 'solver': 'saga'}

In [14]:
mul_classifier = LogisticRegression(multi_class='multinomial', C=1, penalty='l2', solver='sag')

In [15]:
mul_classifier.fit(X_train, y_train)

In [16]:
y_pred = mul_classifier.predict(X_test)

In [17]:
y_pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [21]:
mul_classifier.predict_proba(X_test)

array([[7.58313577e-03, 7.88408238e-01, 2.04008626e-01],
       [9.77116801e-01, 2.28829578e-02, 2.41135232e-07],
       [4.61733937e-07, 7.70028256e-03, 9.92299256e-01],
       [1.17653185e-02, 7.32674402e-01, 2.55560280e-01],
       [6.87403599e-03, 8.13750275e-01, 1.79375689e-01],
       [9.67719182e-01, 3.22801751e-02, 6.43217929e-07],
       [9.51950818e-02, 8.69074752e-01, 3.57301666e-02],
       [1.61305217e-03, 2.66034870e-01, 7.32352078e-01],
       [3.02029348e-03, 6.18060884e-01, 3.78918822e-01],
       [3.88313523e-02, 9.03646671e-01, 5.75219770e-02],
       [2.46675078e-03, 3.42112274e-01, 6.55420975e-01],
       [9.46498407e-01, 5.35002288e-02, 1.36407609e-06],
       [9.83362662e-01, 1.66372414e-02, 9.65205877e-08],
       [9.45413545e-01, 5.45850751e-02, 1.37965360e-06],
       [9.83196890e-01, 1.68028706e-02, 2.39283202e-07],
       [1.63615023e-02, 7.52624894e-01, 2.31013603e-01],
       [4.72532757e-05, 4.45614130e-02, 9.55391334e-01],
       [2.68566037e-02, 9.00280

In [18]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
print(accuracy_score(y_test, y_pred))

1.0


In [19]:
print(confusion_matrix(y_test, y_pred))

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [20]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

