In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score, roc_curve, multilabel_confusion_matrix

In [2]:
# Read the Iris dataset from 'Iris.csv' (path is relative to the working directory).
# The bare `df` on the last line renders the frame as the cell output.
df = pd.read_csv('Iris.csv')
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [4]:
df['Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
# Encode the three species names as integer class labels
# (setosa=0, versicolor=1, virginica=2) so the target column is numeric.
# Re-running this cell is harmless: already-encoded values match no key.
# NOTE(review): recent pandas releases may emit a FutureWarning about implicit
# downcasting when `replace` turns an object column into ints -- confirm on the
# pandas version in use.
df['Species'] = df['Species'].replace({'Iris-setosa':0, 'Iris-versicolor':1,'Iris-virginica':2})

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    int64  
dtypes: float64(4), int64(2)
memory usage: 7.2 KB


In [7]:
df

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,2
146,147,6.3,2.5,5.0,1.9,2
147,148,6.5,3.0,5.2,2.0,2
148,149,6.2,3.4,5.4,2.3,2


In [8]:
# Feature matrix: every column except the row identifier and the target.
x = df.drop(columns=['Id', 'Species'])
# Target vector: the integer-encoded species label.
y = df['Species']

In [9]:
x

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [10]:
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: Species, Length: 150, dtype: int64

In [11]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions equal in both splits; `random_state` pins the shuffle so the
# split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

In [12]:
y_test.value_counts()

0    10
1    10
2    10
Name: Species, dtype: int64

In [13]:
# One-vs-rest logistic regression: one binary classifier is fitted per species
# and the most confident one wins.
# The `multi_class='ovr'` argument of LogisticRegression is deprecated in
# scikit-learn >= 1.5; wrapping the estimator in OneVsRestClassifier is the
# replacement recommended by the scikit-learn documentation and works on
# older releases as well.
clf_model = OneVsRestClassifier(LogisticRegression())
clf_model.fit(x_train, y_train)

In [14]:
# Predicted class labels for the held-out rows; the bare name on the last line
# echoes the array as the cell output.
y_pred = clf_model.predict(x_test)
y_pred

array([2, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 1, 2, 1, 2, 0, 2, 1, 1, 2, 1, 1,
       0, 0, 2, 1, 0, 0, 2, 1], dtype=int64)

In [15]:
y_test

107    2
9      0
98     1
11     0
43     0
0      0
148    2
111    2
121    2
76     1
16     0
53     1
112    2
62     1
113    2
15     0
102    2
71     1
57     1
127    2
74     1
66     1
37     0
8      0
147    2
106    2
5      0
12     0
56     1
54     1
Name: Species, dtype: int64

In [16]:
# Per-class probability estimates for the held-out rows (one row per sample).
# NOTE(review): no later cell visible here consumes this value (it is only
# referenced in a commented-out cell) -- confirm whether it is still needed.
y_pred_prob = clf_model.predict_proba(x_test)

In [17]:
# y_pred_prob

In [18]:
# Fraction of held-out samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Testing data accuracy is :', test_accuracy)

Testing data accuracy is : 0.9333333333333333


In [19]:
# Score the model on the data it was fitted on; comparing this with the test
# accuracy gives a quick check for over-fitting.
y_pred_train = clf_model.predict(x_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_pred_train)
print('Training data accuracy is:', train_accuracy)

Training data accuracy is: 0.9583333333333334


In [20]:
# One 2x2 matrix per class, treating that class as "positive" and all others
# as "negative"; scikit-learn lays each matrix out as [[TN, FP], [FN, TP]].
multilabel_confusion_matrix(y_test,y_pred)

array([[[20,  0],
        [ 0, 10]],

       [[19,  1],
        [ 1,  9]],

       [[19,  1],
        [ 1,  9]]], dtype=int64)

In [21]:
# Multi-class confusion matrix: rows are the true classes, columns are the
# predicted classes, so off-diagonal entries are misclassifications.
confusion_matrix(y_test,y_pred)

array([[10,  0,  0],
       [ 0,  9,  1],
       [ 0,  1,  9]], dtype=int64)

In [22]:
# Per-class precision, recall, F1 and support on the test split. The report is
# a pre-formatted string, so it is printed rather than echoed.
clf_report = classification_report(y_test,y_pred)
print(clf_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      0.90      0.90        10
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30



In [24]:
# x_test

In [25]:
clf_model.predict(x_test)

array([2, 0, 1, 0, 0, 0, 2, 2, 2, 1, 0, 1, 2, 1, 2, 0, 2, 1, 1, 2, 1, 1,
       0, 0, 2, 1, 0, 0, 2, 1], dtype=int64)

In [26]:
x_test.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
107,7.3,2.9,6.3,1.8
9,4.9,3.1,1.5,0.1
98,5.1,2.5,3.0,1.1
11,4.8,3.4,1.6,0.2
43,5.0,3.5,1.6,0.6


In [27]:
clf_model.predict(x_test.head())

array([2, 0, 1, 0, 0], dtype=int64)

In [30]:
# Predict a single held-out row. The slice `iloc[2:3]` (rather than `iloc[2]`)
# keeps the row as a one-row DataFrame, i.e. the 2-D input `predict` expects.
clf_model.predict(x_test.iloc[2:3])

array([1], dtype=int64)

In [31]:
x_test.iloc[2:3]

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
98,5.1,2.5,3.0,1.1


In [32]:
x_test.columns

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'], dtype='object')

In [36]:
def predict_class(SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm):
    """Predict the encoded species for one flower and print it.

    Parameters
    ----------
    SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm : float
        The four measurements, in the same order as the training columns.

    Returns
    -------
    The predicted class label (first element of the array returned by
    ``clf_model.predict``), using the notebook's integer encoding
    0 = Iris-setosa, 1 = Iris-versicolor, 2 = Iris-virginica.
    """
    # The model was fitted on a DataFrame with named columns. Passing a bare
    # nested list makes scikit-learn warn that the input has no valid feature
    # names and skips its column-name consistency check, so build a one-row
    # frame with the training column names instead.
    features = pd.DataFrame(
        [[SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm]],
        columns=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
    )
    prediction = clf_model.predict(features)
    print('Predicted class is:',prediction[0])
    return prediction[0]

In [40]:
predict_class(5,3,1,0)

Predicted class is: 0




0

In [42]:
import pickle
import os

# Persist the fitted classifier to model/model.pkl.
model_folder_path = 'model'
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race and also works if the path ever becomes nested.
os.makedirs(model_folder_path, exist_ok=True)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left open until garbage collection.
with open(f'{model_folder_path}/model.pkl', 'wb') as model_file:
    pickle.dump(clf_model, model_file)