In [22]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.metrics import accuracy_score

In [23]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ["sepal-length", "sepal-width", "petal-length", "petal-width", "class"]

df = pd.read_csv(url, names=names)

df.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [24]:
df.tail()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


In [25]:
df.shape

(150, 5)

Training dataset, which contains 0, 2, 4, ... ,148th samples

In [26]:
X_train = df.values[0::2, 0:4]            # (75,4)
y_train = df.values[0::2, 4: ]
y_train = np.resize(y_train, len(df)//2)  # (75,)

Testing dataset, samples from 1, 3, 5, ... ,149th samples

In [53]:
X_test = df.values[1::2, 0:4]            # (75,4)
y_test = df.values[1::2, 4: ]
y_test = np.resize(y_test, len(df)//2)   # (75,)

Use a multinomial logistic regression model (4 numerical features, 1 categorical target)

In [78]:
model = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs', penalty='l2')
model.fit(X_train, y_train)

In [79]:
prediction = model.predict(X_test)
accuracy = accuracy_score(y_test, prediction)
print(f"Model accuracy: {accuracy * 100}%")

Model accuracy: 96.0%


Accuracy of multinomial Logistic Regression (solver=lbfgs, penalty=l2) on Iris dataset is 96.0%

Using probabilistic multinomial Logistic Regression:

In [80]:
proba_pred = model.predict_proba(X_test)
prediction = np.empty(len(proba_pred), dtype="S15")

classes = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

for i in range(len(proba_pred)):
    prediction[i] = classes[np.argmax(proba_pred[i])]
    
# prediction
accuracy = accuracy_score(y_test, prediction)
accuracy
prediction

array([b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-virginica',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolor', b'Iris-versicolor',
       b'Iris-versicolor', b'Iris-versicolo