In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import pickle

In [3]:
# Load the Iris dataset bundled with scikit-learn; later cells read its
# "data", "target", "feature_names" and "target_names" entries.
iris_data = load_iris()

In [4]:
# The dataset's column names double as the feature list used to select
# model inputs in the cells below.
features = iris_data["feature_names"]
data = pd.DataFrame(iris_data["data"], columns=features)

# Map every integer class code to its species name and store the result
# as a categorical Series.
target = pd.Series(
    [iris_data["target_names"][code] for code in iris_data["target"]],
    dtype="category",
)

In [5]:
# Hold out 20% of the rows for evaluation.
# - random_state pins the shuffle so Restart & Run All reproduces the same split.
# - stratify keeps the species proportions identical in the train and test sets.
# NOTE: outputs saved from an earlier, unseeded run may differ until the
# notebook is re-executed.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.2,
    stratify=target,
    random_state=42,
)

In [6]:
# Multiclass logistic regression; max_iter is raised to 1000 iterations
# for the lbfgs solver.
clf = LogisticRegression(
    solver='lbfgs',
    multi_class='auto',
    max_iter=1000,
)
# Fit on the training rows only; the fitted estimator is the cell output.
clf.fit(X_train[features], y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [7]:
# Predict labels for both splits so performance on training rows can be
# compared with performance on the held-out rows.
y_train_pred, y_test_pred = (
    clf.predict(split[features]) for split in (X_train, X_test)
)

In [8]:
# Confusion matrix on the training split.
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[40,  0,  0],
       [ 0, 37,  3],
       [ 0,  1, 39]])

In [9]:
# Fraction of training rows whose predicted label matches the true label.
accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.9666666666666667

In [10]:
# Confusion matrix on the held-out test split.
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[10,  0,  0],
       [ 0, 10,  0],
       [ 0,  0, 10]])

In [11]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

1.0

In [27]:
# Persist the fitted classifier to disk using the highest pickle protocol
# this interpreter supports.
with open('iris_classifier.pickle', mode='wb') as model_file:
    pickle.dump(clf, model_file, protocol=pickle.HIGHEST_PROTOCOL)

In [28]:
# Persist the feature-name list next to the model so the same columns can
# be selected when the classifier is loaded again.
with open("iris_classifier_features.pickle", mode="wb") as features_file:
    pickle.dump(features, features_file, protocol=pickle.HIGHEST_PROTOCOL)

In [12]:
# A single unseen flower, laid out column-by-column as a one-row frame
# whose column names match the training features.
new_iris_flower = pd.DataFrame(
    {
        "sepal length (cm)": [4.5],
        "sepal width (cm)": [2.0],
        "petal length (cm)": [1.5],
        "petal width (cm)": [0.2],
    }
)

new_iris_flower

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,4.5,2.0,1.5,0.2


In [30]:
# Restore the classifier and its feature list from the files written above.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only
# load pickle files that come from a trusted source.
with open("iris_classifier.pickle", mode="rb") as model_file:
    clf = pickle.load(model_file)

with open("iris_classifier_features.pickle", mode="rb") as features_file:
    features = pickle.load(features_file)

In [13]:
# Predicted species label for the new flower, using the stored feature columns.
clf.predict(new_iris_flower.loc[:, features])

array(['setosa'], dtype=object)

In [14]:
# Per-class probabilities for the new flower, one column per class.
clf.predict_proba(new_iris_flower.loc[:, features])

array([[8.99316896e-01, 1.00682129e-01, 9.75150108e-07]])