In [1]:
from sklearn.datasets import load_iris  # https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
# Load the bundled Iris dataset; the returned object exposes the .data, .target,
# .feature_names and .target_names attributes used in the cells below.
iris = load_iris()

In [2]:
# Supervised-learning setup: learn a function f such that f(X) = y, where
#   X holds the input measurements (one row per sample) and
#   y holds the target class index of each sample.
X, y = iris.data, iris.target

# Human-readable labels for the input columns and for the class indices.
feature_names, target_names = iris.feature_names, iris.target_names

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation and train on the remaining 80%.
#   random_state pins the shuffle, so the split and everything computed from it
#     is reproducible on Restart Kernel -> Run All.
#   stratify=y keeps the class proportions identical in both subsets.
# Trade-off when choosing test_size: a smaller test set leaves more data for
# training, but the accuracy measured on it is noisier (not systematically
# higher); a larger test set gives a steadier estimate at the cost of training data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Sanity check: (n_samples, n_features) for the inputs, (n_samples,) for the targets.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(120, 4)
(30, 4)
(120,)
(30,)


In [4]:
# Model: k-nearest neighbours, which labels a sample by majority vote among the
# closest training samples. Any other scikit-learn classifier could be swapped in.
from sklearn.neighbors import KNeighborsClassifier

# n_neighbors is the number of neighbours that vote on each prediction. It is a
# tunable hyperparameter and is independent of the number of classes.
# fit() returns the fitted estimator, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predicted class index for every held-out sample.
y_pred = knn.predict(X_test)

In [5]:
from sklearn import metrics

# Fraction of held-out samples whose predicted class equals the true class.
accuracy = metrics.accuracy_score(y_test, y_pred)
print(accuracy)

0.9333333333333333


In [6]:
# Two made-up flowers: four measurements per row, matching the four feature
# columns the model was trained on.
sample = [[3, 5, 4, 2], [2, 3, 5, 4]]
predictions = knn.predict(sample)

# Translate class indices into species names. Indexing with the whole prediction
# array and calling .tolist() yields plain Python strings, so the printed list
# looks the same on NumPy 1.x and 2.x (NumPy 2 would otherwise render every
# element as np.str_('...')).
pred_species = iris.target_names[predictions].tolist()
print('predicitons: ', pred_species)

predicitons:  ['versicolor', 'virginica']


In [7]:
from joblib import dump, load

# Persist the fitted classifier to disk so later sessions can reuse it instead of
# paying the time/resource cost of retraining. Whenever the model is refit,
# re-run this cell to overwrite the file with the updated model.
dump(value=knn, filename='mlbrain.joblib')

['mlbrain.joblib']

In [8]:
# Reload the persisted classifier. joblib files are pickle-based, so only load
# files that come from a source you trust.
model = load('mlbrain.joblib')

# Round-trip check: the reloaded model must reproduce the predictions that the
# in-memory model made on the held-out set.
assert (model.predict(X_test) == y_pred).all(), 'reloaded model disagrees with the in-memory model'

# Classify the same two made-up flowers with the reloaded model.
sample = [[3, 5, 4, 2], [2, 3, 5, 4]]
predictions = model.predict(sample)

# .tolist() yields plain Python strings, so the printed list looks the same on
# NumPy 1.x and 2.x (NumPy 2 would otherwise render elements as np.str_('...')).
pred_species = iris.target_names[predictions].tolist()
print('predicitons: ', pred_species)

predicitons:  ['versicolor', 'virginica']
