In [None]:
# Standard imports
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 1. Getting the data ready

In [None]:
from sklearn import datasets

In [None]:
# Load the iris dataset; the returned object exposes .data, .target,
# .feature_names, .target_names and .DESCR, which the next cells inspect.
iris = datasets.load_iris()

In [None]:
# Show the description text that ships with the dataset.
print(iris.DESCR)

In [None]:
# Print the raw feature matrix.
# NOTE(review): this prints the whole array; a slice such as iris.data[:5]
# would keep the notebook output short.
print(iris.data)

In [None]:
# Names of the feature columns in iris.data.
print(iris.feature_names)

# - `sepal` = กลีบเลี้ยง

# - `petal` = กลีบดอกไม้

![Diagram of an iris flower showing the sepal and the petal](https://www.w3resource.com/w3r_images/iris_flower_sepal_and_petal.png)

In [None]:
# Print the label array (one class label per sample).
print(iris.target)

In [None]:
# Print the class names that the labels refer to.
print(iris.target_names)

![Iris flowers illustration for the iris machine-learning dataset](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Machine+Learning+R/iris-machinelearning.png)

## 1.1 Create DataFrame

In [None]:
# Reload iris in pandas form and keep the combined frame
# (feature columns plus the "target" column).
iris_as_frame = datasets.load_iris(as_frame=True)
df = iris_as_frame.frame

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics, transposed so each column of df becomes one row.
df.describe().T

In [None]:
# Number of samples per class label.
df["target"].value_counts()

In [None]:
# Feature matrix: every column except the label column.
X = df.drop(columns="target")
X.head()

In [None]:
# Label vector: the "target" column on its own.
y = df["target"]
y.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split features and labels into train and test subsets; the fixed
# random_state makes the shuffle repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=27)

In [None]:
# Preview the training features.
X_train.head()

In [None]:
# Preview the training labels.
y_train.head()

In [None]:
# Preview the test features.
X_test.head()

In [None]:
# Preview the test labels.
y_test.head()

In [None]:
# Report the size of the training split; both counts should be equal.
print(f"Number of Training Set (Features): {len(X_train)}")
print(f"Number of Training Set (Labels)  : {len(y_train)}")

In [None]:
# Report the size of the test split; both counts should be equal.
print(f"Number of Testing Set (Features): {len(X_test)}")
print(f"Number of Testing Set (Labels)  : {len(y_test)}")

## 2. Choose the model and hyperparameters

In [None]:
# We'll use a single decision tree classifier
from sklearn.tree import DecisionTreeClassifier

# Fix random_state: scikit-learn permutes the features at each split, so ties
# between equally good splits can otherwise resolve differently on each run.
model = DecisionTreeClassifier(random_state=27)

In [None]:
# Inspect the hyperparameters the estimator is currently configured with.
model.get_params()

##  3. Fit the model/algorithm and use it to make predictions on our data

In [None]:
# Train the classifier on the training split.
model.fit(X_train, y_train)

In [None]:
from sklearn.tree import plot_tree, export_text

In [None]:
# Draw the fitted tree on an explicit Axes. class_names is converted to a
# plain list because some scikit-learn releases reject a NumPy array here.
fig, ax = plt.subplots(figsize=(10, 9))
plot_tree(
    model,
    feature_names=iris.feature_names,
    class_names=iris.target_names.tolist(),
    filled=True,
    ax=ax,
);

In [None]:
# Print a plain-text rendering of the fitted tree's decision rules.
print(export_text(model, feature_names=iris.feature_names, show_weights=True))

In [None]:
# Accuracy on the data the model was trained on.
model.score(X_train, y_train)

In [None]:
# Accuracy on the held-out test split.
model.score(X_test, y_test)

In [None]:
# The model was fitted on a DataFrame, so give the sample the same column
# names; predicting on a bare list makes scikit-learn warn that the input
# has no valid feature names.
sample = pd.DataFrame([[10, 5, 5, 6]], columns=X.columns)
model.predict(sample)

In [None]:
# Same hand-made sample, wrapped in a DataFrame with the training column
# names (avoids scikit-learn's missing-feature-names warning), then mapped
# from the predicted label to the class name.
sample = pd.DataFrame([[10, 5, 5, 6]], columns=X.columns)
print(iris.target_names[model.predict(sample)])

In [None]:
# Predicted labels for the test split, reused by the evaluation cells below.
y_preds = model.predict(X_test)

## 4. Evaluate the model

In [None]:
# credit code: https://www.kaggle.com/grfiv4/plot-a-confusion-matrix 

def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Plot a confusion matrix as an annotated heat map.

    Arguments
    ---------
    cm:           confusion matrix, e.g. from sklearn.metrics.confusion_matrix.
                  Any 2-D array-like of counts is accepted; rows are plotted
                  as the true labels and columns as the predicted labels.

    target_names: the class names used as tick labels, for example
                  ['high', 'medium', 'low']. Pass None to keep the default
                  numeric ticks.

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)

    normalize:    If False, plot the raw numbers
                  If True, plot each row as proportions of that row's total;
                  a row whose total is zero is shown as all zeros

    Returns
    -------
    None. The figure is displayed with plt.show().

    Notes
    -----
    The accuracy / misclassification rate in the x-axis label is always
    computed from the raw counts, regardless of `normalize`.

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import itertools

    import matplotlib.pyplot as plt
    import numpy as np

    # Accept nested lists as well as ndarrays.
    cm = np.asarray(cm)

    # Overall accuracy comes from the raw counts, before any normalisation.
    total = float(np.sum(cm))
    accuracy = np.trace(cm) / total if total else float('nan')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalise BEFORE drawing so that the cell colours, the colour bar,
        # the printed numbers and the text-colour threshold all refer to the
        # same values. Rows with no samples stay at zero instead of NaN.
        row_totals = cm.sum(axis=1, keepdims=True).astype(float)
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so it stays readable.
    cell_format = "{:0.4f}" if normalize else "{:,}"
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out only after the labels exist so they are included in the fit.
    plt.tight_layout()
    plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Raw confusion matrix for the test split (true labels vs. predictions).
confusion_matrix(y_test, y_preds)

In [None]:
# Legend for the matrix above: integer class label -> class name.
# Iterating covers every class instead of hard-coding indices 0..2.
for class_index, class_name in enumerate(iris.target_names):
    print(f"class[{class_index}]: {class_name}")

In [None]:
# Plot the test-set confusion matrix with raw counts (normalize=False)
# and the class names as tick labels.
plot_confusion_matrix(cm           = confusion_matrix(y_test, y_preds), 
                      normalize    = False,
                      target_names = iris.target_names,
                      title        = "Confusion Matrix")

## 5. Improve a model

In [None]:
# A shallower tree split on entropy. random_state is fixed because
# scikit-learn permutes features at each split, so tied splits could
# otherwise produce a different tree on each run.
model = DecisionTreeClassifier(criterion="entropy", max_depth=2, random_state=27)

In [None]:
# Train the new, depth-limited tree on the training split.
model.fit(X_train, y_train)

In [None]:
# NOTE(review): this repeats the fit from the previous cell on the same data;
# the second fit looks redundant.
model.fit(X_train, y_train)

In [None]:
# Draw the depth-limited tree on an explicit Axes. class_names is converted
# to a plain list because some scikit-learn releases reject a NumPy array here.
fig, ax = plt.subplots(figsize=(10, 9))
plot_tree(
    model,
    feature_names=iris.feature_names,
    class_names=iris.target_names.tolist(),
    filled=True,
    ax=ax,
);

In [None]:
# Print a plain-text rendering of the depth-limited tree's decision rules.
print(export_text(model, feature_names=iris.feature_names, show_weights=True))

In [None]:
# Accuracy of the depth-limited tree on the held-out test split.
model.score(X_test, y_test)

In [None]:
# Refresh the test-set predictions using the depth-limited tree.
y_preds = model.predict(X_test)

In [None]:
# NOTE(review): same call as two cells above with the model unchanged in
# between; this repeated score looks redundant.
model.score(X_test, y_test)

In [None]:
# Raw confusion matrix for the depth-limited tree on the test split.
confusion_matrix(y_test, y_preds)

In [None]:
# Legend for the matrix above: integer class label -> class name.
# Iterating covers every class instead of hard-coding indices 0..2.
for class_index, class_name in enumerate(iris.target_names):
    print(f"class[{class_index}]: {class_name}")

In [None]:
# Plot the confusion matrix of the depth-limited tree with raw counts
# (normalize=False) and the class names as tick labels.
plot_confusion_matrix(cm           = confusion_matrix(y_test, y_preds), 
                      normalize    = False,
                      target_names = iris.target_names,
                      title        = "Confusion Matrix")

# 6. Saving and loading a model with [`pickle`](https://docs.python.org/3/library/pickle.html)

In [None]:
import os
import pickle

# Save an existing model to file.
# Create the target directory first so the cell also works on a fresh checkout.
os.makedirs("model", exist_ok=True)

# wb = write binary; the with-block closes (and flushes) the file afterwards.
with open("model/model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# Load a saved model (rb = read binary); the with-block closes the file.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open("model/model.pkl", "rb") as model_file:
    loaded_pickle_model = pickle.load(model_file)

In [None]:
# Predict on the test split with the reloaded model and display the
# predicted labels; the following cells evaluate them.
pickle_y_preds = loaded_pickle_model.predict(X_test)
pickle_y_preds

In [None]:
# Raw confusion matrix for the reloaded model's predictions on the test split.
confusion_matrix(y_test, pickle_y_preds)

In [None]:
# Legend for the matrix above: integer class label -> class name.
# Iterating covers every class instead of hard-coding indices 0..2.
for class_index, class_name in enumerate(iris.target_names):
    print(f"class[{class_index}]: {class_name}")

In [None]:
# Plot the confusion matrix of the reloaded model with raw counts
# (normalize=False) and the class names as tick labels.
plot_confusion_matrix(cm           = confusion_matrix(y_test, pickle_y_preds), 
                      normalize    = False,
                      target_names = iris.target_names,
                      title        = "Confusion Matrix")