In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict, KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
import sys

In [None]:
def get_params(random_state=42):
    """Yield ``(title, estimator)`` pairs for each classifier to evaluate.

    Estimators are created lazily, one per iteration, so every call to this
    generator hands out fresh, unfitted instances.

    Parameters
    ----------
    random_state : int or None, default=42
        Seed forwarded to the estimators that accept a ``random_state``
        argument (Linear SVC, Decision Tree, Random Forest) so that repeated
        runs of the notebook give the same scores. Pass ``None`` to get
        scikit-learn's unseeded behaviour.

    Yields
    ------
    tuple
        ``(title, estimator)`` where ``title`` is a display string and
        ``estimator`` is an unfitted scikit-learn classifier.
    """
    # These two estimators take no random_state argument.
    yield "Gaussian Naive Bayes", GaussianNB()

    yield "K Nearest Neighbor", KNeighborsClassifier()

    # The remaining estimators are seeded so results are repeatable.
    yield "Linear SVC", LinearSVC(random_state=random_state)

    yield "Decision Tree", DecisionTreeClassifier(random_state=random_state)

    yield "Random Forest", RandomForestClassifier(random_state=random_state)


def print_scores(title, cm, acc, file=sys.stdout):
    """Write a short report for one classifier to ``file``.

    Parameters
    ----------
    title : str
        Heading printed first, followed by a blank line.
    cm : array-like
        Confusion matrix; printed as-is, and its entries are summed with
        ``np.sum`` for the "sum of the values" line.
    acc : float
        Accuracy score to print.
    file : file-like, default=sys.stdout
        Destination stream handed to every ``print`` call.
    """
    # Every print must receive ``file``; otherwise the argument is ignored
    # and the report always lands on stdout.
    print("{}\n".format(title), file=file)
    print("The confusion matrix is: ", file=file)
    print(cm, file=file)
    print("The sum of the values is {}".format(np.sum(cm)), file=file)
    print("The accuracy score: {}\n".format(acc), file=file)


def get_scores(X, y, *, clf):
    """Cross-validate ``clf`` on ``(X, y)`` and summarise its predictions.

    Predictions are collected with ``cross_val_predict`` over a 5-split,
    shuffled ``KFold`` seeded with 42, then compared against ``y``.

    Parameters
    ----------
    X : features passed to ``cross_val_predict``.
    y : true labels.
    clf : estimator to evaluate (keyword-only).

    Returns
    -------
    tuple
        ``(cm, acc)``: the result of ``confusion_matrix(y, y_pred)`` and of
        ``accuracy_score(y, y_pred)``.
    """
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    predicted = cross_val_predict(clf, X, y, cv=folds)
    return confusion_matrix(y, predicted), accuracy_score(y, predicted)


# Score every classifier from get_params() and print its report.
# NOTE(review): X and y are assigned in a cell further down this notebook,
# so on a fresh kernel that cell has to be executed before this one.
for title, model in get_params():
    cm, acc = get_scores(X, y, clf=model)
    print_scores(title, cm, acc)


In [None]:
# Location of the iris CSV, relative to the working directory.
uri = Path("../../res") / "iris.csv"

# Column labels applied to the CSV on load; the last one is the target.
# NOTE(review): assumes the file has no header row of its own -- confirm.
columns = [
    "sepal-length",
    "sepal-width",
    "petal-length",
    "petal-width",
    "class",
]

# Read the file into a DataFrame and preview the first rows.
df = pd.read_csv(uri, names=columns)
df.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
# Separate the frame into features (all columns but the last) and the
# target (the last column). Both are copies, independent of df.
X, y = df.iloc[:, :-1].copy(), df.iloc[:, -1].copy()

# The cross-validation folds are not built here; get_scores() creates its
# own KFold (5 splits, shuffled, random_state=42).
print(y.describe())
X.head()

count             150
unique              3
top       Iris-setosa
freq               50
Name: class, dtype: object


Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
