In [1]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from prettytable import PrettyTable
from sklearn import tree
import numpy as np

## Dataset

In [2]:
iris = load_iris()
X = iris.data
y = iris.target

## Experimento Holdout

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, test_size=0.3)

#LogisticRegression
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
train_pred = clf.predict(X_train)
test_pred  = clf.predict(X_test)

#Print Table
table = PrettyTable(['','Accuracy', 'F1'])
table.add_row(['Train', "%.4f" % accuracy_score(y_train, train_pred), "%.4f" % f1_score(y_train, train_pred, average='macro')])
table.add_row(['Test',"%.4f" % accuracy_score(y_test, test_pred), "%.4f" % f1_score(y_test, test_pred, average='macro')])
print(table)

+-------+----------+--------+
|       | Accuracy |   F1   |
+-------+----------+--------+
| Train |  0.9619  | 0.9624 |
|  Test |  0.9778  | 0.9769 |
+-------+----------+--------+


## Experimento 10-Fold

In [4]:
kf        = KFold(n_splits=10)
acc_list  = []
f1_list   = []
cont_fold = 0
table = PrettyTable(['', 'Accuracy', 'F1'])

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    #LogisticRegression
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc_list.append(accuracy_score(y_test, y_pred))
    f1_list.append(f1_score(y_test, y_pred, average='macro'))
    cont_fold += 1
    
    #Table config
    table.add_row(['Fold {0}'.format(cont_fold),"%.4f" % accuracy_score(y_test, y_pred), 
                 "%.4f" % f1_score(y_test, y_pred, average='macro')])

#Tables print
print(table)
table.clear_rows()
table.add_row(['Total Average', "%.4f" % np.average(acc_list), "%.4f" % np.average(f1_list)])
print(table)

+---------+----------+--------+
|         | Accuracy |   F1   |
+---------+----------+--------+
|  Fold 1 |  1.0000  | 1.0000 |
|  Fold 2 |  1.0000  | 1.0000 |
|  Fold 3 |  1.0000  | 1.0000 |
|  Fold 4 |  1.0000  | 1.0000 |
|  Fold 5 |  0.9333  | 0.4828 |
|  Fold 6 |  0.8667  | 0.4643 |
|  Fold 7 |  1.0000  | 1.0000 |
|  Fold 8 |  0.8667  | 0.4643 |
|  Fold 9 |  0.8667  | 0.4643 |
| Fold 10 |  0.9333  | 0.4828 |
+---------+----------+--------+
+---------------+----------+--------+
|               | Accuracy |   F1   |
+---------------+----------+--------+
| Total Average |  0.9467  | 0.7358 |
+---------------+----------+--------+


## Experimento Comparando Classificadores

In [5]:
class_1 = []
class_2 = []

kf = StratifiedKFold(n_splits=10)

for train_index, test_index in kf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    #LogisticRegression
    clf = LogisticRegression(max_iter=1000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score  = f1_score(y_test, y_pred, average='macro')
    class_1.append(score)
    
    #DecisionTree
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score  = f1_score(y_test, y_pred, average='macro')
    class_2.append(score)

#Print Table
table = PrettyTable(['', 'Logistic Regression', 'Decision Tree'])
table.add_row(['Average F1', "%.4f" % np.average(class_1), "%.4f" % np.average(class_2)])
print(table)

+------------+---------------------+---------------+
|            | Logistic Regression | Decision Tree |
+------------+---------------------+---------------+
| Average F1 |        0.9731       |     0.9530    |
+------------+---------------------+---------------+
