In [1]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn import cluster
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

## 1. Load Iris Data

In [2]:
train_data = pd.read_csv("iris-train.csv", header=0, skiprows=[1])
test_data = pd.read_csv("iris-test.csv", header=0, skiprows=[1])
train_data

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,,,,,class
1,5.1,3.7,1.5,0.4,Iris-setosa
2,4.6,3.2,1.4,0.2,Iris-setosa
3,6.9,3.1,5.1,2.3,Iris-virginica
4,5.5,2.6,4.4,1.2,Iris-versicolor
...,...,...,...,...,...
116,4.5,2.3,1.3,0.3,Iris-setosa
117,5.0,2.3,3.3,1.0,Iris-versicolor
118,5.0,3.4,1.6,0.4,Iris-setosa
119,4.6,3.1,1.5,0.2,Iris-setosa


In [3]:
class2num = {'Iris-versicolor': 0,
             'Iris-setosa': 1, 
             'Iris-virginica': 2}

train_x = train_data[['sepal length', 'sepal width', 'petal length', 'petal width']]
test_x  = test_data[['sepal length', 'sepal width', 'petal length', 'petal width']]
train_y = np.array([class2num[label] for label in train_data['class']])
test_y = np.array([class2num[label] for label in test_data['class']])
test_y

KeyError: 'class'

## 2.1 Clustering with K-means

In [None]:
kmeans = cluster.KMeans(n_clusters=3, random_state=0)
kmeans.fit(train_x)
clusters = kmeans.predict(test_x)
clusters

In [None]:
cm = confusion_matrix(test_y, clusters)
disp = ConfusionMatrixDisplay(cm)
disp.plot()

## 2.2 Clustering with Hierarchical clustering

In [None]:
clustering = cluster.AgglomerativeClustering(n_clusters=3)
clusters = clustering.fit_predict(test_x)
clusters

In [None]:
class2num = {'Iris-versicolor': 2,
             'Iris-setosa': 1, 
             'Iris-virginica': 0}
true_y = np.array([class2num[label] for label in test_data['class']])

cm = confusion_matrix(true_y, clusters)
disp = ConfusionMatrixDisplay(cm)
disp.plot()

## 3.1 Classification with Logistic Regression

In [None]:
clf = LogisticRegression(random_state=0)
clf.fit(train_x, train_y)
pred_y = clf.predict(test_x)
pred_y

In [None]:
cm = confusion_matrix(test_y, pred_y)
disp = ConfusionMatrixDisplay(cm)
disp.plot()

## 3.2 Classification with Decision Tree

In [None]:
clf = tree.DecisionTreeClassifier()
clf.fit(train_x, train_y)
pred_y = clf.predict(test_x)

In [None]:
cm = confusion_matrix(test_y, pred_y)
disp = ConfusionMatrixDisplay(cm)
disp.plot()

In [None]:
plt.figure(figsize=(15, 12))
tree.plot_tree(clf)