# Import Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn import cluster
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the session. Note that this
# also hides library deprecation and solver-convergence warnings raised by the
# cells below.
warnings.filterwarnings("ignore")

# Clustering

## 1. Load Data

In [None]:
# Load only the four measurement columns of the training file; any other
# column in the CSV is not read. File lines 1 and 2 (0-based, i.e. the two
# lines right after the header row) are skipped.
measurement_columns = ['sepal length', 'sepal width', 'petal length', 'petal width']
data = pd.read_csv(
    "iris-train.csv",
    header=0,
    skiprows=[1, 2],
    usecols=measurement_columns,
)

## 2. View Data

In [None]:
# Bare expression on the last line of the cell: the notebook renders the
# DataFrame as the cell output.
data

## 3. Clustering with k-Means

In [None]:
# Partition the samples into three clusters with k-means.
# random_state fixes the centroid seeding. n_init is spelled out so that the
# number of restarts does not depend on the default of the installed
# scikit-learn version; warnings are silenced at the top of this notebook, so
# a notice about a changed default would not be visible.
kmeans = cluster.KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(data)
# Cluster index assigned to each row of `data`, in row order (cell output).
kmeans.labels_

## 4. Plot

In [None]:
%matplotlib inline
target = pd.DataFrame(kmeans.labels_, columns = ['cluster'])
dataset = pd.concat([data, target], axis = 1)
plt.rcParams["figure.figsize"] = [12, 9]
colors = ['r', 'g', 'b']
for i in range(0,3):
    sub_data = dataset.loc[dataset['cluster'] == i]
    plt.scatter(sub_data["sepal length"], sub_data["sepal width"], color=colors[i], s= 40, label="cluster "+str(i))
plt.xlabel('Sepal length',fontsize=30)
plt.ylabel('Sepal width', fontsize=30)
plt.legend()
plt.show()

# Classification

## 1. Load Data

In [None]:
# Read the train and test splits with identical parsing options: the first
# line is the header and file lines 1 and 2 (0-based) are skipped.
train_data, test_data = (
    pd.read_csv(path, header=0, skiprows=[1, 2])
    for path in ("iris-train.csv", "iris-test.csv")
)

## 2. Preprocess

In [None]:
# Integer code for each class name; the position in the tuple is the code.
class2num = {
    name: code
    for code, name in enumerate(('Iris-versicolor', 'Iris-setosa', 'Iris-virginica'))
}

# Feature matrices: the same four measurement columns for both splits.
feature_columns = ['sepal length', 'sepal width', 'petal length', 'petal width']
train_x = train_data[feature_columns]
test_x = test_data[feature_columns]

# Target vectors: the 'class' column translated to integer codes. A class name
# missing from class2num raises KeyError.
train_y = np.array([class2num[name] for name in train_data['class']])
test_y = np.array([class2num[name] for name in test_data['class']])

## 3. Train Logistic Regression Model

In [None]:
# Fit a logistic-regression classifier on the training split; fit() returns
# the estimator itself, so construction and training are chained.
# NOTE(review): warnings are silenced at the top of this notebook, so a solver
# convergence warning would not be shown here — consider inspecting
# clf.n_iter_ after fitting.
clf = LogisticRegression(random_state=0).fit(train_x, train_y)
# Show the fitted estimator as the cell output.
clf

## 4. Prediction

In [None]:
# Predict a class code for every row of the test feature matrix with the
# classifier fitted above.
pred_y = clf.predict(test_x)

## 5. Evaluation

In [None]:
# Compare the predictions with the true labels of the test split.
# Class names are ordered by their integer code so that row/column k of the
# matrix corresponds to the class whose code is k.
class_names = sorted(class2num, key=class2num.get)
class_codes = [class2num[name] for name in class_names]
# Passing `labels` keeps one row/column per known class even if a class does
# not occur in test_y or pred_y, so the matrix always lines up with the names.
cm = confusion_matrix(test_y, pred_y, labels=class_codes)
# Show class names on the axes instead of the arbitrary integer codes.
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot()