In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Classifiers
from sklearn.neighbors import KNeighborsClassifier # KNN Classifier
from sklearn import svm # SVM Classifier
from sklearn.tree import DecisionTreeClassifier # Decision Tree Classifier
from sklearn.naive_bayes import GaussianNB # Naive Bayes Classifier (Gaussian) 

# Performance Metrics
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# K-Fold Cross Validation
from sklearn.model_selection import cross_val_score

# Preprocessing

In [None]:
# Location of the Iris CSV (an absolute Colab Drive path), named once so it is easy to find and change.
IRIS_CSV_PATH = '/content/drive/MyDrive/Colab Notebooks/Datasets/Iris.csv'
iris_data_set = pd.read_csv(IRIS_CSV_PATH)

In [None]:
# Preview the first five rows to check that the columns were parsed as expected.
iris_data_set.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# 'Id' looks like a plain row counter rather than a flower measurement, so it is
# removed before modelling.
# errors='ignore' keeps this cell re-runnable: the name is reassigned, so on a
# second run the column is already gone and a plain drop would raise KeyError.
iris_data_set = iris_data_set.drop(columns='Id', errors='ignore')

In [None]:
# Replace the species names with integer class codes so the classifiers receive
# a numeric target; `le` is kept so the codes can be mapped back to names later.
le = LabelEncoder()
species_codes = le.fit_transform(iris_data_set['Species'])
iris_data_set['Species'] = species_codes

In [None]:
# Column dtypes and non-null counts of the frame as it stands after the
# preprocessing cells above (top-to-bottom execution assumed).
iris_data_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB


In [None]:
# Per-column count of missing values (isna is the same check as isnull).
iris_data_set.isna().sum()

SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [None]:
# Class balance check: number of samples for each encoded species.
species_column = iris_data_set['Species']
species_column.value_counts()

0    50
1    50
2    50
Name: Species, dtype: int64

DATASET DIVISION

In [None]:
# Split the frame by position: every column except the last is a feature,
# the last column is the target. Both are handed on as NumPy arrays.
feature_frame = iris_data_set.iloc[:, :-1]
target_series = iris_data_set.iloc[:, -1]
x = feature_frame.values
y = target_series.values

In [None]:
# Hold out 20% of the samples for testing; random_state pins the shuffle so the
# same split is produced on every run.
split_parts = train_test_split(x, y, test_size=0.20, random_state=0)
x_train, x_test, y_train, y_test = split_parts
print(x_test)

[[5.8 2.8 5.1 2.4]
 [6.  2.2 4.  1. ]
 [5.5 4.2 1.4 0.2]
 [7.3 2.9 6.3 1.8]
 [5.  3.4 1.5 0.2]
 [6.3 3.3 6.  2.5]
 [5.  3.5 1.3 0.3]
 [6.7 3.1 4.7 1.5]
 [6.8 2.8 4.8 1.4]
 [6.1 2.8 4.  1.3]
 [6.1 2.6 5.6 1.4]
 [6.4 3.2 4.5 1.5]
 [6.1 2.8 4.7 1.2]
 [6.5 2.8 4.6 1.5]
 [6.1 2.9 4.7 1.4]
 [4.9 3.1 1.5 0.1]
 [6.  2.9 4.5 1.5]
 [5.5 2.6 4.4 1.2]
 [4.8 3.  1.4 0.3]
 [5.4 3.9 1.3 0.4]
 [5.6 2.8 4.9 2. ]
 [5.6 3.  4.5 1.5]
 [4.8 3.4 1.9 0.2]
 [4.4 2.9 1.4 0.2]
 [6.2 2.8 4.8 1.8]
 [4.6 3.6 1.  0.2]
 [5.1 3.8 1.9 0.4]
 [6.2 2.9 4.3 1.3]
 [5.  2.3 3.3 1. ]
 [5.  3.4 1.6 0.4]]


# Classification using KNN

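[Accuracy Score, Precision Score, Recall Score]. Note: scikit-learn's Minkowski metric defaults to p = 2, which is exactly the Euclidean distance, so the two distance columns below match by construction.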
<hr width="38%" align="left">
|&emsp;n-Value&emsp;|&emsp;Euclidean Distance&emsp;|&emsp;Minkowski Distance&emsp;|<br>
<hr width="38%" align="left">
|&emsp;&emsp;&nbsp;&nbsp;1&emsp;&emsp;&nbsp;|&emsp;&nbsp;&nbsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;|&emsp;&emsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;&nbsp;|<br>
|&emsp;&emsp;&nbsp;&nbsp;5&emsp;&emsp;&nbsp;|&emsp;[ 0.96, 0.95, 0.97 ] &emsp;&nbsp;|&emsp;&emsp;[ 0.96, 0.95, 0.97 ]&emsp;&nbsp;&nbsp;|<br>
|&emsp;&emsp;&nbsp;10&emsp;&emsp;|&emsp;&nbsp;&nbsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;|&emsp;&emsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;&nbsp;|<br>
<hr width="38%" align="left">


n-value = 1

In [None]:
# 1-nearest-neighbour with the Minkowski metric. p=2 is scikit-learn's default,
# spelled out here because Minkowski with p=2 is exactly the Euclidean distance.
knn_classifier = KNeighborsClassifier(n_neighbors=1, metric="minkowski", p=2)
y_pred = knn_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the KNN (n=1, Minkowski) predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


In [None]:
# 1-nearest-neighbour using the Euclidean distance between feature vectors.
knn_classifier = KNeighborsClassifier(n_neighbors=1, metric="euclidean")
y_pred = knn_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the KNN (n=1, Euclidean) predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


n-value = 5

In [None]:
# 5-nearest-neighbours with the Minkowski metric. p=2 is scikit-learn's default,
# spelled out here because Minkowski with p=2 is exactly the Euclidean distance.
knn_classifier = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2)
y_pred = knn_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the KNN (n=5, Minkowski) predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 12  1]
 [ 0  0  6]]
0.9666666666666667
0.9523809523809524
0.9743589743589745


In [None]:
# 5-nearest-neighbours using the Euclidean distance between feature vectors.
knn_classifier = KNeighborsClassifier(n_neighbors=5, metric="euclidean")
y_pred = knn_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the KNN (n=5, Euclidean) predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 12  1]
 [ 0  0  6]]
0.9666666666666667
0.9523809523809524
0.9743589743589745


n-value = 10

In [None]:
# 10-nearest-neighbours with the Minkowski metric. p=2 is scikit-learn's default,
# spelled out here because Minkowski with p=2 is exactly the Euclidean distance.
knn_classifier = KNeighborsClassifier(n_neighbors=10, metric="minkowski", p=2)
y_pred = knn_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the KNN (n=10, Minkowski) predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


In [None]:
# 10-nearest-neighbours using the Euclidean distance between feature vectors.
knn_classifier = KNeighborsClassifier(n_neighbors=10, metric="euclidean")
y_pred = knn_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the KNN (n=10, Euclidean) predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


# Classification using Support Vector Machine

[Accuracy Score, Precision Score, Recall Score]
<hr width="22.5%" align="left">
|&emsp;Kernel &emsp;&nbsp;|&emsp;&emsp;Metric Results&emsp;&emsp;|<br>
<hr width="22.5%" align="left">
|&emsp;Linear&emsp;&nbsp;&nbsp;&nbsp;|&emsp;&nbsp;&nbsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;|<br>
|&emsp;&nbsp;&nbsp;RBF&emsp;&emsp;&nbsp;|&emsp;&nbsp;&nbsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;|<br>
|&nbsp;Polynomial&nbsp;|&emsp;&nbsp;&nbsp;&emsp;[1.0, 1.0, 1.0]&emsp;&emsp;&nbsp;|<br>
<hr width="22.5%" align="left">


Polynomial Kernel

In [None]:
# Support vector classifier with a polynomial kernel; every other SVC setting
# is left at its default.
svm_classifier = svm.SVC(kernel="poly")
y_pred = svm_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the polynomial-kernel SVM predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


Linear Kernel

In [None]:
# Support vector classifier with a linear kernel; every other SVC setting
# is left at its default.
svm_classifier = svm.SVC(kernel="linear")
y_pred = svm_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the linear-kernel SVM predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


Radial Basis Function Kernel

In [None]:
# Support vector classifier with a radial basis function (RBF) kernel; every
# other SVC setting is left at its default.
svm_classifier = svm.SVC(kernel="rbf")
y_pred = svm_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the RBF-kernel SVM predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


# Classification using Decision Tree

[Accuracy Score, Precision Score, Recall Score]
<hr width="27%" align="left">
|&emsp;Criterion&nbsp;&nbsp;|&emsp;Splitter&emsp;&nbsp;|&emsp;Metric Result&emsp;|<br>
|&emsp;entropy&emsp;|&emsp;&nbsp;&nbsp;best&emsp;&emsp;|&emsp;[1.0, 1.0, 1.0]&emsp;&nbsp;|<br>
|&emsp;entropy&emsp;|&emsp;random&emsp;|&emsp;[1.0, 1.0, 1.0]&emsp;&nbsp;|<br>
<hr width="27%" align="left">


Information Gain, with Best Splitter

In [None]:
# Decision tree that splits on information gain (entropy) and takes the best
# split at each node.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split even with splitter="best", so ties could otherwise be broken
# differently between runs. The seed matches the one used for the train/test split.
dt_classifier = DecisionTreeClassifier(criterion="entropy", splitter="best", random_state=0)
dt_classifier.fit(x_train, y_train)
y_pred = dt_classifier.predict(x_test)

In [None]:
# Score the entropy / best-splitter tree predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


Information Gain with Random Splitter

In [None]:
# Decision tree that splits on information gain (entropy) but draws its split
# candidates at random.
# Without a seed this tree differs on every run, so the reported scores cannot
# be reproduced; random_state pins it. The seed matches the one used for the
# train/test split.
dt_classifier = DecisionTreeClassifier(criterion="entropy", splitter="random", random_state=0)
dt_classifier.fit(x_train, y_train)
y_pred = dt_classifier.predict(x_test)

In [None]:
# Score the entropy / random-splitter tree predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]
1.0
1.0
1.0


# Classification using Naive Bayes

[Accuracy Score, Precision Score, Recall Score]<br>
[0.96, 0.97, 0.94]
<hr width="38%" align="left">


In [None]:
# Gaussian Naive Bayes classifier with all settings left at their defaults.
gnb_classifier = GaussianNB()

In [None]:
# Fit on the training split, then predict the held-out test samples in one chain
# (fit returns the fitted estimator itself).
y_pred = gnb_classifier.fit(x_train, y_train).predict(x_test)

In [None]:
# Score the Gaussian Naive Bayes predictions held in y_pred against the test labels.
cm = confusion_matrix(y_test, y_pred)
a_score = accuracy_score(y_test, y_pred)
macro_avg = {"average": "macro"}  # unweighted mean of the per-class scores
p_score = precision_score(y_test, y_pred, **macro_avg)
r_score = recall_score(y_test, y_pred, **macro_avg)

# One result per line: confusion matrix, accuracy, precision, recall.
report = (cm, a_score, p_score, r_score)
print(*report, sep="\n")

[[11  0  0]
 [ 0 13  0]
 [ 0  1  5]]
0.9666666666666667
0.9761904761904763
0.9444444444444445


# K Fold Cross Validation
> k=5
  - KNN Classifier --> Accuracy = 0.98, SD = 0.03
  - SVM Classifier --> Accuracy = 0.97, SD = 0.02
  - Decision Tree Classifier --> Accuracy = 0.93, SD = 0.05
  - Naive Bayes Classifier --> Accuracy = 0.95, SD = 0.03

> k=4
  - KNN Classifier --> Accuracy = 0.97, SD = 0.01
  - SVM Classifier --> Accuracy = 0.97, SD = 0.02
  - Decision Tree Classifier --> Accuracy = 0.95, SD = 0.04
  - Naive Bayes Classifier --> Accuracy = 0.95, SD = 0.03

k = 5

In [None]:
# 5-fold cross-validation on the full dataset. With top-to-bottom execution,
# knn_classifier is the last KNN configured above (n_neighbors=10, euclidean).
knn_scores = cross_val_score(knn_classifier, x, y, cv=5)
knn_mean, knn_std = knn_scores.mean(), knn_scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (knn_mean, knn_std))

0.98 accuracy with a standard deviation of 0.03


In [None]:
# 5-fold cross-validation on the full dataset. With top-to-bottom execution,
# svm_classifier is the last SVC configured above (RBF kernel).
svm_scores = cross_val_score(svm_classifier, x, y, cv=5)
svm_mean, svm_std = svm_scores.mean(), svm_scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (svm_mean, svm_std))

0.97 accuracy with a standard deviation of 0.02


In [None]:
# 5-fold cross-validation of the entropy / random-splitter decision tree.
# The estimator is built here with a fixed random_state instead of reusing
# dt_classifier: an unseeded random-splitter tree gives different fold scores on
# every run, and reusing the variable ties the result to whichever tree cell
# happened to run last.
dt_cv_classifier = DecisionTreeClassifier(criterion="entropy", splitter="random", random_state=0)
dt_scores = cross_val_score(dt_cv_classifier, x, y, cv=5)
print("%0.2f accuracy with a standard deviation of %0.2f" % (dt_scores.mean(), dt_scores.std()))

0.93 accuracy with a standard deviation of 0.05


In [None]:
# 5-fold cross-validation of the Gaussian Naive Bayes classifier on the full dataset.
gnb_scores = cross_val_score(gnb_classifier, x, y, cv=5)
gnb_mean, gnb_std = gnb_scores.mean(), gnb_scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (gnb_mean, gnb_std))

0.95 accuracy with a standard deviation of 0.03


k = 4

In [None]:
# 4-fold cross-validation on the full dataset. With top-to-bottom execution,
# knn_classifier is the last KNN configured above (n_neighbors=10, euclidean).
knn_scores = cross_val_score(knn_classifier, x, y, cv=4)
knn_mean, knn_std = knn_scores.mean(), knn_scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (knn_mean, knn_std))

0.97 accuracy with a standard deviation of 0.01


In [None]:
# 4-fold cross-validation on the full dataset. With top-to-bottom execution,
# svm_classifier is the last SVC configured above (RBF kernel).
svm_scores = cross_val_score(svm_classifier, x, y, cv=4)
svm_mean, svm_std = svm_scores.mean(), svm_scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (svm_mean, svm_std))

0.97 accuracy with a standard deviation of 0.02


In [None]:
# 4-fold cross-validation of the entropy / random-splitter decision tree.
# The estimator is built here with a fixed random_state instead of reusing
# dt_classifier: an unseeded random-splitter tree gives different fold scores on
# every run, and reusing the variable ties the result to whichever tree cell
# happened to run last.
dt_cv_classifier = DecisionTreeClassifier(criterion="entropy", splitter="random", random_state=0)
dt_scores = cross_val_score(dt_cv_classifier, x, y, cv=4)
print("%0.2f accuracy with a standard deviation of %0.2f" % (dt_scores.mean(), dt_scores.std()))

0.95 accuracy with a standard deviation of 0.04


In [None]:
# 4-fold cross-validation of the Gaussian Naive Bayes classifier on the full dataset.
gnb_scores = cross_val_score(gnb_classifier, x, y, cv=4)
gnb_mean, gnb_std = gnb_scores.mean(), gnb_scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (gnb_mean, gnb_std))

0.95 accuracy with a standard deviation of 0.03
