# K-Nearest Neighbor

In [1]:
import numpy as np
from sklearn import neighbors, datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
iris = datasets.load_iris()
# take the first two features
x = iris.data[:, :2]
y = iris.target

# train-test-split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# training data normalization
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# build model
model = neighbors.KNeighborsClassifier(n_neighbors=3)
model.fit(x_train, y_train)

# testing data normalization
x_test = scaler.transform(x_test)

# predict y
y_pred = model.predict(x_test)

# evaluate
accuracy = accuracy_score(y_test, y_pred)
num_correct_samples = accuracy_score(y_test, y_pred, normalize=False)
con_matrix = confusion_matrix(y_test, y_pred)

print(f'number of correct samples: {num_correct_samples}')
print(f'accuracy: {accuracy}')
print(f'confusion matrix: \n{con_matrix}')

number of correct samples: 23
accuracy: 0.7666666666666667
confusion matrix: 
[[ 8  0  0]
 [ 0  4  2]
 [ 0  5 11]]


# Decision Tree (CART)

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing, datasets, tree
import graphviz

import os
os.environ["PATH"] += os.pathsep + '/Users/yuhsinwang/Desktop/III/Machine Learning/practice 2'

In [4]:
iris = datasets.load_iris()
x = iris.data
y = iris.target

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)

# pre-pruning: 建議設定 max_depth (預設為None)
model = DecisionTreeClassifier(max_depth=3)
model.fit(x_train, y_train)

x_test = scaler.transform(x_test)
y_pred = model.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
num_correct_samples = accuracy_score(y_test, y_pred, normalize=False)
con_matrix = confusion_matrix(y_test, y_pred)

print(f'number of correct samples: {num_correct_samples}')
print(f'accuracy: {accuracy}')
print(f'con_matrix: \n{con_matrix}')

# output tree structure
# dot_data = tree.export_graphviz(model)
# graph = graphviz.Source(dot_data)
# graph.render('iris')

number of correct samples: 27
accuracy: 0.9
con_matrix: 
[[11  0  0]
 [ 0 13  3]
 [ 0  0  3]]
