In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
np.random.seed(1)

In [3]:
# filename structure
path = 'D:/OneDrive/Nam_4_1/ImageProcessing/PCAandRecognization/YALE/unpadded/'
ids = range(1, 16)  # 15 persons
states = ['centerlight', 'glasses', 'happy', 'leftlight',
          'noglasses', 'normal', 'rightlight','sad',
          'sleepy', 'surprised', 'wink']
prefix = 'subject'
surfix = '.pgm'

In [4]:
# data dimension
h = 116
w = 98
D = h * w
N = len(states) * 15
K = 100

In [5]:
# collect all data
import imageio
X = np.zeros((D, N))
cnt = 0
for person_id in range(1, 16):
	for state in states:
		fn = path + prefix + str(person_id).zfill(2) + '.' +state+surfix
		X[:, cnt] = imageio.imread(fn).reshape(D)
		cnt += 1

In [6]:
X.shape

(11368, 165)

In [7]:
# doing PCA, note that each row is a datapoint
from sklearn.decomposition import  PCA
pca = PCA(n_components=K)
pca.fit(X.T)

PCA(copy=True, iterated_power='auto', n_components=100, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [8]:
# projection matrix
U = pca.components_.T


(11368, 165)

In [9]:
U.shape

(11368, 100)

In [10]:
# new data
Z = U.T.dot(X)

In [11]:
Z.shape

(100, 165)

In [14]:
X_PCA = Z.T
X_PCA.shape

(165, 100)

In [18]:
# create labels
y = []
for i in range(15):
    for j in range(11):
        y.append(i)
y = np.asarray(y)

In [19]:
y.shape

(165,)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_PCA, y, test_size=50)

In [55]:
clf = neighbors.KNeighborsClassifier(n_neighbors=1, p=2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [56]:
print(y_pred)
print(y_test)

[ 8 14  6  0  3 10  1  4  1  0 10 12  4 12  5  6  3  2 13 13  5 10  9 13
  3 14 14  1 13 12 10  1  5 10 11  3 10 13 11  8  0  5  5  0  5  5  6  4
  5 13]
[ 4 14  6  0  3 10  1  4  1  0 10 12  8 12  7  6  3  2 13  9  5 10  9 13
  3 14  2  1 13 12 10  9  5 10 11 13 10 13 11  8  0  5  5  0  5  7  6  4
  5 13]


In [57]:
# evaluation
print("Accuracy of 1 NN: %.2f %%" %(100 * accuracy_score(y_test, y_pred)))

Accuracy of 5 NN: 84.00 %


In [54]:
clf2 = neighbors.KNeighborsClassifier(n_neighbors=10, p=2, weights='distance')
clf2.fit(X_train, y_train)
y_pred2 = clf2.predict(X_test)
# evaluation
print("Accuracy of 10 NN: %.2f %%" %(100 * accuracy_score(y_test, y_pred2)))


Accuracy of 5 NN: 88.00 %


In [58]:
print(y_pred2)
print(y_test)

[ 5 14  6  0  3 10  1  4  1  0 10 12 11 12  1  6  3  2 13  9  5 11  9 13
  3 14  2  1 13 12 10  9  5 10 11  3 10 13 11  8  0  5  5  0  5 14  6  4
  5 13]
[ 4 14  6  0  3 10  1  4  1  0 10 12  8 12  7  6  3  2 13  9  5 10  9 13
  3 14  2  1 13 12 10  9  5 10 11 13 10 13 11  8  0  5  5  0  5  7  6  4
  5 13]


In [59]:
###############################
# Does not using PCA
X_noPCA = X.T
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_noPCA, y, test_size=50)
clf1 = neighbors.KNeighborsClassifier(n_neighbors=1, p=2)
clf1.fit(X_train1, y_train1)
y_pred1 = clf1.predict(X_test1)
print(y_pred1)
print(y_test1)
print("Accuracy of 1 NN: %.2f %%" %(100 * accuracy_score(y_test1, y_pred1)))

[ 7  1  2 13  0  3 14  5  3 13  3  5  5 14  9  8  8 10  8  1 11 12  5  1
  5  3 13 14  0 10 10  2  6  3  6 13 12  0 13  9 13  8  0  8  2  4  3  4
  5  5]
[ 7  1  2 13  0  0 14  5  3 13  3  5  5  6  9  8  8 10  8  1 11 12  5  1
  5  3  6 14  0 10 10 14 12 13  6 13  6  0 13  9 13  8  0  8  2  4 13  4
  5  5]
Accuracy of 1 NN: 84.00 %
