In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
np.random.seed(1)

In [2]:
# filename structure
path = 'D:/OneDrive/Nam_4_1/ImageProcessing/PCAandRecognization/YALE/unpadded/'
ids = range(1, 16)  # 15 persons
states = ['centerlight', 'glasses', 'happy', 'leftlight',
          'noglasses', 'normal', 'rightlight','sad',
          'sleepy', 'surprised', 'wink']
prefix = 'subject'
surfix = '.pgm'

In [3]:
# data dimension
h = 116
w = 98
D = h * w
N = len(states) * 15
K = 100

In [4]:
# collect all data
import imageio
# X = np.zeros((D, N))
X_train = np.zeros((D, N - 2 * 15))
X_test = np.zeros((D, 2 * 15))
# cnt = 0
a = 0
b = 0
for person_id in range(1, 16):
	for i, state in enumerate(states):
		fn = path + prefix + str(person_id).zfill(2) + '.' + state + surfix
		if i < 2:
			X_test[:, a] = imageio.imread(fn).reshape(D)
			a += 1
		else:
			X_train[:, b] = imageio.imread(fn).reshape(D)
			b += 1
		# cnt += 1

In [5]:
X_train.shape

(11368, 135)

In [6]:
X_test.shape

(11368, 30)

In [7]:
mean = X_train.mean(axis=1)

In [8]:
mean.shape

(11368,)

In [9]:
# doing PCA, note that each row is a datapoint
from sklearn.decomposition import PCA
pca = PCA(n_components=K)
pca.fit(X_train.T)

PCA(copy=True, iterated_power='auto', n_components=100, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [10]:
# projection matrix
U = pca.components_.T

In [11]:
U.shape

(11368, 100)

In [None]:
# see reconstruction of the first 6 persons
for person_id in range(1, 7):
	for state in ['centerlight']:
		fn = path + prefix + str(person_id).zfill(2) + '.' + state + surfix
		im = imageio.imread(fn)
		plt.axis('off')
		#         plt.imshow(im, interpolation='nearest' )
		f1 = plt.imshow(im, interpolation='nearest')
		f1.axes.get_xaxis().set_visible(False)
		f1.axes.get_yaxis().set_visible(False)
		plt.gray()
		fn = 'ori' + str(person_id).zfill(2) + '.png'
		plt.savefig(fn, bbox_inches='tight', pad_inches=0)
		plt.show()
		# reshape and subtract mean, don't forget 
		x = im.reshape(D, 1) - pca.mean_.reshape(D, 1)
		# encode
		z = U.T.dot(x)
		#decode
		x_tilde = U.dot(z) + pca.mean_.reshape(D, 1)

		# reshape to orginal dim
		im_tilde = x_tilde.reshape(116, 98)
		plt.axis('off')
		#         plt.imshow(im_tilde, interpolation='nearest' )
		f1 = plt.imshow(im_tilde, interpolation='nearest')
		f1.axes.get_xaxis().set_visible(False)
		f1.axes.get_yaxis().set_visible(False)
		plt.gray()
		fn = 'res' + str(person_id).zfill(2) + '.png'
		plt.savefig(fn, bbox_inches='tight', pad_inches=0)
		plt.show()

In [12]:
pca.mean_

array([33.92592593, 71.31851852, 92.72592593, ..., 74.04444444,
       40.44444444, 34.06666667])

In [13]:
mean

array([33.92592593, 71.31851852, 92.72592593, ..., 74.04444444,
       40.44444444, 34.06666667])

In [14]:
Mean2 = np.zeros((X_train.shape[0], X_train.shape[1]))
for i in range(X_train.shape[1]):
    Mean2[:, i] = mean

In [15]:
# new data
Z = U.T.dot(X_train - Mean2)

In [16]:
Z.shape

(100, 135)

In [17]:
X_train_PCA = Z.T
X_train_PCA.shape

(135, 100)

In [20]:
# create labels
y_train = []
y_test = []
for i in range(15):
    for j in range(11):
        if j < 2:
            y_test.append(i + 1)
        else:
            y_train.append(i + 1)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [21]:
y_train

array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,
        4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
        8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
       10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12,
       12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14,
       14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15])

In [22]:
y_test


array([ 1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  9,
        9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15])

In [23]:
s = np.arange(X_train_PCA.shape[0])

In [24]:
s

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134])

In [25]:
np.random.shuffle(s)
s

array([ 54,   9, 103,  22, 102,  16,   8,  63,  64,  73,  74,  40,  77,
       125,   3,  48, 114,  25,  34,  95,  30,  97,  68,  14, 110,  70,
       101,  31, 129,   6,  90, 123,  41, 116,  81, 111,  24,  98,  37,
       131,  92,  85,  94,  84,  75,  49,  89, 106, 107,  59,  26,  62,
       126,  96, 115, 108,  87,  35,  72,  38,  23, 130,  12,  52,   5,
        19, 122,  39,   0,  60,  50, 104,  91, 113,  51,  57,  17,  80,
        15,  42,  44,  55,  78, 124,   4,  47, 134,  86,  28,  88,  43,
        56,  45, 120,  11,   1,  65,  71,  79,  69,  53,   2,  33,  67,
        21, 119,  82,  76, 109,  36,  93,  20,  32, 118, 132, 112,  29,
        27, 117, 100, 127,  18, 105,  99,  66,  83, 121,   7,  13,  46,
       133,  58, 128,  61,  10])

In [26]:
X_train_PCA = X_train_PCA[s, :]

In [27]:
y_train = y_train[s]

In [28]:
s2 = np.arange(X_test.shape[1])
np.random.shuffle(s2)
X_test = X_test[:, s2]
y_test = y_test[s2]

In [29]:
s2

array([23, 27,  3, 14, 20,  5, 12, 24, 25,  8, 18, 29,  2, 19, 16, 13, 22,
        6,  9, 28, 10, 21, 11,  1, 26,  7, 17,  4, 15,  0])

In [30]:
X_test.shape

(11368, 30)

In [31]:
Mean = np.zeros((X_test.shape[0], X_test.shape[1]))
for i in range(X_test.shape[1]):
    Mean[:, i] = mean

In [32]:
Mean.shape

(11368, 30)

In [33]:
X_test.shape

(11368, 30)

In [84]:
(U.T.dot(X_test - Mean)).shape

(100, 30)

In [34]:
clf = neighbors.KNeighborsClassifier(n_neighbors=1, p=2)
clf.fit(X_train_PCA, y_train)
y_pred = clf.predict((U.T.dot(X_test - Mean)).T)

In [35]:
print(y_pred)
print(y_test)

[12 14  2  8 11  3 14 13 13  5 10 15  2 10  9  7 12  4  5 15  6 11  6  1
 14  4  9  3  8  1]
[12 14  2  8 11  3  7 13 13  5 10 15  2 10  9  7 12  4  5 15  6 11  6  1
 14  4  9  3  8  1]


In [36]:
# evaluation
print("Accuracy of 1 NN: %.2f %%" %(100 * accuracy_score(y_test, y_pred)))

Accuracy of 1 NN: 96.67 %


In [88]:
clf2 = neighbors.KNeighborsClassifier(n_neighbors=9, p=2, weights='distance')
clf2.fit(X_train_PCA, y_train)
y_pred2 = clf2.predict((U.T.dot(X_test - Mean)).T)
# evaluation
print("Accuracy of 10 NN: %.2f %%" %(100 * accuracy_score(y_test, y_pred2)))


Accuracy of 10 NN: 90.00 %


In [89]:
print(y_pred2)
print(y_test)

[ 1  1  2  2  3  3  4  4  5  5  6  6 14  7  4 15  9  9 10 10 11 11 12 12
 13 13 14 14 15 15]
[ 1  1  2  2  3  3  4  4  5  5  6  6  7  7  8  8  9  9 10 10 11 11 12 12
 13 13 14 14 15 15]


In [120]:
print(X_train.shape)
print(y_train.shape)
print(X_train_PCA.shape)

(11368, 135)
(135,)
(135, 100)


In [38]:
###############################
# Without using PCA

clf_noPCA = neighbors.KNeighborsClassifier(n_neighbors=1, p=2)
clf_noPCA.fit(X_train.T, y_train)
y_pred_noPCA = clf_noPCA.predict(X_test.T)
print(y_pred_noPCA)
print(y_test)
print("Accuracy of 1 NN: %.2f %%" %(100 * accuracy_score(y_pred_noPCA, y_test)))

[ 1 12  1  3  8  4 12  3  3  5 15  7  9  4  5 13  8 13 15  7  7  6 11  1
  4  1  7 13 14  1]
[12 14  2  8 11  3  7 13 13  5 10 15  2 10  9  7 12  4  5 15  6 11  6  1
 14  4  9  3  8  1]
Accuracy of 1 NN: 10.00 %
