In [1]:
import sys
import os
import random
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import scipy
import libsvm
from libsvm.svmutil import *

from PIL import Image

In [2]:
### Data processing

def read_image(dirpath):
    image_data = []
    
    for file in os.listdir(dirpath):
        if os.path.isfile(os.path.join(dirpath, file)):
            c = os.path.basename(file)
            filename = dirpath + '/' + c
            image = Image.open(filename).resize((32, 32)).convert('L')
            image_arr = np.array(image).reshape(1, 1024)
            image_data.append(image_arr)
    
    return image_data

def process_image(folderpath):
    data_all = []
    for i in range(1, 26):
        ### '/home/x/PR_CA/PIE/'
        dirpath = folderpath + str(i)  
        image_list = read_image(dirpath)
        data_image = np.concatenate(image_list, axis=0)
        data_all.append(data_image)
    return data_all
    
def train_test_split(data, label):
    proportion = 0.7
    lens = data.shape[0]
    print(lens)
    idx = random.sample(range(0, lens), int(proportion*lens))
    idx = sorted(idx)
    
    train_x, train_y, test_x, test_y = [], [], [], []
    for i in range(lens):
        if i in idx:
            train_x.append(data[i].reshape(1, -1))
            train_y.append(label[i].reshape(1, -1))
        else:
            test_x.append(data[i].reshape(1, -1))
            test_y.append(label[i].reshape(1, -1))
    
#     train_x = [data[i].reshape(1, -1) for i in idx]
#     train_y = [label[i].reshape(1, -1) for i in idx]
#     test_x = [data[i].reshape(1, -1) for i in range(lens) if i not in idx]
#     test_y = [label[i].reshape(1, -1) for i in range(lens) if i not in idx]
    
    trainx = np.concatenate(train_x)
    trainy = np.concatenate(train_y)
    testx = np.concatenate(test_x)
    testy = np.concatenate(test_y)
    return trainx, trainy, testx, testy



In [3]:
def PCA_(dataset, n_component):
    N, Fea = dataset.shape
    x_mean = np.mean(dataset, 0)
    x_norm = dataset - x_mean
    
    co_var = np.zeros([Fea, Fea])
    co_var = np.dot(x_norm.T, x_norm)
    
    eig_val, eig_vec = np.linalg.eig(co_var)
    eig_pair = [[np.abs(eig_val[i]), eig_vec[:, i]] for i in range(Fea)]
    
    feature = np.array([pair[1] for pair in eig_pair[:n_component]])
    data = np.dot(x_norm, feature.T)
    
    return np.real(data)
def PCA_Split(image_all, label_image, self_image, label_self, n_component):
    image_all = PCA_(image_all, n_component)
    self_image = PCA_(self_image, n_component)
    trainx, trainy, testx, testy = train_test_split(image_all, label_image)
    trainx_self, trainy_self, testx_self, testy_self = train_test_split(self_image, label_self)

    # Training data and testing data
    train_x = np.concatenate([trainx, trainx_self])
    train_y = np.concatenate([trainy, trainy_self])
    test_x = np.concatenate([testx, testx_self])
    test_y = np.concatenate([testy, testy_self])
    data_pro = [train_x, train_y, test_x, test_y]
    return data_pro


In [4]:
### Data

folderpath = 'PIE/'
data_all = process_image(folderpath)
image_all = np.concatenate(data_all)
label_image = np.arange(0, 25, 1).repeat(170)

self_folder = 'PIE/0'
self_image = np.concatenate(read_image(self_folder))
label_self = np.array([25]).repeat(10)

trainx, trainy, testx, testy = train_test_split(image_all, label_image)
trainx_self, trainy_self, testx_self, testy_self = train_test_split(self_image, label_self)

# Training data and testing data
train_x = np.concatenate([trainx, trainx_self])
train_y = np.concatenate([trainy, trainy_self])
test_x = np.concatenate([testx, testx_self])
test_y = np.concatenate([testy, testy_self])

data_pro_80 = PCA_Split(image_all, label_image, self_image, label_self, 80)
data_pro_200 = PCA_Split(image_all, label_image, self_image, label_self, 200)

4250
10
4250
10
4250
10


In [5]:
### Libsvm
def libsvm(data_pro, c):
    lab = data_pro[1]
    lab = [d[0] for d in lab]

    datas = data_pro[0]
    datas = [datas[i,:].tolist() for i in range(datas.shape[0])]

    t_lab = data_pro[3]
    t_lab = [d[0] for d in t_lab]

    t_datas = data_pro[2]
    t_datas = [t_datas[i,:].tolist() for i in range(t_datas.shape[0])]

    pa = '-t 0 -c ' + str(c) + ' -b 1' 
    prob  = svm_problem(lab, datas)
    param = svm_parameter(pa)
    m = svm_train(prob, param)
    p_label, p_acc, p_val = svm_predict(t_lab, t_datas, m)



In [6]:
print("Dimension of processed data is 80, penalty parameter is 0.01")
libsvm(data_pro_80, 0.01)
print("Dimension of processed data is 80, penalty parameter is 0.1")
libsvm(data_pro_80, 0.1)
print("Dimension of processed data is 80, penalty parameter is 1")
libsvm(data_pro_80, 1)

print('\n')

print("Dimension of processed data is 200, penalty parameter is 0.01")
libsvm(data_pro_200, 0.01)
print("Dimension of processed data is 200, penalty parameter is 0.1")
libsvm(data_pro_200, 0.1)
print("Dimension of processed data is 200, penalty parameter is 1")
libsvm(data_pro_200, 1)



Dimension of processed data is 80, penalty parameter is 0.01
Model supports probability estimates, but disabled in predicton.
Accuracy = 98.2003% (1255/1278) (classification)
Dimension of processed data is 80, penalty parameter is 0.1
Model supports probability estimates, but disabled in predicton.
Accuracy = 98.2003% (1255/1278) (classification)
Dimension of processed data is 80, penalty parameter is 1
Model supports probability estimates, but disabled in predicton.
Accuracy = 98.2003% (1255/1278) (classification)


Dimension of processed data is 200, penalty parameter is 0.01
Model supports probability estimates, but disabled in predicton.
Accuracy = 98.8263% (1263/1278) (classification)
Dimension of processed data is 200, penalty parameter is 0.1
Model supports probability estimates, but disabled in predicton.
Accuracy = 98.8263% (1263/1278) (classification)
Dimension of processed data is 200, penalty parameter is 1
Model supports probability estimates, but disabled in predicton.
Ac