In [1]:
import os
import numpy as np
import skimage.color as color
import skimage.feature as feat
import matplotlib.pyplot as plt
import time
import sys
import sklearn.svm as svm
import sklearn.model_selection as mod_s
import sklearn.decomposition as dec


#opeen data file and return a dictionary
def upickle(file):
    import pickle
    with open(file,'rb') as fo:
        dict =pickle.load(fo, encoding='bytes')
    fo.closed
    return dict

def Load_CIFAR_10():
    for i in range(1, 6):
        path_train = os.path.join('cifar-10-batches-py', 'data_batch_%d' % (i))
        data_dict = upickle(path_train)
        image_training_input_data = data_dict[b'data'].astype('uint8')
        image_training_label = np.array(data_dict[b'labels'])
        if i == 1:
            input_training_data = image_training_input_data
            output_training_data = image_training_label
        else:
            input_training_data = np.concatenate((input_training_data, image_training_input_data), axis=0)
            output_training_data = np.concatenate((output_training_data, image_training_label), axis=0)

    path_test = os.path.join('cifar-10-batches-py', 'test_batch')
    data_dict = upickle(path_test)
    test_input_data = data_dict[b'data'].astype('uint8')
    test_label = np.array(data_dict[b'labels'])

    return input_training_data, output_training_data, test_input_data, test_label

def data_pre_processing():
    train_input, train_label, test_input, test_label = Load_CIFAR_10()
    #reshape data to 3 channnels
    reshape_train_input = train_input.reshape(50000,3,32,32).transpose(0,2,3,1).astype('uint8')
    reshape_test_input = test_input.reshape(10000,3,32,32).transpose(0,2,3,1).astype('uint8')
    
    re_train_input_gray = np.zeros((32, 32, 50000))
    re_test_input_gray = np.zeros((32, 32, 10000))
    
    for i in range(0, train_input.shape[0]):
        re_train_input_gray[:, :, i] = color.rgb2gray(reshape_train_input[i, :, :, :])
        
    for i in range(0, test_input.shape[0]):
        re_test_input_gray[:, :, i] = color.rgb2gray(reshape_test_input[i, :, :, :])
        
    train_input_gray = re_train_input_gray.transpose(2,0,1)
    test_input_gray = re_test_input_gray.transpose(2,0,1)
    #figure1=plt.figure()
    #plt.imshow(train_input_gray[49999, :, :], cmap='gray')
    #plt.show()
    #figure2=plt.figure()
    #plt.imshow(reshape_train_input[49999, :, :, :], cmap='gray')
    #plt.show()
    return train_input_gray, train_label, test_input_gray, test_label

def whitening(I):
    I -= np.mean(I, axis=0)
    covar = np.dot(I.T, I)/I.shape[0]
    U,S,V = np.linalg.svd(covar)
    w_matrix = np.dot(np.dot(U, np.diag(1./np.sqrt(S + 10e-5))), U.T)
    White_I = np.dot(I, w_matrix)
    return White_I


In [2]:
#additional for PCA
train_input_gray, train_label, test_input_gray, test_label = data_pre_processing()

hogs=[]
for i in range(0,train_input_gray.shape[0]):
    whiten_train_data = whitening(train_input_gray[i, :, :])
    hogfeature = feat.hog(whiten_train_data, pixels_per_cell=(4, 4))
    hogs.append(hogfeature)
hogs_array = np.reshape(np.array(hogs), (train_input_gray.shape[0], -1))
print(hogs_array.shape)

hogs_test =[]
for i in range(0,test_input_gray.shape[0]):
    whiten_test_data = whitening(test_input_gray[i, :, :])
    hogfeature = feat.hog(whiten_test_data, pixels_per_cell=(4, 4))
    hogs_test.append(hogfeature)
hogs_test_array = np.reshape(np.array(hogs_test), (test_input_gray.shape[0], -1))
print(hogs_test_array.shape)


C:\Users\user\Anaconda3\lib\site-packages\skimage\feature\_hog.py:150: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15. To supress this message specify explicitly the normalization method.
  skimage_deprecation)


(50000, 2916)


(10000, 2916)


In [21]:
#additional for PCA
total_array = np.concatenate((hogs_array,hogs_test_array),axis=0)
pca = dec.PCA(n_components=1100)
new_array = pca.fit_transform(total_array)
new_hogs_array = new_array[0:50000,:]
new_hogs_test_array = new_array[50000:60000,:]
print(new_hogs_array.shape)
print(new_hogs_test_array.shape)


(50000, 1100)
(10000, 1100)


In [22]:
# additional for PCA

train_input_gray, train_label, test_input_gray, test_label = data_pre_processing()

accuracy_results={}
best_svm = None
best_val = -1
count=0
training_time = 0
kf = mod_s.KFold(n_splits=5, shuffle=False)
for train_index, val_index in kf.split(new_hogs_array):
    start = time.clock()
    count +=1
    train_input_k, val_input_k = new_hogs_array[train_index], new_hogs_array[val_index]
    train_label_k, val_label_k = train_label[train_index], train_label[val_index]
    classifier = svm.LinearSVC()
    classifier.fit(train_input_k,train_label_k)
    end = time.clock()
    #print(classifier.get_params())
    train_pre = classifier.predict(train_input_k)
    val_predict = classifier.predict(val_input_k)
    train_acc = np.mean(np.uint(train_pre == train_label_k))
    val_acc = np.mean(np.uint(val_predict == val_label_k))
    accuracy_results[count] = (train_acc, val_acc)
    if val_acc > best_val:
        best_val = val_acc
        best_svm = classifier
    else:
        pass
    training_time += (end-start)
    print('the %d iteration,Training time with 5 fold linearsvm is:%ss'%(count, str(training_time)))

for i in sorted(accuracy_results.keys()):
    train_acc, val_acc = accuracy_results[i]
    print('the %d iteration, the training accuracy is: %f, the validation accuracy \
          is: %f. \n' %(i, train_acc, val_acc))   
    


the 1 iteration,Training time with 5 fold linearsvm is:64.7690440932879s


the 2 iteration,Training time with 5 fold linearsvm is:117.40217669588583s


the 3 iteration,Training time with 5 fold linearsvm is:171.71480288910834s


the 4 iteration,Training time with 5 fold linearsvm is:224.57500061296878s


the 5 iteration,Training time with 5 fold linearsvm is:279.4217728889771s
the 1 iteration, the training accuracy is: 0.484025, the validation accuracy           is: 0.439300. 

the 2 iteration, the training accuracy is: 0.486625, the validation accuracy           is: 0.424000. 

the 3 iteration, the training accuracy is: 0.482400, the validation accuracy           is: 0.438600. 

the 4 iteration, the training accuracy is: 0.487650, the validation accuracy           is: 0.431700. 

the 5 iteration, the training accuracy is: 0.484075, the validation accuracy           is: 0.438300. 



In [None]:
#additional for PCA
start = time.clock()
test_predict = best_svm.predict(new_hogs_test_array)
test_acc = np.mean(np.uint(test_predict == test_label))
end = time.clock()
test_time = end-start
print('Testing time  is:%ss, testing accuracy is: %f'%(str(end-start), test_acc))


Below codes are features with no PCA

In [4]:
#Non PCA

start = time.clock()
train_input_gray, train_label, test_input_gray, test_label = data_pre_processing()

hogs_train =[]
hogs_val =[]
accuracy_results={}
best_svm = None
best_val = -1
count=0
kf = mod_s.KFold(n_splits=5, shuffle=False)
for train_index, val_index in kf.split(train_input_gray):
    count +=1
    train_input_k, val_input_k = train_input_gray[train_index], train_input_gray[val_index]
    train_label_k, val_label_k = train_label[train_index], train_label[val_index]
    for i in range(0, train_input_k.shape[0]):
        whiten_train_data = whitening(train_input_k[i, :, :])
        hogfeature = feat.hog(whiten_train_data, pixels_per_cell=(4, 4))
        hogs_train.append(hogfeature)
    hogs_array_train = np.reshape(np.array(hogs_train),(train_input_k.shape[0], -1))
    for i in range(0, val_input_k.shape[0]):
        whiten_train_data = whitening(val_input_k[i, :, :])
        hogfeature = feat.hog(whiten_train_data , pixels_per_cell=(4, 4))
        hogs_val.append(hogfeature)
    hogs_array_val = np.reshape(np.array(hogs_val),(val_input_k.shape[0], -1))
    classifier = svm.LinearSVC()
    classifier.fit(hogs_array_train,train_label_k)
    #print(classifier.get_params())
    train_pre = classifier.predict(hogs_array_train)
    val_predict = classifier.predict(hogs_array_val)
    train_acc = np.mean(np.uint(train_pre == train_label_k))
    val_acc = np.mean(np.uint(val_predict == val_label_k))
    accuracy_results[count] = (train_acc, val_acc)
    if val_acc > best_val:
        best_val = val_acc
        best_svm = classifier
    else:
        pass
    hogs_train =[]
    hogs_val =[]

end = time.clock()
training_time = end-start
print('Training time with 5 fold linearsvm is:%ss'%(str(end-start)))
for i in sorted(accuracy_results.keys()):
    train_acc, val_acc = accuracy_results[i]
    print('the %d iteration, the training accuracy is: %f, the validation accuracy \
          is: %f. \n' %(i, train_acc, val_acc))


C:\Users\user\Anaconda3\lib\site-packages\skimage\feature\_hog.py:150: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15. To supress this message specify explicitly the normalization method.
  skimage_deprecation)


Testing time  is:10.310241030834732s, testing accuracy is: 0.441300


In [7]:

hogs_test =[]
for i in range(0, test_input_gray.shape[0]):
    whiten_data_test = whitening(test_input_gray[i,:,:])
    hogfeature_test = feat.hog(whiten_data_test, pixels_per_cell=(4, 4))
    hogs_test.append(hogfeature_test)
hogs_array_test = np.reshape(np.array(hogs_test),(np.int(test_input_gray.shape[0]), -1))
start = time.clock()
test_predict = best_svm.predict(hogs_array_test)
test_acc = np.mean(np.uint(test_predict == test_label))
end = time.clock()
test_time = end-start
print('Testing time  is:%ss, testing accuracy is: %f'%(str(end-start), test_acc))

C:\Users\user\Anaconda3\lib\site-packages\skimage\feature\_hog.py:150: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15. To supress this message specify explicitly the normalization method.
  skimage_deprecation)


Testing time  is:10.310241030834732s, testing accuracy is: 0.441300
