In [1]:
# import necessary python packages

import numpy as np
import random
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from os import listdir
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.callbacks import EarlyStopping, TensorBoard
import pickle
import gzip

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Define functions to use

In [2]:
def softmax(z):
    """Numerically stable softmax taken along the first axis (axis 0) of z.

    z: a 1-D vector of scores, or a 2-D array holding one column of scores
       per sample (the layout produced by ``weight.dot(x.T)`` in this file).

    Returns an array with the same shape as z whose entries along axis 0 are
    non-negative and sum to 1.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; keep the (empty) shape of the input.
        return np.exp(z)
    # Subtracting the per-column maximum does not change the mathematical
    # result but keeps np.exp from overflowing to inf (inf/inf would be nan).
    shifted = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

def EMS_logistic(weight,x_input,y_input,label_input,lamb):
    """Return the regularized cross-entropy error measure and the accuracy of a logistic regression model.

    weight: the weight matrix for logistic regression (one row per class).
    x_input: the input data matrix (one row per sample).
    y_input: the target matrix (one row per sample, one column per class).
    label_input: the label vector (one class index per sample).
    lamb: regularization factor.

    Returns (error, accuracy) where
        error    = sqrt(2 * (cross_entropy + lamb * ||weight||_2) / N)
        accuracy = fraction of samples whose arg-max prediction equals the label
    and N = len(label_input).
    """

    n_samples = len(label_input)
    y_pred = softmax(weight.dot(x_input.T)).T[:n_samples]
    y_true = np.asarray(y_input)[:n_samples]

    # A prediction that underflows to exactly 0 on a class whose target is 0
    # would give 0 * log(0) = nan and poison the whole sum; such terms
    # contribute nothing to the cross entropy, so they are masked out.
    with np.errstate(divide='ignore', invalid='ignore'):
        terms = y_true * np.log(y_pred)
    entropy = -np.sum(np.where(y_true == 0, 0.0, terms))
    entropy = np.sqrt(2*(entropy+np.sqrt(np.sum(weight**2))*lamb) / n_samples)

    # Count the samples whose most probable class matches the label.
    hits = np.count_nonzero(np.argmax(y_pred, axis=1) == np.asarray(label_input))
    return entropy, hits/n_samples

    
def logistic_regression(x_train, y_train, lamb, lr, epoch, silent=True):
    """Fit the logistic regression weight matrix with per-sample stochastic gradient descent.

    Inputs:
    x_train: training data, one row per sample.
    y_train: training target matrix, one row per sample and one column per class.
    lamb, lr: regularization factor and learning rate, respectively.
    epoch: number of passes over the training data.
    silent: when False, print the accumulated squared gradient after every epoch.

    Returns the weight matrix of shape (number of classes, number of features).
    """

    n_samples, n_classes = y_train.shape[0], y_train.shape[1]
    n_features = x_train.shape[1]

    # Weights start as uniform random numbers in [0, 1).
    w = np.random.uniform(low=0.0, high=1.0, size=(n_classes, n_features))

    for epoch_idx in range(epoch):
        sum_error = 0
        # Visit the samples in a fresh random order every epoch.
        visit_order = np.random.permutation(np.arange(n_samples))
        for i in visit_order:
            sample = x_train[i].T
            residual = y_train[i] - softmax(w.dot(sample))
            # Gradient of the regularized cross entropy for this one sample.
            E_del = lamb*w - np.outer(residual, sample)
            sum_error += np.sum(E_del**2) / n_samples
            w = w - lr*E_del
        if silent:
            continue
        print('>epoch={}, lrate={}, error={}'.format(epoch_idx+1, lr, sum_error))

    return w

def select_params_logistic(lamb_list,lr_list,x_train,y_train,label_train,x_val,y_val,label_val,epoch):
    """Grid-search the regularization factor and learning rate of logistic regression by validation accuracy.

    lamb_list: sequence (list or array) of regularization factors to search.
    lr_list: sequence (list or array) of learning rates to search.
    x_train, y_train, label_train: training data, training target matrix and training labels
        (label_train is accepted for symmetry but not used by the search).
    x_val, y_val, label_val: validation data, validation target matrix and validation labels.
    epoch: number of epochs to run for every candidate.

    Returns (lamb_best, lr_best, acc_all) where acc_all[i, j] is the validation
    accuracy obtained with lr_list[i] and lamb_list[j]. If no candidate reaches
    an accuracy above 0, the first entry of each list is returned.
    """

    max_acc = 0
    lamb_best = lamb_list[0]
    lr_best = lr_list[0]
    # len() instead of .shape[0] so that plain Python lists work as well as arrays.
    acc_all = np.zeros((len(lr_list), len(lamb_list)))
    for i, lr in enumerate(lr_list):
        for j, lamb in enumerate(lamb_list):
            w = logistic_regression(x_train, y_train, lamb, lr, epoch)
            _, acc_val = EMS_logistic(w,x_val,y_val,label_val,lamb)
            acc_all[i,j] = acc_val
            # Strict '>' keeps the earliest candidate on ties.
            if acc_val > max_acc:
                max_acc = acc_val
                lamb_best = lamb
                lr_best = lr
    return lamb_best, lr_best, acc_all

def get_cnn_model(filters,drop_out):
    """Build and compile the convolutional neural network used in this file.

    The network stacks two 3x3 convolutional layers (each followed by batch
    normalization), a max-pooling layer, dropout, a 512-unit dense layer,
    dropout again and a 10-way softmax output. Inputs are 28x28x1 images.

    filters: Number of filters in each convolutional layer.
    drop_out: Dropout rate used after pooling and after the dense layer.
    """

    conv_kwargs = dict(filters=filters, kernel_size=(3,3), activation='relu')
    layer_stack = [
        # Convolutional feature extractor
        Conv2D(input_shape=(28,28,1), **conv_kwargs),
        BatchNormalization(),   # can make training faster
        Conv2D(**conv_kwargs),
        BatchNormalization(),
        MaxPool2D(strides=(2,2)),
        Dropout(drop_out),      # prevent overfitting
        # Classifier head
        Flatten(),              # compress output into one vector
        Dense(512, activation='relu'),
        Dropout(drop_out),
        Dense(10, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def get_best_params_cnn(x_train,y_train,x_val,y_val,filter_list,do_list):
    """Search for the best number of filters and dropout rate of the CNN built by get_cnn_model.

    The search is sequential rather than a full grid: every entry of
    filter_list is first tried with one reference dropout rate, then every
    entry of do_list is tried with the best number of filters found.

    x_train, y_train: training data and training target matrix.
    x_val, y_val: validation data and validation target matrix.
    filter_list: number of filters to search from.
    do_list: dropout rates to search from.

    Returns (filter_best, do_best, val_acc_filter, val_acc_do), where the two
    lists hold the best validation accuracy reached by each candidate.
    Raises ValueError if either candidate list is empty.
    """

    if len(filter_list) == 0 or len(do_list) == 0:
        raise ValueError('filter_list and do_list must each contain at least one value.')

    # Specify key parameters
    num_epochs = 5
    model_batch_size = 200

    def _best_val_accuracy(filters, drop_out):
        # Train a fresh model and return the highest validation accuracy it reached.
        model = get_cnn_model(filters, drop_out)
        history = model.fit(x_train,
                            y_train,
                            validation_data=(x_val, y_val),
                            epochs=num_epochs,
                            batch_size=model_batch_size,
                            verbose=1)
        metrics = history.history
        # Keras < 2.3 logs the metric as 'val_acc'; newer releases use 'val_accuracy'.
        acc_key = 'val_acc' if 'val_acc' in metrics else 'val_accuracy'
        return np.max(metrics[acc_key])

    # Reference dropout for the filter sweep: the third candidate as before,
    # or the last one when fewer than three candidates are supplied.
    reference_do = do_list[min(2, len(do_list) - 1)]

    # First find the best number of filters
    val_acc_filter = []
    for fil in filter_list:
        acc = _best_val_accuracy(fil, reference_do)
        print('Number of filter {} is tested.'.format(fil))
        val_acc_filter.append(acc)
    filter_best = filter_list[np.argmax(val_acc_filter)]
    print('Best number of filter for this model is {}'.format(filter_best))

    # Now choose the best dropout rate, using the best number of filters found
    # above (previously a fixed filter_list[2] was used and filter_best ignored).
    val_acc_do = []
    for drop_out in do_list:
        acc = _best_val_accuracy(filter_best, drop_out)
        print('Dropout rate {} is tested.'.format(drop_out))
        val_acc_do.append(acc)
    do_best = do_list[np.argmax(val_acc_do)]
    print('Best drop out for this model is {}'.format(do_best))

    return filter_best, do_best, val_acc_filter, val_acc_do

def get_dnn_model(drop_out,nodes):
    """Build and compile a fully connected network with two hidden layers.

    Both hidden layers have the same width, use ReLU activations and are
    followed by dropout; the output is a 10-way softmax. Inputs are vectors
    of length 784.

    drop_out: Dropout rate applied after each hidden layer.
    nodes: number of nodes in each hidden layer.
    """

    model = Sequential()

    # Two identical hidden blocks; only the first one declares the input size.
    for dense_kwargs in ({'input_dim': 784}, {}):
        model.add(Dense(nodes, **dense_kwargs))
        model.add(Activation('relu'))
        model.add(Dropout(drop_out))

    # Output layer
    model.add(Dense(10))
    model.add(Activation('softmax'))

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def get_best_params_dnn(x_train,y_train,x_val,y_val,nodes_list,do_list):
    """Search for the best hidden-layer width and dropout rate of the DNN built by get_dnn_model.

    The search is sequential rather than a full grid: every entry of
    nodes_list is first tried with one reference dropout rate, then every
    entry of do_list is tried with the best number of nodes found.

    x_train, y_train: training data and training target matrix.
    x_val, y_val: validation data and validation target matrix.
    nodes_list: number of nodes in hidden layers to search from.
    do_list: dropout rates to search from.

    Returns (do_best, nodes_best, val_acc_nodes, val_acc_do), where the two
    lists hold the best validation accuracy reached by each candidate.
    Raises ValueError if either candidate list is empty.
    """

    if len(nodes_list) == 0 or len(do_list) == 0:
        raise ValueError('nodes_list and do_list must each contain at least one value.')

    # Specify some key parameters
    num_epochs = 100
    model_batch_size = 100
    tb_batch_size = 32
    early_patience = 20

    tensorboard_cb   = TensorBoard(log_dir='logs', batch_size= tb_batch_size, write_graph= True)
    earlystopping_cb = EarlyStopping(monitor='val_loss', verbose=1, patience=early_patience, mode='min')

    def _best_val_accuracy(drop_out, nodes):
        # Train a fresh model and return the highest validation accuracy it reached.
        model = get_dnn_model(drop_out, nodes)
        history = model.fit(x_train,
                            y_train,
                            validation_data=(x_val, y_val),
                            epochs=num_epochs,
                            batch_size=model_batch_size,
                            callbacks = [tensorboard_cb,earlystopping_cb],
                            verbose=0)
        metrics = history.history
        # Keras < 2.3 logs the metric as 'val_acc'; newer releases use 'val_accuracy'.
        acc_key = 'val_acc' if 'val_acc' in metrics else 'val_accuracy'
        return np.max(metrics[acc_key])

    # Reference dropout for the node sweep: the third candidate as before,
    # or the last one when fewer than three candidates are supplied.
    reference_do = do_list[min(2, len(do_list) - 1)]

    # First find the best number of nodes
    val_acc_nodes = []
    for nodes in nodes_list:
        acc = _best_val_accuracy(reference_do, nodes)
        print('nodes {} is tested.'.format(nodes))
        val_acc_nodes.append(acc)
    nodes_best = nodes_list[np.argmax(val_acc_nodes)]
    print('Best number of nodes for this model is {}'.format(nodes_best))

    # Now choose the best dropout rate for the best number of nodes.
    val_acc_do = []
    for drop_out in do_list:
        acc = _best_val_accuracy(drop_out, nodes_best)
        print('Dropout rate {} is tested.'.format(drop_out))
        val_acc_do.append(acc)
    do_best = do_list[np.argmax(val_acc_do)]
    print('Best drop out for this model is {}'.format(do_best))

    return do_best, nodes_best,  val_acc_nodes, val_acc_do
    
def accuracy_metrics(model,x_input,y_input):
    """Compute the classification accuracy of a model whose predict() returns per-class scores.

    model: the model to make predictions; model.predict(x_input) must return
        one row of class scores per sample.
    x_input: the test data.
    y_input: the test labels (class indices, not one-hot vectors).

    Returns the fraction of samples whose highest-scoring class equals the label.
    """
    y = model.predict(x_input)
    # Arg-max over the class axis turns the score rows into predicted labels.
    y_pred = np.argmax(y, axis=1)
    # accuracy_score comes from the import cell at the top of the file.
    return accuracy_score(y_input, y_pred)

Read in datasets

In [3]:
# Read USPS dataset: one sub-folder per digit, every PNG converted to a
# 28x28 grayscale image and flattened to a vector of 784 pixel values.
x_usps = []
label_usps = []
for j in range(10):
    digit_dir = 'USPSdata/Numerals/'+str(j)+'/'
    train_files = [name for name in listdir(digit_dir) if 'png' in name]
    for name in train_files:
        # The context manager closes each image file as soon as its pixels are read.
        with Image.open(digit_dir+name) as img:
            arr = list(img.convert('L').resize((28,28)).getdata())
        x_usps.append(arr)
        label_usps.append(j)
x_usps = np.array(x_usps)
label_usps = np.array(label_usps)

# Shuffle the dataset (samples and labels with the same permutation)
indices = np.random.permutation(x_usps.shape[0])
x_usps_test = x_usps[indices,:]
label_usps_test = label_usps[indices]
# Normalize the dataset to [0, 1] and invert the gray levels
x_usps_test = 1.0-x_usps_test/255.0

# Read MNIST data
# NOTE: pickle.load can execute arbitrary code; only load a trusted mnist.pkl.gz.
filename = 'mnist.pkl.gz'
with gzip.open(filename, 'rb') as mnist_file:
    training_data, validation_data, test_data = pickle.load(mnist_file, encoding='latin1')
x_train, x_val, x_test = training_data[0], validation_data[0], test_data[0]
label_train, label_val, label_test = training_data[1], validation_data[1], test_data[1]

# One hot encoding for all the labels. The number of classes is passed
# explicitly so that every target matrix has 10 columns even if a digit
# happens to be missing from one of the label vectors.
y_train = np_utils.to_categorical(label_train, 10).astype(np.float64)
y_val = np_utils.to_categorical(label_val, 10).astype(np.float64)
y_test = np_utils.to_categorical(label_test, 10).astype(np.float64)
y_usps_test = np_utils.to_categorical(label_usps_test, 10).astype(np.float64)

Five machine learning models are trained on the datasets. A grid search was applied in all models to find the optimum parameters. Because of the large size of the training set, the grid search takes a very long time to run, so it was run once and then commented out. The best parameters were then hard-coded based on the grid search results. For the Gaussian (RBF) support vector machine models, a single run takes 4-5 hrs, so it was not practical to run the grid search on them.

Logistic Regression

In [5]:
#    # Finding the best hyperparameters for logistic regression
#    lamb_list = [0.005, 0.01, 0.04, 0.07]
#    lr_list = [0.0005, 0.001, 0.005, 0.01] 
#    lamb_best, lr_best, Eval_all = select_params_logistic(np.array(lamb_list),np.array(lr_list),x_train,y_train,label_train,x_val,y_val,label_val,20)
#    lr_index = lr_list.index(lr_best)
#    lamb_index = lamb_list.index(lamb_best)
#    plt.figure()
#    plt.plot(lamb_list, Eval_all[lr_index,:],'o')
#    plt.xlabel(r'$\lambda$', fontsize=20)
#    plt.ylabel('Validation Accuracy', fontsize=20)
#    plt.title(r'Logistic Regression', fontsize=20)
#    plt.figure()
#    plt.plot(lr_list, Eval_all[:,lamb_index],'o')
#    plt.xlabel('Learning Rate', fontsize=20)
#    plt.ylabel('Validation Accuracy', fontsize=20)
#    plt.title(r'Logistic Regression ', fontsize=20)

In [6]:
# Best hyperparameters, hard-coded from the grid search
lamb_best, lr_best = 0.005, 0.0005

# Construct the optimum logistic regression model
w = logistic_regression(x_train, y_train, lamb_best, lr_best, 20, silent=True)

# Error measure and accuracy on every split, evaluated in the order
# training, validation, MNIST test, USPS test.
(E_train, acc_train), (E_val, acc_val), (E_test, acc_test), (E_usps_test, acc_usps_test) = [
    EMS_logistic(w, x_split, y_split, label_split, lamb_best)
    for x_split, y_split, label_split in (
        (x_train, y_train, label_train),
        (x_val, y_val, label_val),
        (x_test, y_test, label_test),
        (x_usps_test, y_usps_test, label_usps_test),
    )
]

rule = '----------------------------------------------------'
print (rule)
print ("-------Logistic Regression Results-------")
print (rule)
print ("Best parameters: \nLambda = {} \nrate = {}".format(lamb_best,lr_best))
for template, err, acc in (
        ("EMS Training: {}, accuracy Training: {}", E_train, acc_train),
        ("EMS Validation: {}, accuracy Validation: {}", E_val, acc_val),
        ("EMS Testing: {}, accuracy Testing: {}", E_test, acc_test),
        ("EMS USPS: {}, accuracy USPS: {}", E_usps_test, acc_usps_test)):
    print (template.format(err, acc))


----------------------------------------------------
-------Logistic Regression Results-------
----------------------------------------------------
Best parameters: 
Lambda = 0.005 
rate = 0.0005
EMS Training: 0.8567894208340373, accuracy Training: 0.90588
EMS Validation: 0.8262256118911976, accuracy Validation: 0.9119
EMS Testing: 0.8362218267663069, accuracy Testing: 0.9087
EMS USPS: 2.164904157896523, accuracy USPS: 0.36566828341417074


In [5]:
# Confusion matrices of the logistic regression model on the MNIST and USPS test sets
y_pred_log = softmax(w.dot(x_test.T)).T
label_pred_log = list(np.argmax(y_pred_log, axis=1))
cf_log = confusion_matrix(label_test, label_pred_log)

y_usps_pred_log = softmax(w.dot(x_usps_test.T)).T
label_usps_pred_log = list(np.argmax(y_usps_pred_log, axis=1))
cf_usps_log = confusion_matrix(label_usps_test, label_usps_pred_log)

print ('\n')
for caption, matrix in (
        ('Confusion matrix by logistic regression model on MNIST test set:', cf_log),
        ('Confusion matrix by logistic regression model on USPS test set:', cf_usps_log)):
    print (caption)
    print (matrix)



Confusion matrix by logistic regression model on MNIST test set:
[[ 960    0    1    1    1    5    7    1    4    0]
 [   0 1106    2    3    1    2    4    1   16    0]
 [   9    7  898   13   17    3   16   16   45    8]
 [   4    1   22  894    1   38    4   13   23   10]
 [   1    5    4    1  916    0    9    1    8   37]
 [  11    5    1   30   10  768   15    9   35    8]
 [  12    3    4    0   11   16  905    1    6    0]
 [   2   17   24    4    9    0    0  930    3   39]
 [   6    9    5   19    9   31   13   14  854   14]
 [  11    9    4    9   44   11    0   20    6  895]]
Confusion matrix by logistic regression model on USPS test set:
[[ 626    3  243   54  227  126   76   48  173  424]
 [ 170  405   14  302  358   73   34  313  301   30]
 [ 196   29 1176  153   76   89   88   76   87   29]
 [  93    3  131 1234   24  299   15   64   91   46]
 [  54   83   36   47 1014   83   31  148  317  187]
 [ 169   23  128  159   46 1140  118   80  101   36]
 [ 275   16  378   9

Support Vector Machines

In [6]:
#    # Finding the best regularization factor for linear vector machines
#    C_factors = [1,2,5,10]
#    # Linear Kernel
#    #Find the best regularization factor to use  
#    acc_val_linear_all = []
#    for C in C_factors:
#        svc_linear = SVC(kernel='linear', C=C)
#        svc_linear.fit(x_train,label_train)
#        acc_val_svc_linear = svc_linear.score(x_val,label_val)
#        acc_val_linear_all.append(acc_val_svc_linear)
#    plt.figure()
#    plt.plot(C_factors, acc_val_linear_all, 'o')
#    plt.xlabel('Regularization Factor', fontsize=20)
#    plt.ylabel('Validation Accuracy', fontsize=20)
#    plt.title(r'Linear SVC', fontsize=20)
#    
#    C_best_linear = C_factors[np.argmax(acc_val_linear_all)]    

In [7]:
# Best regularization factor, hard-coded from the grid search
C_best_linear = 1

# Fit the linear-kernel support vector classifier on the MNIST training set
svc_linear = SVC(kernel='linear', C=C_best_linear)
svc_linear.fit(x_train,label_train)

# Accuracy on every split
acc_train_svc_linear = svc_linear.score(x_train,label_train)
acc_val_svc_linear = svc_linear.score(x_val,label_val)
acc_test_svc_linear = svc_linear.score(x_test,label_test)
acc_usps_test_svc_linear = svc_linear.score(x_usps_test, label_usps_test)

rule = '----------------------------------------------------'
print ('\n')
print (rule)
print ("-------Support Vector Machine Results-------")
print (rule)
print ('Linear Kernel')
print ("Best parameter: regularization factor = {}".format(C_best_linear))
for template, acc in (
        ("accuracy Training: {}", acc_train_svc_linear),
        ("accuracy Validation: {}", acc_val_svc_linear),
        ("accuracy Testing: {}", acc_test_svc_linear),
        ("accuracy USPS: {}", acc_usps_test_svc_linear)):
    print (template.format(acc))



----------------------------------------------------
-------Support Vector Machine Results-------
----------------------------------------------------
Linear Kernel
Best parameter: regularization factor = 1
accuracy Training: 0.97246
accuracy Validation: 0.9423
accuracy Testing: 0.939
accuracy USPS: 0.3272163608180409


In [8]:
# Confusion matrices of the linear SVC on the MNIST and USPS test sets
label_pred_svc = svc_linear.predict(x_test)
label_usps_pred_svc = svc_linear.predict(x_usps_test)

cf_svc = confusion_matrix(label_test, label_pred_svc)
cf_usps_svc = confusion_matrix(label_usps_test, label_usps_pred_svc)

print ('\n')
for caption, matrix in (
        ('Confusion matrix by linear svc model on MNIST test set:', cf_svc),
        ('Confusion matrix by linear svc model on USPS test set:', cf_usps_svc)):
    print (caption)
    print (matrix)



Confusion matrix by linear svc model on MNIST test set:
[[ 959    0    5    2    2    4    7    0    1    0]
 [   0 1121    3    3    0    1    2    1    4    0]
 [   6    8  968    9    3    2   11   10   13    2]
 [   5    2   17  944    4   13    1    8   13    3]
 [   2    1   10    1  943    0    4    2    2   17]
 [  13    4    2   39    5  792    9    1   22    5]
 [  10    3   11    1    5   14  911    2    1    0]
 [   1    8   20   10    6    1    0  961    3   18]
 [   8    4    9   25   11   27    6    5  871    8]
 [   7    6    2   13   32    4    0   18    7  920]]
Confusion matrix by linear svc model on USPS test set:
[[ 483    3  320   70  254  298   58  125   14  375]
 [  60  475  136  300  342  167   22  413   62   23]
 [ 176   91 1163  126   55  209   60   62   37   20]
 [  70   59  294  922   14  509    5   54   54   19]
 [  26   25  136   76  884  194    9  463   84  103]
 [  60   17  166  250   85 1203   36   50  104   29]
 [ 168   23  730   50  154  312  539  

In [9]:
##     Radial Basis Functions Kernel with gamma = 1
#    svc_rbf1 = SVC(kernel='rbf', gamma=1)
#    svc_rbf1.fit(x_train,label_train)
#    acc_train_svc_rbf1, acc_val_svc_rbf1, acc_test_svc_rbf1 = svc_rbf1.score(x_train,label_train), svc_rbf1.score(x_val,label_val), svc_rbf1.score(x_test,label_test)
#    acc_usps_test_svc_rbf1 = svc_rbf1.score(x_usps_test, label_usps_test)    
#
#    print ('\n')    
#    print ('----------------------------------------------------')    
#    print ('Radial Basis Function Kernel')
#    print ('gamma=1')
#    print ("accuracy Training: {}".format(acc_train_svc_rbf1))
#    print ("accuracy Validation: {}".format(acc_val_svc_rbf1))
#    print ("accuracy Testing: {}".format(acc_test_svc_rbf1))    
#    print ("accuracy USPS: {}".format(acc_usps_test_svc_rbf1))
#    label_pred_svc_rbf1 = svc_rbf1.predict(x_test)
#    cf_svc_rbf1 = confusion_matrix(label_test, label_pred_svc_rbf1)
#    label_usps_pred_svc_rbf1 = svc_rbf1.predict(x_usps_test)
#    cf_usps_svc_rbf1 = confusion_matrix(label_usps_test, label_usps_pred_svc_rbf1)
#    print ('\n')
#    print ('Confusion matrix by rbf svc model (gamma=1.0) on MNIST test set:')
#    print (cf_svc_rbf1)
#    print ('Confusion matrix by rbf svc model (gamma=1.0) on USPS test set:')
#    print (cf_usps_svc_rbf1)

In [10]:
##     Radial Basis Functions Kernel with gamma = auto
    
#    svc_rbf2 = SVC(kernel='rbf', gamma='auto')
#    svc_rbf2.fit(x_train,label_train)
#    acc_train_svc_rbf2, acc_val_svc_rbf2, acc_test_svc_rbf2 = svc_rbf2.score(x_train,label_train), svc_rbf2.score(x_val,label_val), svc_rbf2.score(x_test,label_test)
#    acc_usps_test_svc_rbf2 = svc_rbf2.score(x_usps_test, label_usps_test)   
#   
#    print ('gamma=auto')
#    print ("accuracy Training: {}".format(acc_train_svc_rbf2))
#    print ("accuracy Validation: {}".format(acc_val_svc_rbf2))
#    print ("accuracy Testing: {}".format(acc_test_svc_rbf2))    
#    print ("accuracy USPS: {}".format(acc_usps_test_svc_rbf2))
#    label_pred_svc_rbf2 = svc_rbf2.predict(x_test)
#    cf_svc_rbf2 = confusion_matrix(label_test, label_pred_svc_rbf2)
#    label_usps_pred_svc_rbf2 = svc_rbf2.predict(x_usps_test)
#    cf_usps_svc_rbf2 = confusion_matrix(label_usps_test, label_usps_pred_svc_rbf2)
#    print ('\n')
#    print ('Confusion matrix by rbf svc model (gamma=auto) on MNIST test set:')
#    print (cf_svc_rbf2)
#    print ('Confusion matrix by rbf svc model (gamma=auto) on USPS test set:')
#    print (cf_usps_svc_rbf2)

Random Forest

In [11]:
#    # Find the best number of trees to use
#    n_trees = [10,25,50,100,150,200]
#    acc_val_all = []
#    for n in n_trees:
#        rf = RandomForestClassifier(n_estimators=n)
#        rf.fit(x_train,label_train)
#        acc_val_rf = rf.score(x_val,label_val)    
#        acc_val_all.append(acc_val_rf)
#    plt.figure()
#    plt.plot(n_trees, acc_val_all,'o')
#    plt.xlabel('Number of Trees', fontsize=20)
#    plt.ylabel('Validation Accuracy', fontsize=20)
#    plt.title(r'Random Forest', fontsize=20)
#    n_trees_best = n_trees[np.argmax(acc_val_all)]

In [12]:
# Best number of trees, hard-coded from the grid search
n_trees_best = 200

# Fit the random forest on the MNIST training set
rf = RandomForestClassifier(n_estimators=n_trees_best)
rf.fit(x_train,label_train)

# Accuracy on every split
acc_train_rf = rf.score(x_train,label_train)
acc_val_rf = rf.score(x_val,label_val)
acc_test_rf = rf.score(x_test,label_test)
acc_usps_test_rf = rf.score(x_usps_test, label_usps_test)

rule = '----------------------------------------------------'
print ('\n')
print (rule)
print ("-------Random Forests Results-------")
print (rule)
print ("Best parameters: \nnumber of trees = {}".format(n_trees_best))
for template, acc in (
        ("accuracy Training: {}", acc_train_rf),
        ("accuracy Validation: {}", acc_val_rf),
        ("accuracy Testing: {}", acc_test_rf),
        ("accuracy USPS: {}", acc_usps_test_rf)):
    print (template.format(acc))



----------------------------------------------------
-------Random Forests Results-------
----------------------------------------------------
Best parameters: 
number of trees = 200
accuracy Training: 1.0
accuracy Validation: 0.9733
accuracy Testing: 0.9698
accuracy USPS: 0.4136706835341767


In [13]:
# Confusion matrices of the random forest on the MNIST and USPS test sets
label_pred_rf = rf.predict(x_test)
label_usps_pred_rf = rf.predict(x_usps_test)

cf_rf = confusion_matrix(label_test, label_pred_rf)
cf_usps_rf = confusion_matrix(label_usps_test, label_usps_pred_rf)

print ('\n')
for caption, matrix in (
        ('Confusion matrix by random forest model on MNIST test set:', cf_rf),
        ('Confusion matrix by random forest model on USPS test set:', cf_usps_rf)):
    print (caption)
    print (matrix)



Confusion matrix by random forest model on MNIST test set:
[[ 970    0    2    0    0    2    3    1    2    0]
 [   0 1122    3    3    0    1    3    0    2    1]
 [   6    0 1000    6    2    0    3    9    6    0]
 [   0    0   10  974    0    6    0    9    8    3]
 [   1    0    1    0  954    0    5    0    2   19]
 [   2    0    0   13    3  859    5    2    6    2]
 [   8    3    1    0    2    2  939    0    3    0]
 [   0    2   18    2    0    0    0  993    1   12]
 [   4    0    5   10    3    5    4    5  927   11]
 [   8    5    2    8   10    4    1    5    6  960]]
Confusion matrix by random forest model on USPS test set:
[[ 611   21  251   59  449  141   58  128    3  279]
 [   8  717   24  100   16  120   32  980    2    1]
 [  78   42 1220   73   56  196   12  313    6    3]
 [  30    8   66 1284   45  338    0  199    5   25]
 [   7  237   46   24 1104  144   13  377   29   19]
 [  74   47   68   75   13 1575    9  127    4    8]
 [ 281   50  232   18   99  378 

Deep Neural Networks

In [15]:
#    # Find the best parameters for DNN
#    nodes_list = [128,256,512,1024]
#    do_list = [0.1,0.2,0.3,0.4]
#    
#    do_best, nodes_best, val_acc_nodes, val_acc_do = get_best_params_dnn(x_train,y_train,x_val,y_val,nodes_list,do_list)    
#
#    plt.figure()
#    plt.plot(nodes_list, val_acc_nodes, 'o')
#    plt.xlabel('Number of Nodes in Hidden Layers', fontsize=15)
#    plt.ylabel('Validation Accuracy', fontsize=15)
#    plt.xticks(fontsize=15)
#    plt.yticks(fontsize=15)
#    plt.title('Deep Neural Networks')
#    plt.figure()
#    plt.plot(do_list, val_acc_do, 'o')
#    plt.xlabel('Drop Out Rate', fontsize=15)
#    plt.ylabel('Validation Accuracy', fontsize=15)
#    plt.xticks(fontsize=15)
#    plt.yticks(fontsize=15)
#    plt.title('Deep Neural Networks')

In [17]:
# Best hyperparameters, hard-coded from the grid search
do_best, nodes_best = 0.3, 1024

# Build the network and the callbacks used while fitting it
dnn_best = get_dnn_model(do_best, nodes_best)
tensorboard_cb = TensorBoard(log_dir='logs', batch_size=32, write_graph=True)
earlystopping_cb = EarlyStopping(monitor='val_loss', verbose=1, patience=20, mode='min')
fit_callbacks = [tensorboard_cb, earlystopping_cb]

history = dnn_best.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=100,
    callbacks=fit_callbacks,
    verbose=1,
)

# Plot the training history, one subplot per logged quantity
df = pd.DataFrame(history.history)
df.plot(subplots=True, grid=True, figsize=(10,15))
plt.title('Deep Neural Networks Optimum Performance')

# Accuracy on every split
acc_train_dnn = accuracy_metrics(dnn_best, x_train,label_train)
acc_val_dnn = accuracy_metrics(dnn_best, x_val,label_val)
acc_test_dnn = accuracy_metrics(dnn_best, x_test,label_test)
acc_usps_test_dnn = accuracy_metrics(dnn_best, x_usps_test,label_usps_test)

rule = '----------------------------------------------------'
print ('\n')
print (rule)
print ("-------Deep Neural Networks Results-------")
print (rule)
print ("Best parameters: \nnodes = {} \ndropout rate = {}".format(nodes_best,do_best))
for template, acc in (
        ("accuracy Training: {}", acc_train_dnn),
        ("accuracy Validation: {}", acc_val_dnn),
        ("accuracy Testing: {}", acc_test_dnn),
        ("accuracy USPS: {}", acc_usps_test_dnn)):
    print (template.format(acc))

Train on 50000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 00024: early stopping


----------------------------------------------------
-------Deep Neural Networks Results-------
----------------------------------------------------
Best parameters: 
nodes = 1024 
dropout rate = 0.3
accuracy Training: 0.99672
accuracy Validation: 0.9794
accuracy Testing: 0.9787
accuracy USPS: 0.5296764838241912


In [18]:
# Confusion matrices of the DNN on the MNIST and USPS test sets
y_pred_dnn = dnn_best.predict(x_test)
label_pred_dnn = list(np.argmax(y_pred_dnn, axis=1))
cf_dnn = confusion_matrix(label_test, label_pred_dnn)

y_usps_pred_dnn = dnn_best.predict(x_usps_test)
label_usps_pred_dnn = list(np.argmax(y_usps_pred_dnn, axis=1))
cf_usps_dnn = confusion_matrix(label_usps_test, label_usps_pred_dnn)

print ('\n')
for caption, matrix in (
        ('Confusion matrix by DNN model on MNIST test set:', cf_dnn),
        ('Confusion matrix by DNN model on USPS test set:', cf_usps_dnn)):
    print (caption)
    print (matrix)



Confusion matrix by DNN model on MNIST test set:
[[ 973    1    0    0    0    0    4    1    1    0]
 [   0 1129    2    0    0    0    2    0    2    0]
 [   4    1 1001   15    1    0    0    5    5    0]
 [   0    0    1 1001    0    1    0    5    2    0]
 [   1    1    5    0  952    0    7    3    1   12]
 [   2    0    0   10    1  869    3    0    6    1]
 [   2    3    0    0    1    3  946    0    3    0]
 [   2    3    7    5    1    0    0 1008    2    0]
 [   3    1    2    8    1    3    0    4  951    1]
 [   1    4    0    9   10    7    1   13    7  957]]
Confusion matrix by DNN model on USPS test set:
[[ 732    0   91   96  187   53   97   50  137  557]
 [  35  773   45  124  425   39   64  391   65   39]
 [  77   18 1622  102   25   46   36   44   26    3]
 [  14    9  141 1559    6  215    3   18   34    1]
 [   3   78   27   33 1250   52   17  294  215   31]
 [  64    6   20  115   11 1530   31   44  169   10]
 [  65   15  209   22   42   89 1506    5   41    6]

Convolutional Neural Networks

In [19]:
# Reshape the flat pixel vectors into 28x28 single-channel images for the CNN
x_train_cnn, x_val_cnn, x_test_cnn, x_usps_test_cnn = (
    flat_images.reshape(-1, 28, 28, 1)
    for flat_images in (x_train, x_val, x_test, x_usps_test)
)

In [20]:
#    # Find the best parameters for CNN
#    filter_list = [8,16,24,32]
#    do_list = [0.1,0.2,0.3,0.4]
#    
#    filter_best, do_best, val_acc_filter, val_acc_do = get_best_params_cnn(x_train,y_train,x_val,y_val,filter_list,do_list)
#    
#    plt.figure()
#    plt.plot(filter_list, val_acc_filter, 'o')
#    plt.xlabel('Number of Filters', fontsize=15)
#    plt.ylabel('Validation Accuracy', fontsize=15)
#    plt.xticks(fontsize=15)
#    plt.yticks(fontsize=15)
#    plt.title('Convolutional Neural Networks',fontsize=15)
#    plt.figure()
#    plt.plot(do_list, val_acc_do, 'o')
#    plt.xlabel('Drop Out Rate', fontsize=15)
#    plt.ylabel('Validation Accuracy', fontsize=15)
#    plt.xticks(fontsize=15)
#    plt.yticks(fontsize=15)
#    plt.title('Convolutional Neural Networks',fontsize=15)

In [21]:
# Best hyperparameters, hard-coded from the grid search
filter_best = 16
do_best_cnn = 0.3

# Build and train the CNN on the reshaped (28x28x1) MNIST images
cnn_best = get_cnn_model(filter_best,do_best_cnn)
history = cnn_best.fit(x_train_cnn,
                y_train,
                validation_data=(x_val_cnn,y_val),
                epochs=10,
                batch_size=200,
                verbose=1)

# Plot the training history, one subplot per logged quantity
df = pd.DataFrame(history.history)
df.plot(subplots=True, grid=True, figsize=(10,15))
plt.title('Convolutional Neural Networks Optimum Performance')

# Accuracy on every split
acc_train_cnn, acc_val_cnn, acc_test_cnn = accuracy_metrics(cnn_best,x_train_cnn,label_train), accuracy_metrics(cnn_best,x_val_cnn,label_val), accuracy_metrics(cnn_best,x_test_cnn,label_test)
acc_usps_test_cnn = accuracy_metrics(cnn_best,x_usps_test_cnn,label_usps_test)

print ('\n')
print ('----------------------------------------------------')
print ("-------Convolutional Neural Networks Results-------")
print ('----------------------------------------------------')
# Report the dropout rate this CNN was actually built with (do_best_cnn);
# do_best belongs to the DNN cell and only matched by coincidence.
print ("Best parameters: \nnumber of filters = {} \ndropout rate = {}".format(filter_best,do_best_cnn))
print ("accuracy Training: {}".format(acc_train_cnn))
print ("accuracy Validation: {}".format(acc_val_cnn))
print ("accuracy Testing: {}".format(acc_test_cnn))
print ("accuracy USPS: {}".format(acc_usps_test_cnn))

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


----------------------------------------------------
-------Convolutional Neural Networks Results-------
----------------------------------------------------
Best parameters: 
number of filters = 16 
dropout rate = 0.3
accuracy Training: 0.99808
accuracy Validation: 0.9883
accuracy Testing: 0.99
accuracy USPS: 0.5852792639631982


In [22]:
# Confusion matrices of the trained CNN on the MNIST and USPS test sets.
# The predicted label of a sample is the column index of its largest output.
y_pred_cnn = cnn_best.predict(x_test_cnn)
label_pred_cnn = list(np.argmax(y_pred_cnn, axis=1))
y_usps_pred_cnn = cnn_best.predict(x_usps_test_cnn)
label_usps_pred_cnn = list(np.argmax(y_usps_pred_cnn, axis=1))

cf_cnn = confusion_matrix(label_test, label_pred_cnn)
cf_usps_cnn = confusion_matrix(label_usps_test, label_usps_pred_cnn)

print ('\n')
print ('Confusion matrix by CNN model on MNIST test set:', cf_cnn, sep='\n')
print ('Confusion matrix by CNN model on USPS test set:', cf_usps_cnn, sep='\n')



Confusion matrix by CNN model on MNIST test set:
[[ 977    0    1    0    0    0    0    1    1    0]
 [   0 1133    0    0    0    0    1    1    0    0]
 [   4    2 1020    1    1    0    1    3    0    0]
 [   0    0    0 1008    0    2    0    0    0    0]
 [   0    0    0    0  976    0    4    0    1    1]
 [   1    0    0    6    0  880    4    1    0    0]
 [   4    2    0    0    1    1  950    0    0    0]
 [   0    1    4    1    0    0    0 1021    1    0]
 [   1    0    2    4    0    0    2    1  961    3]
 [   2    4    0    3    8    5    0    9    4  974]]
Confusion matrix by CNN model on USPS test set:
[[ 812    2   45   36  193    5   20    9  116  762]
 [  51  932   55   21  492   18   38  324   66    3]
 [  56   33 1566   93   24   23   18   32  152    2]
 [   7    2   35 1665    1  225    5    3   54    3]
 [   4   13   47   29 1382   13   13  169  322    8]
 [  26    0    7  206    3 1650    4   12   30   62]
 [ 200    4  139   12   53   32 1472    2   75   11]

Combining Models

In [23]:
# 1. Hard Majority Voting Method
# Each of the five models casts one vote per sample and the most frequent
# label wins. When several labels share the top count, max() keeps the first
# of them it meets while iterating over the set of distinct votes.

mnist_voters = (label_pred_log, label_pred_svc, label_pred_rf, label_pred_dnn, label_pred_cnn)
label_pred_combine = []
for idx in range(len(label_test)):
    ballot = [model_labels[idx] for model_labels in mnist_voters]
    label_pred_combine.append(max(set(ballot), key=ballot.count))

usps_voters = (label_usps_pred_log, label_usps_pred_svc, label_usps_pred_rf, label_usps_pred_dnn, label_usps_pred_cnn)
label_usps_pred_combine = []
for idx in range(len(label_usps_test)):
    ballot = [model_labels[idx] for model_labels in usps_voters]
    label_usps_pred_combine.append(max(set(ballot), key=ballot.count))

# Accuracy of the voted labels on the MNIST and USPS test sets
acc_test_combine = accuracy_score(label_test,label_pred_combine)
acc_usps_test_combine = accuracy_score(label_usps_test,label_usps_pred_combine)
print ('\n')
print ('----------------------------------------------------')
print ("-------Combined Model (Hard Voting) Results-------")
print ('----------------------------------------------------')
print ("accuracy Testing: {}".format(acc_test_combine))
print ("accuracy USPS: {}".format(acc_usps_test_combine))



----------------------------------------------------
-------Combined Model (Hard Voting) Results-------
----------------------------------------------------
accuracy Testing: 0.978
accuracy USPS: 0.4920246012300615


In [24]:
# Confusion matrices of the hard-voting ensemble on the MNIST and USPS test sets
cf_combine = confusion_matrix(label_test, label_pred_combine)
cf_usps_combine = confusion_matrix(label_usps_test, label_usps_pred_combine)

print ('\n')
print ('Confusion matrix by the combined model (hard voting) on MNIST test set:', cf_combine, sep='\n')
print ('Confusion matrix by the combined model (hard voting) on USPS test set:', cf_usps_combine, sep='\n')



Confusion matrix by the combined model (hard voting) on MNIST test set:
[[ 975    0    1    0    0    1    1    1    1    0]
 [   0 1127    3    1    0    0    2    0    2    0]
 [   4    0 1009    5    1    0    1    6    6    0]
 [   0    0    7  992    0    1    0    3    6    1]
 [   1    0    4    0  962    0    4    0    2    9]
 [   3    0    0   11    0  863    5    1    7    2]
 [   7    3    1    0    2    4  939    0    2    0]
 [   1    2   14    1    0    0    0 1001    2    7]
 [   4    0    2    6    4    4    1    3  946    4]
 [   9    5    0    8   10    0    0    7    4  966]]
Confusion matrix by the combined model (hard voting) on USPS test set:
[[ 761    2  190   50  250   75   32   29   84  527]
 [  55  744   29  198  332   84   28  457   67    6]
 [ 133   27 1568   66   32   74   19   48   26    6]
 [  42    9  125 1507    6  240    1   23   37   10]
 [  15   91   29   35 1254   66    7  222  232   49]
 [  79   16   67  143   14 1559    9   30   68   15]
 [ 235

In [25]:
# 2. Boosting Method

def missclassified(label_true,label_pred):
    """Return the positions where the predicted label differs from the true one.

    label_true: sequence of true labels; its length sets how many positions are checked.
    label_pred: sequence of predicted labels, read at the same positions.
    Returns a plain Python list of indices in increasing order."""

    return [pos for pos, truth in enumerate(label_true) if label_pred[pos] != truth]

# Fresh, untrained models for the boosted ensemble. The logistic regression
# member has no model object; its weight matrix w1 is fitted in round 1.
svc_linear1 = SVC(kernel='linear', C=C_best_linear)
rf1 = RandomForestClassifier(n_estimators=n_trees_best)
dnn1 = get_dnn_model(do_best, nodes_best)
cnn1 = get_cnn_model(filter_best, do_best_cnn)

# Every round draws, with replacement, 60% of the training-set size out of
# index_list. After a round, each training index the new model got wrong is
# appended four more times, so it is more likely to be drawn in later rounds.
n_draw = int(0.6*x_train.shape[0])
index_list = list(range(x_train.shape[0]))

# Round 1: logistic regression, all indices equally likely.
list1 = random.choices(index_list,k=n_draw)
x_train1 = x_train[list1,:]
y_train1 = y_train[list1,:]
w1 = logistic_regression(x_train1, y_train1, lamb_best, lr_best, 20, silent=True)
y_pred_log1 = softmax(w1.dot(x_train.T)).T
label_pred_log1 = list(np.argmax(y_pred_log1, axis=1))
miss_list1 = missclassified(label_train,label_pred_log1)
index_list.extend(miss_list1*4)

# Round 2: linear SVC on a draw from the re-weighted indices.
list2 = random.choices(index_list,k=n_draw)
x_train2 = x_train[list2,:]
label_train2 = label_train[list2]
svc_linear1.fit(x_train2, label_train2)
label_pred_svc1 = svc_linear1.predict(x_train)
miss_list2 = missclassified(label_train,label_pred_svc1)
index_list.extend(miss_list2*4)

# Round 3: random forest on a draw from the re-weighted indices.
list3 = random.choices(index_list,k=n_draw)
x_train3 = x_train[list3,:]
label_train3 = label_train[list3]
rf1.fit(x_train3, label_train3)
label_pred_rf1 = rf1.predict(x_train)
miss_list3 = missclassified(label_train,label_pred_rf1)
index_list.extend(miss_list3*4)

# Round 4: DNN on a draw from the re-weighted indices.
list4 = random.choices(index_list,k=n_draw)
x_train4 = x_train[list4,:]
y_train4 = y_train[list4,:]
history = dnn1.fit(
    x_train4,
    y_train4,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=100,
    callbacks=[tensorboard_cb,earlystopping_cb],
    verbose=1,
)
y_pred_dnn1 = dnn1.predict(x_train)
label_pred_dnn1 = list(np.argmax(y_pred_dnn1, axis=1))
miss_list4 = missclassified(label_train,label_pred_dnn1)
index_list.extend(miss_list4*4)

# Round 5: CNN on a draw from the re-weighted indices (image-shaped inputs).
# This is the last round, so index_list is not updated afterwards.
list5 = random.choices(index_list,k=n_draw)
x_train5 = x_train_cnn[list5,:,:,:]
y_train5 = y_train[list5,:]
history = cnn1.fit(
    x_train5,
    y_train5,
    validation_data=(x_val_cnn,y_val),
    epochs=5,
    batch_size=200,
    verbose=1,
)

Train on 30000 samples, validate on 10000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 00023: early stopping
Train on 30000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [26]:
# Use hard voting to combine these newly trained models.
# First collect every model's predicted labels on the MNIST and USPS test sets;
# for models that output per-class scores the label is the row-wise argmax.

# Logistic regression (weight matrix w1)
y_pred_log1 = softmax(w1.dot(x_test.T)).T
label_pred_log1 = list(np.argmax(y_pred_log1, axis=1))
y_usps_pred_log1 = softmax(w1.dot(x_usps_test.T)).T
label_usps_pred_log1 = list(np.argmax(y_usps_pred_log1, axis=1))

# Linear SVC and random forest predict labels directly
label_pred_svc1 = svc_linear1.predict(x_test)
label_pred_rf1 = rf1.predict(x_test)
label_usps_pred_svc1 = svc_linear1.predict(x_usps_test)
label_usps_pred_rf1 = rf1.predict(x_usps_test)

# DNN
y_pred_dnn1 = dnn1.predict(x_test)
label_pred_dnn1 = list(np.argmax(y_pred_dnn1, axis=1))
y_usps_pred_dnn1 = dnn1.predict(x_usps_test)
label_usps_pred_dnn1 = list(np.argmax(y_usps_pred_dnn1, axis=1))

# CNN (image-shaped inputs)
y_pred_cnn1 = cnn1.predict(x_test_cnn)
label_pred_cnn1 = list(np.argmax(y_pred_cnn1, axis=1))
y_usps_pred_cnn1 = cnn1.predict(x_usps_test_cnn)
label_usps_pred_cnn1 = list(np.argmax(y_usps_pred_cnn1, axis=1))

# Majority vote per sample. On a tie, max() keeps the first top-count label it
# meets while iterating over the set of distinct votes.
mnist_voters1 = (label_pred_log1, label_pred_svc1, label_pred_rf1, label_pred_dnn1, label_pred_cnn1)
label_pred_combine1 = []
for idx in range(len(label_test)):
    ballot = [model_labels[idx] for model_labels in mnist_voters1]
    label_pred_combine1.append(max(set(ballot), key=ballot.count))

usps_voters1 = (label_usps_pred_log1, label_usps_pred_svc1, label_usps_pred_rf1, label_usps_pred_dnn1, label_usps_pred_cnn1)
label_usps_pred_combine1 = []
for idx in range(len(label_usps_test)):
    ballot = [model_labels[idx] for model_labels in usps_voters1]
    label_usps_pred_combine1.append(max(set(ballot), key=ballot.count))

In [27]:
# Accuracy of the boosted ensemble's voted labels on the MNIST and USPS test sets
acc_test_combine1 = accuracy_score(label_test,label_pred_combine1)
acc_usps_test_combine1 = accuracy_score(label_usps_test,label_usps_pred_combine1)

print ('\n')
print ('----------------------------------------------------')
print ("-------Combined Model (Boosting) Results-------")
print ('----------------------------------------------------')
print ("accuracy Testing: {}".format(acc_test_combine1))
print ("accuracy USPS: {}".format(acc_usps_test_combine1))



----------------------------------------------------
-------Combined Model (Boosting) Results-------
----------------------------------------------------
accuracy Testing: 0.9806
accuracy USPS: 0.4786239311965598


In [28]:
# Confusion matrices of the boosted ensemble on the MNIST and USPS test sets
cf_combine1 = confusion_matrix(label_test, label_pred_combine1)
cf_usps_combine1 = confusion_matrix(label_usps_test, label_usps_pred_combine1)

print ('\n')
print ('Confusion matrix by the combined model (boosting) on MNIST test set:', cf_combine1, sep='\n')
print ('Confusion matrix by the combined model (boosting) on USPS test set:', cf_usps_combine1, sep='\n')



Confusion matrix by the combined model (boosting) on MNIST test set:
[[ 970    0    1    1    0    1    3    0    4    0]
 [   0 1129    3    0    0    0    1    0    2    0]
 [   6    0 1011    1    1    0    1    7    4    1]
 [   0    0    4  992    0    3    0    4    7    0]
 [   2    0    2    0  968    0    2    0    0    8]
 [   2    1    0   10    0  866    4    1    6    2]
 [   6    2    2    1    3    3  940    0    1    0]
 [   2    5   14    2    0    0    0  999    2    4]
 [   3    0    4    3    4    2    1    2  951    4]
 [   7    7    0    3    6    0    0    4    2  980]]
Confusion matrix by the combined model (boosting) on USPS test set:
[[ 696    8  282   40  259   75   31   27   51  531]
 [  94  662   87  197  344   83   20  430   67   16]
 [ 109   19 1621   49   26   94   14   31   25   11]
 [  46    7  127 1477    6  271    3   20   31   12]
 [  19  122   47   20 1287   88    5  232  122   58]
 [  70   18   79  141   20 1577   16   22   44   13]
 [ 238   28 