In [6]:
import numpy as np
import sklearn
from sklearn.neural_network import MLPClassifier as mlp
import sklearn.metrics

In [7]:
# Define useful functions
def create_output_file(y_actual, y_predicted, labels, filename):
    '''
    Function for creating the ouput file as specified in the assignment instructions:
    a) instance number and predicted label (number)
    b) Confusion matrix
    c) Precision, recall and f1-measure of each class
    d) Accuracy, macro-average f1 and weighted-average f1 score of the model

    y_actual: numpy array of shape (N,) containing the actual class of each test instance
    y_predicted: numpy array of shape (N,) containing the class of each test instance predicted by the model
    labels: 1D numpy array containing the class labels of the dataset
    filename: name of the output (.csv) file
    '''

    # Open file
    output = open('A1-Output/' + filename + '.csv', 'w')

    # a) Write y values of test data
    output.write('instance,prediction\n')
    for i in range(y_predicted.shape[0]):
        output.write(str(i+1) + ',' + str(y_predicted[i]) + '\n')

    output.write('\n')

    # b) Plot confusion matrix
    output.write('confusion matrix\n')
    confusion_matrix = sklearn.metrics.confusion_matrix(y_actual, y_predicted)
    (m, n) = confusion_matrix.shape
    for i in range(m):
        for j in range(n):
            if j < n-1:
                output.write(str(confusion_matrix[i,j]) + ',')
            else:
                output.write(str(confusion_matrix[i,j]))
        output.write('\n')

    output.write('\n')

    # c) Write precision, recall and f1-measure of each class (rounded to 2 decimals)
    output.write('precision,recall,f1-measure\n')
    precision = sklearn.metrics.precision_score(y_actual, y_predicted, average=None)
    recall = sklearn.metrics.recall_score(y_actual, y_predicted, average=None)
    f1 = sklearn.metrics.f1_score(y_actual, y_predicted, average=None)

    for i in range(labels.shape[0]):
        #output.write(str(precision[i]) + ',' + str(recall[i]) + ',' + str(f1[i]) + '\n')
        output.write('{:.2f},{:.2f},{:.2f}\n'.format(precision[i], recall[i], f1[i]))

    output.write('\n')

    # Write accuracy, macro-average f1 and weighted-average f1 of the model (rounded to 2 decimals)
    output.write('accuracy,macro-average f-1,weighted-average f1\n') 
    accuracy = sklearn.metrics.accuracy_score(y_actual, y_predicted)
    macro_avg_f1 = sklearn.metrics.f1_score(y_actual, y_predicted, average='macro')
    weighted_avg_f1 = sklearn.metrics.f1_score(y_actual, y_predicted, average='weighted')
    #output.write(str(accuracy) + ',' + str(macro_avg_f1) + ',' + str(weighted_avg_f1))
    output.write('{:.2f},{:.2f},{:.2f}\n'.format(accuracy, macro_avg_f1, weighted_avg_f1))

    # Close output file
    output.close()

def load_dataset(filename, nb_pixels=32**2):
    '''
    Function for loading the X and Y data of the passed csv file

    filename: name of the file containing the dataset (ex: train_1)

    Return: 
    X: 2D numpy array containing the value of the features of each instance
    Y: 1D numpy array containing the true class of each instance 
    '''
    data = np.loadtxt('Assig1-Dataset/' + filename + '.csv', delimiter=',', dtype=np.int32)
    return data[:, :nb_pixels], data[:, nb_pixels]


In [8]:
#Loading training and validation data for both datasets
#dataset 1
ds1_labels = np.loadtxt('Assig1-Dataset/info_1.csv', skiprows=1, usecols=1, delimiter=',', dtype=np.str)

ds1_training_X, ds1_training_Y = load_dataset('train_1')
ds1_val_X, ds1_val_Y = load_dataset('val_1')

#dataset 2
ds2_labels = np.loadtxt('Assig1-Dataset/info_2.csv', skiprows=1, usecols=1, delimiter=',', dtype=np.str)

ds2_training_X, ds2_training_Y = load_dataset('train_2')
ds2_val_X, ds2_val_Y = load_dataset('val_2')


In [9]:
#Training Base-MLP models
baseMLP_ds1 = mlp(activation='logistic',solver='sgd', max_iter=4000).fit(ds1_training_X,ds1_training_Y)
baseMLP_ds2 = mlp(activation='logistic',solver='sgd', max_iter=4000).fit(ds2_training_X,ds2_training_Y)

In [10]:
#Training model for dataset 1
ds1_val_Y_predict = baseMLP_ds1.predict(ds1_val_X)

print (sklearn.metrics.confusion_matrix(ds1_val_Y, ds1_val_Y_predict))
print ('\n')
print (sklearn.metrics.classification_report(ds1_val_Y, ds1_val_Y_predict, target_names=ds1_labels)) # precision, recall, f1-measure (macro and weighted) and accuracy

[[ 9  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  6  0  0  0  1  0  0  0  1  0  0  0  0  0  0  0  1  0  0  0  0  0  0
   0  0]
 [ 0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  1  0  6  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  6  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  7  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  8  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  6  0  0  0  0  0  0  0  1  0  0  0  0  0  1  1  0
   0  0]
 [ 0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 1  0  0  0  0  0  0  0  0  8  0  0  0  0  0  0  0  0  0  1  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  9  1  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0

In [62]:
#Training model for dataset 2
ds2_val_Y_predict = baseMLP_ds2.predict(ds2_val_X)

print (sklearn.metrics.confusion_matrix(ds2_val_Y, ds2_val_Y_predict))
print ('\n')
print (sklearn.metrics.classification_report(ds2_val_Y, ds2_val_Y_predict, target_names=ds2_labels)) # precision, recall, f1-measure (macro and weighted) and accuracy

[[150   0   0   0   6   0   0   0   6   3]
 [  4 367   1   0   1   1   1   0   0   0]
 [  1   0  29   1   2   2   0   0   8   2]
 [  1   2   0  38   2   1   0   0   1   0]
 [  8   0   1   0 119   5   1   0   1  15]
 [  0   4   0   2   2 151   0   0   1   5]
 [  0   3   0   0   0   0  41   0   1   0]
 [  1   0   0   0   0   2   0  41   0   1]
 [  3   5   1   0   5   0   0   0 134   2]
 [  2   3   2   0   8   9   3   0   5 343]]


              precision    recall  f1-score   support

          pi       0.88      0.91      0.90       165
       alpha       0.96      0.98      0.97       375
        beta       0.85      0.64      0.73        45
       sigma       0.93      0.84      0.88        45
       gamma       0.82      0.79      0.81       150
       delta       0.88      0.92      0.90       165
      lambda       0.89      0.91      0.90        45
       omega       1.00      0.91      0.95        45
          mu       0.85      0.89      0.87       150
          xi       0.92   

In [11]:
#Testing for dataset 1
ds1_test_X, ds1_test_Y = load_dataset('test_with_label_1')
ds1_test_Y_predict = baseMLP_ds1.predict(ds1_test_X)

print("DATASET 1\n")
print (sklearn.metrics.confusion_matrix(ds1_test_Y, ds1_test_Y_predict)) # confusion matrix
print ('\n')
print (sklearn.metrics.classification_report(ds1_test_Y, ds1_test_Y_predict, target_names=ds1_labels)) # precision, recall, f1-measure (macro and weighted average) and accuracy

#Testing for dataset 2
ds2_test_X, ds2_test_Y = load_dataset('test_with_label_2')
ds2_test_Y_predict = baseMLP_ds2.predict(ds2_test_X)

print("\n=====================================================\n")
print("DATASET 2\n")
print (sklearn.metrics.confusion_matrix(ds2_test_Y, ds2_test_Y_predict)) # confusion matrix
print ('\n')
print (sklearn.metrics.classification_report(ds2_test_Y, ds2_test_Y_predict, target_names=ds2_labels)) # precision, recall, f1-measure (macro and weighted average) and accuracy



# Write test results to output file GNB-DS1.csv
create_output_file(ds1_test_Y, ds1_test_Y_predict, ds1_labels, 'Base-MLP-DS1')
create_output_file(ds2_test_Y, ds2_test_Y_predict, ds2_labels, 'Base-MLP-DS2')

DATASET 1

[[4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 3 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0]