In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier as mlp
import sklearn.metrics
from sklearn.model_selection import GridSearchCV

In [2]:
# Define useful functions
def create_output_file(y_actual, y_predicted, labels, filename):
    '''
    Function for creating the ouput file as specified in the assignment instructions:
    a) instance number and predicted label (number)
    b) Confusion matrix
    c) Precision, recall and f1-measure of each class
    d) Accuracy, macro-average f1 and weighted-average f1 score of the model

    y_actual: numpy array of shape (N,) containing the actual class of each test instance
    y_predicted: numpy array of shape (N,) containing the class of each test instance predicted by the model
    labels: 1D numpy array containing the class labels of the dataset
    filename: name of the output (.csv) file
    '''

    # Open file
    output = open('A1-Output/' + filename + '.csv', 'w')

    # a) Write y values of test data
    output.write('instance,prediction\n')
    for i in range(y_predicted.shape[0]):
        output.write(str(i+1) + ',' + str(y_predicted[i]) + '\n')

    output.write('\n')

    # b) Plot confusion matrix
    output.write('confusion matrix\n')
    confusion_matrix = sklearn.metrics.confusion_matrix(y_actual, y_predicted)
    (m, n) = confusion_matrix.shape
    for i in range(m):
        for j in range(n):
            if j < n-1:
                output.write(str(confusion_matrix[i,j]) + ',')
            else:
                output.write(str(confusion_matrix[i,j]))
        output.write('\n')

    output.write('\n')

    # c) Write precision, recall and f1-measure of each class (rounded to 2 decimals)
    output.write('precision,recall,f1-measure\n')
    precision = sklearn.metrics.precision_score(y_actual, y_predicted, average=None)
    recall = sklearn.metrics.recall_score(y_actual, y_predicted, average=None)
    f1 = sklearn.metrics.f1_score(y_actual, y_predicted, average=None)

    for i in range(labels.shape[0]):
        #output.write(str(precision[i]) + ',' + str(recall[i]) + ',' + str(f1[i]) + '\n')
        output.write('{:.2f},{:.2f},{:.2f}\n'.format(precision[i], recall[i], f1[i]))

    output.write('\n')

    # Write accuracy, macro-average f1 and weighted-average f1 of the model (rounded to 2 decimals)
    output.write('accuracy,macro-average f-1,weighted-average f1\n') 
    accuracy = sklearn.metrics.accuracy_score(y_actual, y_predicted)
    macro_avg_f1 = sklearn.metrics.f1_score(y_actual, y_predicted, average='macro')
    weighted_avg_f1 = sklearn.metrics.f1_score(y_actual, y_predicted, average='weighted')
    #output.write(str(accuracy) + ',' + str(macro_avg_f1) + ',' + str(weighted_avg_f1))
    output.write('{:.2f},{:.2f},{:.2f}\n'.format(accuracy, macro_avg_f1, weighted_avg_f1))

    # Close output file
    output.close()

def load_dataset(filename, nb_pixels=32**2):
    '''
    Function for loading the X and Y data of the passed csv file

    filename: name of the file containing the dataset (ex: train_1)

    Return: 
    X: 2D numpy array containing the value of the features of each instance
    Y: 1D numpy array containing the true class of each instance 
    '''
    data = np.loadtxt('Assig1-Dataset/' + filename + '.csv', delimiter=',', dtype=np.int32)
    return data[:, :nb_pixels], data[:, nb_pixels]


In [3]:
#Loading training and validation data for both datasets
#dataset 1
ds1_labels = np.loadtxt('Assig1-Dataset/info_1.csv', skiprows=1, usecols=1, delimiter=',', dtype=np.str)

ds1_training_X, ds1_training_Y = load_dataset('train_1')
ds1_val_X, ds1_val_Y = load_dataset('val_1')

#dataset 2
ds2_labels = np.loadtxt('Assig1-Dataset/info_2.csv', skiprows=1, usecols=1, delimiter=',', dtype=np.str)

ds2_training_X, ds2_training_Y = load_dataset('train_2')
ds2_val_X, ds2_val_Y = load_dataset('val_2')

In [None]:
#Gridsearch for best parameters for dataset 1
best_mlp = mlp(max_iter=3500)
parameters = {'hidden_layer_sizes':[(1500,100), (50,50), (50,30,50)], 'activation': ['identity', 'relu','tanh','logistic'], 'solver':['adam', 'sgd']}
clf = GridSearchCV(best_mlp, parameters)
clf.fit(ds1_training_X, ds1_training_Y)
print('Best parameters found:\n', clf.best_params_)

In [6]:
#Training Best-MLP model for dataset 1
bestMLP_ds1 = mlp(hidden_layer_sizes=(1500,100), activation='logistic',solver='adam', max_iter=4000).fit(ds1_training_X,ds1_training_Y)



In [15]:
#Training model for dataset 1
ds1_val_Y_predict = bestMLP_ds1.predict(ds1_val_X)

print (sklearn.metrics.confusion_matrix(ds1_val_Y, ds1_val_Y_predict))
print ('\n')
print (sklearn.metrics.classification_report(ds1_val_Y, ds1_val_Y_predict, target_names=ds1_labels)) # precision, recall, f1-measure (macro and weighted) and accuracy

[[ 8  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  0  0
   0  0]
 [ 0  7  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   1  0]
 [ 0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  1  0  7  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  6  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  6  0  0  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  8  0  0  0  0  0  0  0  1  0  1  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  6  0  0  0  0  1  0  0  1  0  0  0  0  0  1  0  0
   0  0]
 [ 0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 1  0  0  0  0  1  0  0  0  8  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 10  0  0  0  0  0  0  0  0  0  0  0  0
   0  0]
 [ 0

In [4]:
#Gridsearch for best parameters for dataset 2
best_mlp = mlp(max_iter=3500)
parameters = {'hidden_layer_sizes':[(1500,100), (50,50), (50,30,50)], 'activation': ['identity', 'relu','tanh','logistic'], 'solver':['adam', 'sgd']}
clf = GridSearchCV(best_mlp, parameters, verbose=5)
clf.fit(ds2_training_X, ds2_training_Y)
print('Best parameters found:\n', clf.best_params_)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=adam 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=adam, score=0.817, total= 4.3min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=adam 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.4min remaining:    0.0s


[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=adam, score=0.821, total= 2.4min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=adam 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  6.8min remaining:    0.0s


[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=adam, score=0.823, total= 1.8min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=adam 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  8.6min remaining:    0.0s


[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=adam, score=0.838, total= 5.2min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=adam 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 13.8min remaining:    0.0s


[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=adam, score=0.833, total= 3.1min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd .
[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.828, total=25.3min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd .
[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.829, total=22.4min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd .
[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.819, total=23.3min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd .
[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.829, total=30.0min
[CV] activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd .
[CV]  activation=identity, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.814, total=35.2min
[CV] activation=identity, hidden_layer_sizes=(50, 50), s

[CV]  activation=relu, hidden_layer_sizes=(50, 30, 50), solver=adam, score=0.867, total= 1.2min
[CV] activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd ....
[CV]  activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.862, total= 3.1min
[CV] activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd ....
[CV]  activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.857, total= 3.7min
[CV] activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd ....
[CV]  activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.860, total= 2.9min
[CV] activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd ....
[CV]  activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.865, total= 3.3min
[CV] activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd ....
[CV]  activation=relu, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.851, total= 3.0min
[CV] activation=tanh, hidden_layer_sizes=(1500, 100), solver=adam ....
[CV



[CV]  activation=logistic, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.840, total=161.7min
[CV] activation=logistic, hidden_layer_sizes=(1500, 100), solver=sgd .
[CV]  activation=logistic, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.851, total=239.0min
[CV] activation=logistic, hidden_layer_sizes=(1500, 100), solver=sgd .




[CV]  activation=logistic, hidden_layer_sizes=(1500, 100), solver=sgd, score=0.858, total=161.9min
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=adam ...




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=adam, score=0.878, total=  52.4s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=adam ...




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=adam, score=0.866, total=  26.1s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=adam ...




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=adam, score=0.879, total= 1.3min
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=adam ...




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=adam, score=0.883, total= 1.2min
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=adam ...




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=adam, score=0.528, total=   5.4s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd ....




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd, score=0.239, total=   3.2s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd ....




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd, score=0.106, total=   1.0s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd ....




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd, score=0.256, total=   3.7s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd ....




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd, score=0.414, total=   3.9s
[CV] activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd ....




[CV]  activation=logistic, hidden_layer_sizes=(50, 50), solver=sgd, score=0.240, total=   0.7s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam, score=0.240, total=   0.7s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam, score=0.476, total=   3.2s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam, score=0.240, total=   0.4s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam, score=0.475, total=   3.6s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=adam, score=0.474, total=   4.2s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.240, total=   5.2s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.240, total=   3.1s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.048, total=   1.6s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd 




[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.240, total=   2.2s
[CV] activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd 


[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed: 2053.2min finished


[CV]  activation=logistic, hidden_layer_sizes=(50, 30, 50), solver=sgd, score=0.240, total=   1.7s
Best parameters found:
 {'activation': 'logistic', 'hidden_layer_sizes': (1500, 100), 'solver': 'adam'}




In [7]:
#Training Best-MLP model for dataset 2
bestMLP_ds2 = mlp(hidden_layer_sizes=(1500,100), activation='tanh',solver='adam', max_iter=4000).fit(ds2_training_X,ds2_training_Y)
#activation=logistics and the same other parameters is equally good, but tanh is faster, hence why I chose it

In [5]:
#Training model for dataset 2
ds2_val_Y_predict = bestMLP_ds2.predict(ds2_val_X)

print('Best parameters found:\n', clf.best_params_)
print (sklearn.metrics.confusion_matrix(ds2_val_Y, ds2_val_Y_predict))
print ('\n')
print (sklearn.metrics.classification_report(ds2_val_Y, ds2_val_Y_predict, target_names=ds2_labels)) # precision, recall, f1-measure (macro and weighted) and accuracy

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/inesgonzalezpepe/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-755e1f90baee>", line 2, in <module>
    ds2_val_Y_predict = bestMLP_ds2.predict(ds2_val_X)
NameError: name 'bestMLP_ds2' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/inesgonzalezpepe/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/inesgonzalezpepe/opt/anaconda3/lib/python3.8/site-packages/IPython/core/ultratb.py", line 1169, in get_records
    return _fixed_getinnerframes(et

TypeError: object of type 'NoneType' has no len()

In [14]:
#Testing for dataset 1
ds1_test_X, ds1_test_Y = load_dataset('test_with_label_1')
ds1_test_Y_predict = bestMLP_ds1.predict(ds1_test_X)

print("DATASET 1\n")
print (sklearn.metrics.confusion_matrix(ds1_test_Y, ds1_test_Y_predict)) # confusion matrix
print ('\n')
print (sklearn.metrics.classification_report(ds1_test_Y, ds1_test_Y_predict, target_names=ds1_labels)) # precision, recall, f1-measure (macro and weighted average) and accuracy

#Testing for dataset 2
ds2_test_X, ds2_test_Y = load_dataset('test_with_label_2')
ds2_test_Y_predict = bestMLP_ds2.predict(ds2_test_X)

print("\n=====================================================\n")
print("DATASET 2\n")
print (sklearn.metrics.confusion_matrix(ds2_test_Y, ds2_test_Y_predict)) # confusion matrix
print ('\n')
print (sklearn.metrics.classification_report(ds2_test_Y, ds2_test_Y_predict, target_names=ds2_labels)) # precision, recall, f1-measure (macro and weighted average) and accuracy



# Write test results to output file GNB-DS1.csv
create_output_file(ds1_test_Y, ds1_test_Y_predict, ds1_labels, 'Best-MLP-DS1')
create_output_file(ds2_test_Y, ds2_test_Y_predict, ds2_labels, 'Best-MLP-DS2')

DATASET 1

[[4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0]