# Imports

In [1]:
import random
import numpy as np
from data_process import get_FASHION_data, get_RICE_data
from scipy.spatial import distance
from models import Perceptron, SVM, Softmax, Logistic
from kaggle_submission import output_submission_csv
%matplotlib inline

# For auto-reloading external modules
# See http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# Loading Fashion-MNIST

In the following cells we determine the number of images for each split and load the images.
<br /> 
TRAIN_IMAGES + VAL_IMAGES must lie in the range (0, 60000]; TEST_IMAGES = 10000

In [2]:
# You can change these numbers for experimentation
# For submission we will use the default values
TRAIN_IMAGES = 50000
VAL_IMAGES = 10000
normalize = True

# The training pool holds 60000 images, so the two splits together must fit in (0, 60000].
assert 0 < TRAIN_IMAGES + VAL_IMAGES <= 60000, \
    "TRAIN_IMAGES + VAL_IMAGES must be in (0, 60000]"

# Seed the global RNGs so that Restart Kernel -> Run All reproduces the same numbers.
# NOTE(review): this only has an effect if the data loaders / models draw from Python's
# `random` module or NumPy's global generator -- confirm in data_process.py and models/.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Fetch the Fashion-MNIST splits using the sizes and normalization flag chosen above.
data = get_FASHION_data(TRAIN_IMAGES, VAL_IMAGES, normalize=normalize)

# Unpack features and labels for each split from the returned mapping.
X_train_fashion, X_val_fashion, X_test_fashion = (
    data[key] for key in ('X_train', 'X_val', 'X_test')
)
y_train_fashion, y_val_fashion, y_test_fashion = (
    data[key] for key in ('y_train', 'y_val', 'y_test')
)

# Number of classes = number of distinct labels present in the test split.
n_class_fashion = np.unique(y_test_fashion).size

# Loading Rice

In [4]:
# Load the Rice dataset as train / val / test splits of 60% / 20% / 20%
# (the sample counts printed by this cell reflect these proportions).
data = get_RICE_data()

# Unpack features and labels for each split from the returned mapping.
X_train_RICE, X_val_RICE, X_test_RICE = (
    data[key] for key in ('X_train', 'X_val', 'X_test')
)
y_train_RICE, y_val_RICE, y_test_RICE = (
    data[key] for key in ('y_train', 'y_val', 'y_test')
)

# Number of classes = number of distinct labels present in the test split.
n_class_RICE = np.unique(y_test_RICE).size

print("Number of train samples: ", X_train_RICE.shape[0])
print("Number of val samples: ", X_val_RICE.shape[0])
print("Number of test samples: ", X_test_RICE.shape[0])

Number of train samples:  10911
Number of val samples:  3637
Number of test samples:  3637


### Get Accuracy

This function computes how well your model performs using accuracy as a metric.

In [5]:
def get_acc(pred, y_test):
    """Return the classification accuracy of ``pred`` against ``y_test`` in percent.

    Parameters
    ----------
    pred : array-like
        Predicted labels, one per sample.
    y_test : array-like
        Ground-truth labels; must contain as many elements as ``pred``.

    Returns
    -------
    float
        Percentage (0-100) of positions where the prediction equals the label.

    Raises
    ------
    ValueError
        If the inputs are empty or hold a different number of labels.
    """
    # Flatten to 1-D arrays: plain lists then compare element-wise (list == list
    # yields a single bool), and an (N, 1) column cannot broadcast against an
    # (N,) vector into an (N, N) comparison matrix.
    pred = np.asarray(pred).ravel()
    y_test = np.asarray(y_test).ravel()

    if pred.size != y_test.size:
        raise ValueError(
            "pred and y_test must have the same number of elements, got %d and %d"
            % (pred.size, y_test.size)
        )
    if y_test.size == 0:
        raise ValueError("cannot compute accuracy on empty inputs")

    return float(np.count_nonzero(y_test == pred) / y_test.size * 100)

# Perceptron

Perceptron has 2 hyperparameters that you can experiment with:
- **Learning rate** - controls how much we change the current weights of the classifier during each update. We set it at a default value of 0.5, but you should experiment with different values. We recommend changing the learning rate by factors of 10 and observing how the performance of the classifier changes. You should also try adding a **decay** which slowly reduces the learning rate over each epoch.
- **Number of Epochs** - An epoch is a complete iterative pass over all of the data in the dataset. During an epoch we predict a label using the classifier and then update the weights of the classifier according to the perceptron update rule for each sample in the training set. You should try different values for the number of training epochs and report your results.

You will implement the Perceptron classifier in the **models/perceptron.py**

The following code: 
- Creates an instance of the Perceptron classifier class 
- Calls the train function of the Perceptron class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy


## Train Perceptron on Fashion-MNIST

In [6]:
# Hyperparameters for the Fashion-MNIST perceptron run.
lr, n_epochs = 0.1, 50

# Build a perceptron for n_class_fashion classes and train it on the training split.
percept_fashion = Perceptron(n_class_fashion, lr, n_epochs)
percept_fashion.train(X_train_fashion, y_train_fashion)

Epoch 0 Accuracy 6.948
Epoch 1 Accuracy 80.778
Epoch 2 Accuracy 78.302
Epoch 3 Accuracy 80.42399999999999
Epoch 4 Accuracy 81.42399999999999
Epoch 5 Accuracy 83.298
Epoch 6 Accuracy 83.55
Epoch 7 Accuracy 84.078
Epoch 8 Accuracy 83.082
Epoch 9 Accuracy 83.50800000000001
Epoch 10 Accuracy 82.87400000000001
Epoch 11 Accuracy 82.77799999999999
Epoch 12 Accuracy 83.6
Epoch 13 Accuracy 82.896
Epoch 14 Accuracy 82.446
Epoch 15 Accuracy 83.738
Epoch 16 Accuracy 81.104
Epoch 17 Accuracy 83.174
Epoch 18 Accuracy 83.562
Epoch 19 Accuracy 83.658
Epoch 20 Accuracy 83.646
Epoch 21 Accuracy 84.17999999999999
Epoch 22 Accuracy 84.19
Epoch 23 Accuracy 83.932
Epoch 24 Accuracy 83.768
Epoch 25 Accuracy 84.858
Epoch 26 Accuracy 84.084
Epoch 27 Accuracy 83.788
Epoch 28 Accuracy 84.97200000000001
Epoch 29 Accuracy 84.664
Epoch 30 Accuracy 84.82
Epoch 31 Accuracy 84.958
Epoch 32 Accuracy 84.822
Epoch 33 Accuracy 84.94
Epoch 34 Accuracy 84.672
Epoch 35 Accuracy 84.634
Epoch 36 Accuracy 84.858
Epoch 37 Accura

In [7]:
# Predict on the Fashion-MNIST training split with the trained perceptron and
# print the accuracy returned by get_acc (a percentage).
pred_percept = percept_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_fashion)))

The training accuracy is given by: 84.800000


### Validate Perceptron on Fashion-MNIST

In [8]:
# Predict on the Fashion-MNIST validation split (pred_percept is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_percept = percept_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_fashion)))

The validation accuracy is given by: 80.210000


### Test Perceptron on Fashion-MNIST

In [9]:
# Predict on the Fashion-MNIST test split (pred_percept is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_percept = percept_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_fashion)))

The testing accuracy is given by: 79.710000


### Perceptron_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [10]:
# Hand the perceptron's Fashion-MNIST test predictions to output_submission_csv together
# with the target path. NOTE(review): presumably this writes the CSV and expects the
# kaggle/ directory to exist -- confirm in kaggle_submission.py.
output_submission_csv('kaggle/perceptron_submission_fashion.csv', percept_fashion.predict(X_test_fashion))

## Train Perceptron on Rice

In [11]:
# Hyperparameters for the Rice perceptron run.
lr, n_epochs = 0.3, 100

# Build a perceptron for n_class_RICE classes and train it on the Rice training split.
percept_RICE = Perceptron(n_class_RICE, lr, n_epochs)
percept_RICE.train(X_train_RICE, y_train_RICE)

Epoch 0 Accuracy 54.779580240124645
Epoch 1 Accuracy 71.6524608193566
Epoch 2 Accuracy 58.042342590046744
Epoch 3 Accuracy 54.90789111905416
Epoch 4 Accuracy 90.50499495921547
Epoch 5 Accuracy 54.779580240124645
Epoch 6 Accuracy 98.86353221519568
Epoch 7 Accuracy 98.92768765466043
Epoch 8 Accuracy 99.78920355604436
Epoch 9 Accuracy 94.51929245715334
Epoch 10 Accuracy 99.84419393272844
Epoch 11 Accuracy 99.88085418385117
Epoch 12 Accuracy 99.89918430941252
Epoch 13 Accuracy 99.26679497754559
Epoch 14 Accuracy 99.83502886994776
Epoch 15 Accuracy 99.87168912107049
Epoch 16 Accuracy 99.83502886994776
Epoch 17 Accuracy 99.83502886994776
Epoch 18 Accuracy 99.86252405828981
Epoch 19 Accuracy 99.93584456053523
Epoch 20 Accuracy 99.88085418385117
Epoch 21 Accuracy 99.89001924663185
Epoch 22 Accuracy 99.88085418385117
Epoch 23 Accuracy 99.92667949775455
Epoch 24 Accuracy 99.93584456053523
Epoch 25 Accuracy 99.93584456053523
Epoch 26 Accuracy 99.84419393272844
Epoch 27 Accuracy 99.91751443497388


In [12]:
# Predict on the Rice training split with the trained perceptron and
# print the accuracy returned by get_acc (a percentage).
pred_percept = percept_RICE.predict(X_train_RICE)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_RICE)))

The training accuracy is given by: 99.935845


### Validate Perceptron on Rice

In [13]:
# Predict on the Rice validation split (pred_percept is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_percept = percept_RICE.predict(X_val_RICE)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_RICE)))

The validation accuracy is given by: 99.917514


### Test Perceptron on Rice

In [14]:
# Predict on the Rice test split (pred_percept is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_percept = percept_RICE.predict(X_test_RICE)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_RICE)))

The testing accuracy is given by: 99.917514


# Support Vector Machines (with SGD)

Next, you will implement a "soft margin" SVM. In this formulation you will maximize the margin between positive and negative training examples and penalize margin violations using a hinge loss.

We will optimize the SVM loss using SGD. This means you must compute the gradient of the loss function with respect to the model weights. You will use this gradient to update the model weights.

SVM optimized with SGD has 3 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Epochs** - similar to as defined above in Perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case it is a coefficient on the term which maximizes the margin. You could try different values. The default value is set to 0.05.

You will implement the SVM using SGD in the **models/svm.py**

The following code: 
- Creates an instance of the SVM classifier class 
- Calls the train function of the SVM class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Train SVM on Fashion-MNIST

In [15]:
# Hyperparameters for the Fashion-MNIST SVM run.
lr, n_epochs, reg_const = 1, 50, 0.05

# Build an SVM for n_class_fashion classes and train it on the training split.
svm_fashion = SVM(n_class_fashion, lr, n_epochs, reg_const)
svm_fashion.train(X_train_fashion, y_train_fashion)

Epoch 0 Accuracy 10.91
Epoch 1 Accuracy 76.776
Epoch 2 Accuracy 79.252
Epoch 3 Accuracy 80.054
Epoch 4 Accuracy 81.17
Epoch 5 Accuracy 83.94
Epoch 6 Accuracy 83.538
Epoch 7 Accuracy 84.048
Epoch 8 Accuracy 84.39999999999999
Epoch 9 Accuracy 84.2
Epoch 10 Accuracy 84.446
Epoch 11 Accuracy 84.37
Epoch 12 Accuracy 84.36
Epoch 13 Accuracy 84.304
Epoch 14 Accuracy 84.39
Epoch 15 Accuracy 84.406
Epoch 16 Accuracy 84.348
Epoch 17 Accuracy 84.36399999999999
Epoch 18 Accuracy 84.414
Epoch 19 Accuracy 84.392
Epoch 20 Accuracy 84.396
Epoch 21 Accuracy 84.422
Epoch 22 Accuracy 84.402
Epoch 23 Accuracy 84.39999999999999
Epoch 24 Accuracy 84.39999999999999
Epoch 25 Accuracy 84.396
Epoch 26 Accuracy 84.398
Epoch 27 Accuracy 84.396
Epoch 28 Accuracy 84.39999999999999
Epoch 29 Accuracy 84.404
Epoch 30 Accuracy 84.402
Epoch 31 Accuracy 84.39999999999999
Epoch 32 Accuracy 84.39999999999999
Epoch 33 Accuracy 84.39999999999999
Epoch 34 Accuracy 84.39999999999999
Epoch 35 Accuracy 84.39999999999999
Epoch 36

In [16]:
# Predict on the Fashion-MNIST training split with the trained SVM and
# print the accuracy returned by get_acc (a percentage).
pred_svm = svm_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_fashion)))

The training accuracy is given by: 84.400000


### Validate SVM on Fashion-MNIST

In [17]:
# Predict on the Fashion-MNIST validation split (pred_svm is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_svm = svm_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_fashion)))

The validation accuracy is given by: 82.740000


### Test SVM on Fashion-MNIST

In [18]:
# Predict on the Fashion-MNIST test split (pred_svm is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_svm = svm_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_fashion)))

The testing accuracy is given by: 82.310000


### SVM_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [19]:
# Hand the SVM's Fashion-MNIST test predictions to output_submission_csv together
# with the target path. NOTE(review): presumably this writes the CSV and expects the
# kaggle/ directory to exist -- confirm in kaggle_submission.py.
output_submission_csv('kaggle/svm_submission_fashion.csv', svm_fashion.predict(X_test_fashion))

## Train SVM on Rice

In [20]:
# Hyperparameters for the Rice SVM run.
lr, n_epochs, reg_const = 1, 50, 0.05

# Build an SVM for n_class_RICE classes and train it on the Rice training split.
svm_RICE = SVM(n_class_RICE, lr, n_epochs, reg_const)
svm_RICE.train(X_train_RICE, y_train_RICE)

Epoch 0 Accuracy 54.779580240124645
Epoch 1 Accuracy 55.77857208321877
Epoch 2 Accuracy 72.75226835303822
Epoch 3 Accuracy 75.0985244248923
Epoch 4 Accuracy 78.97534598111997
Epoch 5 Accuracy 78.85620016497114
Epoch 6 Accuracy 78.75538447438365
Epoch 7 Accuracy 79.14948217395289
Epoch 8 Accuracy 78.7278892860416
Epoch 9 Accuracy 78.67289890935753
Epoch 10 Accuracy 78.99367610668133
Epoch 11 Accuracy 79.0120062322427
Epoch 12 Accuracy 79.14031711117221
Epoch 13 Accuracy 78.87453029053249
Epoch 14 Accuracy 79.04866648336541
Epoch 15 Accuracy 79.0120062322427
Epoch 16 Accuracy 79.04866648336541
Epoch 17 Accuracy 79.02117129502338
Epoch 18 Accuracy 79.04866648336541
Epoch 19 Accuracy 79.03033635780406
Epoch 20 Accuracy 79.05783154614609
Epoch 21 Accuracy 79.04866648336541
Epoch 22 Accuracy 79.03950142058474
Epoch 23 Accuracy 79.05783154614609
Epoch 24 Accuracy 79.05783154614609
Epoch 25 Accuracy 79.05783154614609
Epoch 26 Accuracy 79.05783154614609
Epoch 27 Accuracy 79.05783154614609
Epoch

In [21]:
# Predict on the Rice training split with the trained SVM and
# print the accuracy returned by get_acc (a percentage).
pred_svm = svm_RICE.predict(X_train_RICE)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_RICE)))

The training accuracy is given by: 79.057832


### Validate SVM on Rice

In [22]:
# Predict on the Rice validation split (pred_svm is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_svm = svm_RICE.predict(X_val_RICE)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_RICE)))

The validation accuracy is given by: 78.608743


### Test SVM on Rice

In [23]:
# Predict on the Rice test split (pred_svm is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_svm = svm_RICE.predict(X_test_RICE)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_RICE)))

The testing accuracy is given by: 79.323618


# Softmax Classifier (with SGD)

Next, you will train a Softmax classifier. This classifier consists of a linear function of the input data followed by a softmax function which outputs a vector of dimension C (number of classes) for each data point. Each entry of the softmax output vector corresponds to a confidence in one of the C classes, and like a probability distribution, the entries of the output vector sum to 1. We use a cross-entropy loss on this softmax output to train the model.

Check the following link as an additional resource on softmax classification: http://cs231n.github.io/linear-classify/#softmax

Once again we will train the classifier with SGD. This means you need to compute the gradients of the softmax cross-entropy loss function with respect to the weights and update the weights using this gradient. Check the following link to help with implementing the gradient updates: https://deepnotes.io/softmax-crossentropy

The softmax classifier has 3 hyperparameters that you can experiment with:
- **Learning rate** - As above, this controls how much the model weights are updated with respect to their gradient.
- **Number of Epochs** - As described for perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case, we minimize the L2 norm of the model weights as regularization, so the regularization constant is a coefficient on the L2 norm in the combined cross-entropy and regularization objective.

You will implement a softmax classifier using SGD in the **models/softmax.py**

The following code: 
- Creates an instance of the Softmax classifier class 
- Calls the train function of the Softmax class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Train Softmax on Fashion-MNIST

In [24]:
# Hyperparameters for the Fashion-MNIST softmax run.
lr, n_epochs, reg_const = 0.5, 20, 1

# Build a softmax classifier for n_class_fashion classes and train it on the training split.
softmax_fashion = Softmax(n_class_fashion, lr, n_epochs, reg_const)
softmax_fashion.train(X_train_fashion, y_train_fashion)

Epoch 0 Accuracy 12.076
Epoch 1 Accuracy 81.382
Epoch 2 Accuracy 70.86
Epoch 3 Accuracy 79.062
Epoch 4 Accuracy 79.214
Epoch 5 Accuracy 76.168
Epoch 6 Accuracy 80.49199999999999
Epoch 7 Accuracy 79.318
Epoch 8 Accuracy 80.866
Epoch 9 Accuracy 77.64
Epoch 10 Accuracy 80.116
Epoch 11 Accuracy 82.902
Epoch 12 Accuracy 81.306
Epoch 13 Accuracy 84.652
Epoch 14 Accuracy 80.952
Epoch 15 Accuracy 84.512
Epoch 16 Accuracy 85.858
Epoch 17 Accuracy 84.49
Epoch 18 Accuracy 83.212
Epoch 19 Accuracy 82.74000000000001


In [25]:
# Predict on the Fashion-MNIST training split with the trained softmax classifier and
# print the accuracy returned by get_acc (a percentage).
pred_softmax = softmax_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_fashion)))

The training accuracy is given by: 85.448000


### Validate Softmax on Fashion-MNIST

In [26]:
# Predict on the Fashion-MNIST validation split (pred_softmax is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_softmax = softmax_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_fashion)))

The validation accuracy is given by: 81.620000


### Testing Softmax on Fashion-MNIST

In [27]:
# Predict on the Fashion-MNIST test split (pred_softmax is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_softmax = softmax_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_fashion)))

The testing accuracy is given by: 80.570000


### Softmax_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [28]:
# Hand the softmax classifier's Fashion-MNIST test predictions to output_submission_csv
# together with the target path. NOTE(review): presumably this writes the CSV and expects
# the kaggle/ directory to exist -- confirm in kaggle_submission.py.
output_submission_csv('kaggle/softmax_submission_fashion.csv', softmax_fashion.predict(X_test_fashion))

## Train Softmax on Rice

In [29]:
# Hyperparameters for the Rice softmax run.
lr, n_epochs, reg_const = 0.5, 10, 0.05

# Build a softmax classifier for n_class_RICE classes and train it on the Rice training split.
softmax_RICE = Softmax(n_class_RICE, lr, n_epochs, reg_const)
softmax_RICE.train(X_train_RICE, y_train_RICE)

Epoch 0 Accuracy 48.47401704701677
Epoch 1 Accuracy 60.18696728072588
Epoch 2 Accuracy 75.55677756392632
Epoch 3 Accuracy 78.92035560443588
Epoch 4 Accuracy 75.55677756392632
Epoch 5 Accuracy 75.30932086884795
Epoch 6 Accuracy 59.27962606543855
Epoch 7 Accuracy 75.21767024104115
Epoch 8 Accuracy 65.98845202089633
Epoch 9 Accuracy 70.06690495829896


In [30]:
# Predict on the Rice training split with the trained softmax classifier and
# print the accuracy returned by get_acc (a percentage).
pred_softmax = softmax_RICE.predict(X_train_RICE)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_RICE)))

The training accuracy is given by: 76.729906


### Validate Softmax on Rice

In [31]:
# Predict on the Rice validation split (pred_softmax is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_softmax = softmax_RICE.predict(X_val_RICE)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_RICE)))

The validation accuracy is given by: 75.996701


### Testing Softmax on Rice

In [32]:
# Predict on the Rice test split (pred_softmax is overwritten) and
# print the accuracy returned by get_acc (a percentage).
pred_softmax = softmax_RICE.predict(X_test_RICE)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_RICE)))

The testing accuracy is given by: 76.656585


# Logistic Classifier

The Logistic Classifier has 3 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Number of Epochs** - As described for perceptron.
- **Threshold** - The decision boundary of the classifier.


You will implement the Logistic Classifier in the **models/logistic.py**

The following code: 
- Creates an instance of the Logistic classifier class 
- Calls the train function of the Logistic class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

### Training Logistic Classifier

In [33]:
# Hyperparameters for the Logistic classifier on Rice.
learning_rate, n_epochs, threshold = 0.2, 100, 0

# Re-encode the training labels: 0 becomes -1, every other label is kept as is.
# NOTE(review): this rebinds y_train_RICE for the rest of the session, so re-running the
# Perceptron / SVM / Softmax Rice training cells after this one would use the re-encoded labels.
y_train_RICE = np.where(y_train_RICE != 0, y_train_RICE, -1)

# NOTE(review): `lr` held the float learning rate in the earlier training cells;
# from this cell on it refers to the Logistic model instance.
lr = Logistic(learning_rate, n_epochs, threshold)
lr.train(X_train_RICE, y_train_RICE)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99


In [34]:
# Predict on the Rice training split with the Logistic model (bound to `lr`) and print the
# accuracy returned by get_acc (a percentage). y_train_RICE here is the version whose 0 labels
# were re-encoded to -1 in the Logistic training cell.
pred_lr = lr.predict(X_train_RICE)
print('The training accuracy is given by: %f' % (get_acc(pred_lr, y_train_RICE)))

The training accuracy is given by: 99.853359


### Validate Logistic Classifier

In [35]:
# Re-encode validation labels (0 -> -1) to match the encoding applied to the training labels
# before fitting the Logistic model; presumably lr.predict emits -1/+1 -- confirm in models/logistic.py.
# NOTE(review): y_val_RICE stays rebound afterwards, so re-running earlier Rice validation
# cells would compare 0/1 predictions against -1/1 labels.
y_val_RICE = np.where(y_val_RICE == 0, -1, y_val_RICE)
# Predict on the validation split and print the accuracy returned by get_acc (a percentage).
pred_lr = lr.predict(X_val_RICE)
print('The validation accuracy is given by: %f' % (get_acc(pred_lr, y_val_RICE)))

The validation accuracy is given by: 99.752543


### Test Logistic Classifier

In [36]:
# Re-encode test labels (0 -> -1) to match the encoding applied to the training labels
# before fitting the Logistic model; presumably lr.predict emits -1/+1 -- confirm in models/logistic.py.
# NOTE(review): y_test_RICE stays rebound afterwards, so re-running earlier Rice test
# cells would compare 0/1 predictions against -1/1 labels.
y_test_RICE = np.where(y_test_RICE == 0, -1, y_test_RICE)
# Predict on the test split and print the accuracy returned by get_acc (a percentage).
pred_lr = lr.predict(X_test_RICE)
print('The testing accuracy is given by: %f' % (get_acc(pred_lr, y_test_RICE)))

The testing accuracy is given by: 99.725048
