In [1]:
from builtins import range
import numpy as np
from random import shuffle
from sklearn.model_selection import train_test_split
from past.builtins import xrange
import pandas as pd

In [2]:
# Read the Fashion-MNIST training CSV and keep every column except 'label'
# as an int32 feature matrix.
train = pd.read_csv("fashion-mnist_train.csv")
trainX = train.loc[:, train.columns != 'label'].to_numpy().astype(np.int32)


In [3]:
# Read the Fashion-MNIST test CSV and keep every column except 'label'
# as an int32 feature matrix.
test = pd.read_csv("fashion-mnist_test.csv")
X_test = test.loc[:, test.columns != 'label'].to_numpy().astype(np.int32)

In [4]:
# Test labels: select only the 'label' column of the test frame (kept 2-D
# by the boolean column mask) and convert it to int32.
y_test = test.iloc[:, test.columns == 'label'].to_numpy().astype(np.int32)

In [6]:
# Sanity check: display the (rows, columns) shape of the test feature matrix.
X_test.shape

(10000, 784)

In [5]:
# Loading training data labels.
# The column mask is built from the training frame's own columns, so the
# selection does not depend on the test CSV sharing the same column layout.
trainY = train.iloc[:, train.columns == 'label'].to_numpy()
trainY = trainY.astype(np.int32)

In [7]:
# Dividing train data into train (80%) and validation (20%) at random.
# A fixed random_state makes the shuffle reproducible, so the split (and the
# accuracies reported further down) are the same on every Restart & Run All.
X_train, X_val, y_train, y_val = train_test_split(
    trainX, trainY, test_size=0.2, random_state=42
)

In [8]:
# Flatten every label array to 1-D with one entry per row.
y_train = y_train.reshape(len(y_train))
y_val = y_val.reshape(len(y_val))
y_test = y_test.reshape(len(y_test))

In [9]:
# Bias trick: append a constant column of ones to each feature matrix so the
# bias is learned as one extra row of W.
X_train, X_val, X_test = [
    np.hstack([features, np.ones((features.shape[0], 1))])
    for features in (X_train, X_val, X_test)
]

In [10]:
# Display the shapes of the three feature matrices after the bias column was added.
tuple(features.shape for features in (X_train, X_val, X_test))

((48000, 785), (12000, 785), (10000, 785))

In [11]:
def svm_loss(W, X, y, reg):
    """Vectorized multiclass SVM (hinge) loss with L2 regularization.

    Parameters
    ----------
    W : ndarray, shape (D, C)
        Weight matrix; column j scores class j.
    X : ndarray, shape (N, D)
        Mini-batch of N samples.
    y : array-like of int, shape (N,) or (N, 1)
        Correct class index for each sample. A column vector is flattened
        before use so it indexes one score per row.
    reg : float
        L2 regularization strength.

    Returns
    -------
    loss : float
        Mean over the batch of sum_{j != y_i} max(0, s_j - s_{y_i} + 1),
        plus reg * sum(W * W).
    dW : ndarray, shape (D, C)
        Gradient of the loss with respect to W.
    """
    N = X.shape[0]

    # Flatten labels: with an (N, 1) array the fancy index below would
    # broadcast to (N, N) instead of picking one score per row.
    y = np.asarray(y).reshape(-1)
    rows = np.arange(N)

    # Compute scores
    scores = X.dot(W)  # N x C

    # Score each example assigned to its correct class
    correct_class_scores = scores[rows, y]

    # Hinge margins with delta = 1; the correct class contributes no loss
    margins = np.maximum(0, scores - correct_class_scores[:, np.newaxis] + 1)
    margins[rows, y] = 0

    # Gradient w.r.t. the scores: +1 for every class that violates the margin,
    # and minus the number of violations for the correct class
    dscores = (margins > 0).astype(float)
    dscores[rows, y] = -np.sum(dscores, axis=1)

    # Average over the batch
    loss = np.sum(margins) / N
    dW = X.T.dot(dscores) / N

    # Regularization
    loss += reg * np.sum(W * W)
    dW += reg * 2 * W

    return loss, dW

In [12]:
# Random standard-normal weights for the smoke test below: 785 input columns x 10 classes.
W = np.random.standard_normal((785, 10))

In [22]:
# Smoke test: evaluate loss and gradient on the first 120 training rows
# with regularization switched off.
y = y_train.reshape(len(y_train))
loss, dw = svm_loss(W, X_train[:120], y[:120], 0)

1


In [24]:
# The gradient returned by svm_loss should have the same shape as W.
print(dw.shape)

(785, 10)


In [12]:
def train(X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
          batch_size=120, show_loss=False, seed=None):
    """Fit a linear multiclass SVM with mini-batch stochastic gradient descent.

    NOTE: this definition rebinds the name `train`, which the data-loading
    cell also uses for the training DataFrame. Re-running a cell that reads
    `train.iloc` / `train.loc` after this one requires reloading the CSV first.

    Parameters
    ----------
    X : ndarray, shape (num_train, dim)
        Training features.
    y : ndarray of int, shape (num_train,)
        Class labels; the number of classes is taken as max(y) + 1.
    learning_rate : float
        Step size of each gradient update.
    reg : float
        L2 regularization strength forwarded to svm_loss.
    num_iters : int
        Number of SGD steps.
    batch_size : int
        Number of rows drawn for each step.
    show_loss : bool
        When true, print the batch loss every 100 iterations.
    seed : int or None
        When given, weight initialization and batch sampling use a private
        np.random.RandomState(seed) so the run is reproducible. When None
        (the default), NumPy's global random state is used, as before.

    Returns
    -------
    loss_history : list
        Batch loss recorded at every iteration.
    W : ndarray, shape (dim, num_classes)
        Learned weights.
    """
    num_train, dim = X.shape
    num_classes = np.max(y) + 1

    # np.random and RandomState expose the same randn/choice API
    rng = np.random if seed is None else np.random.RandomState(seed)

    W = 0.001 * rng.randn(dim, num_classes)

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in range(num_iters):
        # Draw a mini-batch of row indices (choice samples with replacement by default)
        indices = rng.choice(num_train, size=batch_size)

        # Evaluate loss and gradient on the batch, then take a gradient step
        loss, grad = svm_loss(W, X[indices], y[indices], reg)
        loss_history.append(loss)
        W -= learning_rate * grad

        if show_loss and it % 100 == 0:
            print('iteration %d / %d: loss %f' % (it, num_iters, loss))

    return loss_history, W

In [13]:
def predict(X,W):
    """Return the predicted class index for every row of X.

    Each row is scored against every column of W; the prediction is the
    column index with the highest score (the first one on ties).
    """
    class_scores = X.dot(W)
    return np.argmax(class_scores, axis=1)

In [14]:
# Fit the linear SVM on the training split, printing the batch loss every 100 iterations.
loss_hist, W = train(
    X_train,
    y_train,
    learning_rate=1e-5,
    reg=4.6,
    num_iters=2000,
    show_loss=True,
)

iteration 0 / 2000: loss 18.688738
iteration 100 / 2000: loss 2.513861
iteration 200 / 2000: loss 2.350509
iteration 300 / 2000: loss 2.168045
iteration 400 / 2000: loss 1.811276
iteration 500 / 2000: loss 6.666360
iteration 600 / 2000: loss 2.948842
iteration 700 / 2000: loss 5.234330
iteration 800 / 2000: loss 2.850373
iteration 900 / 2000: loss 1.969431
iteration 1000 / 2000: loss 2.305958
iteration 1100 / 2000: loss 3.157930
iteration 1200 / 2000: loss 1.539895
iteration 1300 / 2000: loss 2.833641
iteration 1400 / 2000: loss 1.569824
iteration 1500 / 2000: loss 4.432980
iteration 1600 / 2000: loss 3.069718
iteration 1700 / 2000: loss 2.440557
iteration 1800 / 2000: loss 2.482753
iteration 1900 / 2000: loss 3.866976


In [15]:
# Accuracy = fraction of rows whose predicted class equals the true label,
# reported for the training split and then the validation split.
y_train_pred = predict(X_train, W)
print('training accuracy: %f' % np.mean(y_train_pred == y_train))
y_val_pred = predict(X_val, W)
print('validation accuracy: %f' % np.mean(y_val_pred == y_val))

training accuracy: 0.839521
validation accuracy: 0.833083


In [38]:
# Debug cell: print the true training labels next to the predicted ones
# (NumPy abbreviates long arrays with "...").
print(y_train,y_train_pred)

[[1]
 [5]
 [2]
 ...
 [7]
 [3]
 [9]] [1 5 2 ... 7 3 9]


In [16]:
# Final evaluation: fraction of test rows classified correctly.
y_test_pred = predict(X_test, W)
test_accuracy = np.mean(y_test_pred == y_test)
print('linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

linear SVM on raw pixels final test set accuracy: 0.836000
