In [1]:
import math
import numpy as np
import pandas as pd

## Functions

In [2]:
def read_dataset(feature_file, label_file):
    ''' Read data set in *.csv to data frame in Pandas'''
    df_X = pd.read_csv(feature_file)
    df_y = pd.read_csv(label_file)
    X = df_X.values  # convert values in dataframe to numpy array (features)
    y = df_y.values  # convert values in dataframe to numpy array (label)
    return X, y

def normalize_features(X_train, X_test):
    from sklearn.preprocessing import StandardScaler  # import libaray
    scaler = StandardScaler()  # call an object function
    scaler.fit(X_train)  # calculate mean, std in X_train
    X_train_norm = scaler.transform(X_train)  # apply normalization on X_train
    # we use the same normalization on X_test
    X_test_norm = scaler.transform(X_test)
    return X_train_norm, X_test_norm

def one_hot_encoder(y_train, y_test):
    ''' convert label to a vector under one-hot-code fashion. '''
    from sklearn import preprocessing
    lb = preprocessing.LabelBinarizer()
    lb.fit(y_train)
    y_train_ohe1 = lb.transform(y_train)
    y_test_ohe1 = lb.transform(y_test)
    y_train_ohe = np.where(y_train_ohe1 > 0, y_train_ohe1, -1) #In SVM, we label negative class as −1 instead of 0.
    y_test_ohe = np.where(y_test_ohe1 > 0, y_test_ohe1, -1) 
    return y_train_ohe, y_test_ohe

def accuracy(ypred, yexact):
    p = np.array(ypred == yexact, dtype=int)
    return np.sum(p) / float(len(yexact))


## Check Input information

In [3]:
# X_train, y_train = read_dataset('MNIST_X_train.csv', 'MNIST_y_train.csv')
# X_test, y_test = read_dataset('MNIST_X_test.csv', 'MNIST_y_test.csv')
X_train, y_train = read_dataset('Digits_X_train.csv', 'Digits_y_train.csv')
X_test, y_test = read_dataset('Digits_X_test.csv', 'Digits_y_test.csv')
X_train_norm, X_test_norm = normalize_features(X_train, X_test)
y_train_ohe, y_test_ohe = one_hot_encoder(y_train, y_test)
print(X_train_norm.shape)
print(X_test_norm.shape)
print(y_train_ohe.shape)
print(y_test_ohe.shape)

(1347, 64)
(450, 64)
(1347, 10)
(450, 10)


## Stucture of SVM (Classification version)
$N$: #of samples, $D$: #of features

Then $X$ has shape $(N,D)$, $W$ has shape $(D,10)$, $y$ has shape $(N,10)$, and we have:

\begin{align}
    \hat{y}  &= XW + b \\ 
    L        &= \|W\|_2 +\lambda \sum_{i}(0, 1-y^{i}W^Tx^{i}-by^i)  \\
    \dfrac{\partial{L}}{\partial{W}}  &= \frac{W}{\|W\|_2} - \lambda (0, (y^T X)^T)\\
    \dfrac{\partial{L}}{\partial{b}}  &= -\sum_{i}\lambda (0, y^i) \\
    W                                 &:= W - lr*\dfrac{\partial{L}}{\partial{W}}\\
    b                                 &:= b - lr *\dfrac{\partial{L}}{\partial{b}}
\end{align}



In [4]:
class SVM():
    def __init__(self, X, y, lr=0.01, Lambda=0.01):
        '''
        Input:
        - X: shape (M,N)
        - y: shape (M,P) P = 10
        - W shape (N,P)
        '''
        self.X = X
        self.y = y
        self.M = X.shape[0]  # numbers of samples
        self.N = X.shape[1]  # numbers of features
        self.P = y.shape[1]
        self.W = np.random.randn(self.N, self.P)  # 784 by 10
        self.b = np.zeros((1, self.P))   # 1 by 10
        self.lr = lr
        self.Lambda = Lambda

    def forward(self):
        self.y_hat = np.dot(self.X, self.W) + self.b # 2000 by 10
        self.cond = 1 - self.y * self.y_hat

    def loss(self):
        self.forward()
        self.hinge_loss = np.where(self.cond > 0, self.y, 0)  # 2000 by 10
        self.loss = np.linalg.norm(self.W, axis=0) + self.Lambda * self.hinge_loss

    def sub_gradient_descent(self):
        y = np.where(self.cond > 0, self.y, 0)
        y_b = np.where(self.cond > 0, self.y, 0)
        dW = (1/np.linalg.norm(self.W, axis=0)) * self.W - \
            self.Lambda * np.dot(y.T, self.X).T
        db = -np.sum(self.Lambda * y_b ,axis = 0)
        self.W = self.W - self.lr * dW
        self.b = self.b - self.lr * db

    def predict(self, X_test):
        y_hat_test = np.dot(X_test, self.W) + self.b
        labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
        num_test_samples = X_test.shape[0]
        ypred = np.zeros(num_test_samples, dtype=int)
        for i in range(num_test_samples):
            ypred[i] = labels[np.argmax(y_hat_test[i, :])]
        return ypred

mySVM = SVM(X_train_norm, y_train_ohe, lr=0.01, Lambda=200)
epoch_num = 2000
for i in range(epoch_num):
    mySVM.forward()
    mySVM.sub_gradient_descent()
   
y_pred = mySVM.predict(X_test_norm)
print(y_pred)
print(y_test.ravel())
print('Accuracy of our model ', accuracy(y_pred, y_test.ravel()))

[7 3 0 2 3 1 2 0 5 6 4 1 7 6 1 5 4 8 3 8 5 9 7 9 5 6 1 7 3 0 2 2 5 2 5 4 1
 7 2 7 8 3 9 6 3 2 9 7 6 8 8 9 7 1 0 7 2 5 7 0 8 7 6 8 9 7 8 6 8 9 1 6 1 9
 0 2 1 4 9 9 9 9 6 9 0 1 3 8 8 6 0 6 4 8 1 9 6 8 9 6 6 3 6 6 5 3 1 0 2 8 2
 2 0 0 7 2 5 6 6 2 5 4 3 2 2 1 7 1 7 4 7 3 6 9 0 2 1 4 9 8 3 6 4 0 7 4 6 4
 0 5 0 1 2 8 9 2 4 4 5 0 9 6 7 1 1 1 9 2 3 7 9 7 2 3 8 6 3 6 0 1 1 1 9 2 4
 5 0 3 1 3 7 4 0 2 5 0 7 6 8 4 6 9 5 7 4 4 3 7 1 9 0 6 1 4 1 8 1 4 5 1 2 0
 2 0 3 0 5 2 0 3 3 6 5 2 9 4 2 6 1 6 0 3 9 2 8 5 2 6 3 6 7 4 6 1 2 1 1 9 5
 3 3 4 9 7 5 1 9 6 8 9 2 8 7 1 5 7 4 3 6 0 6 5 7 2 4 8 3 3 4 1 4 2 5 9 3 8
 9 3 5 4 5 6 4 1 6 2 7 9 5 1 3 5 0 1 6 8 5 7 5 3 7 7 4 0 2 3 2 6 3 4 1 0 3
 4 9 2 9 4 9 1 3 1 7 4 5 3 7 1 6 6 3 9 5 2 9 4 0 0 2 0 1 2 5 8 6 8 5 2 0 9
 6 4 9 7 8 6 4 9 5 6 6 3 2 0 4 1 9 2 0 9 3 1 2 0 1 4 1 6 3 5 2 1 3 9 7 7 1
 2 0 3 8 8 7 3 5 0 1 3 7 7 3 1 8 8 0 6 3 3 3 9 3 7 9 7 0 8 6 2 2 1 0 0 7 6
 3 6 6 6 4 1]
[7 3 0 2 3 1 2 0 5 6 4 1 7 6 1 5 4 8 3 0 5 9 7 9 5 6 1 7 3 0 2 2 5 2 5 4 1
 7 2 7 8 3 

## Sklearn library

In [5]:
from sklearn import svm
clf = svm.LinearSVC()
clf.fit(X_train_norm, y_train.ravel()) 
y_hat = clf.predict(X_test_norm)
print('Accuracy of library model ', accuracy(y_hat, y_test.ravel()))

Accuracy of library model  0.9488888888888889
