In [3]:
# Simple SMO algorithm for SVM.
# Results may differ from run to run because of the random selection inside the simple SMO algorithm.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import utility

# Load the MNIST feature and label tables from CSV files in the working directory.
Xtrain, ytrain = (pd.read_csv(path) for path in ("MNIST_X_train.csv", "MNIST_Y_train.csv"))
Xtest, ytest = (pd.read_csv(path) for path in ("MNIST_X_test.csv", "MNIST_Y_test.csv"))

# Plain ndarray copies of the frames for the numeric code in the cells below.
npXtrain, npytrain = Xtrain.to_numpy(), ytrain.to_numpy()
npXtest, npytest = Xtest.to_numpy(), ytest.to_numpy()

print("The shape of Xtrain is {}".format(Xtrain.shape))
print("The shape of Xtest is {}".format(Xtest.shape))

# Feature scaling
# Per-column statistics are taken from the training set only and reused for the
# test set; keepdims=True keeps them as (1, n_features) row vectors.
mean = npXtrain.mean(axis=0, keepdims=True)
std = npXtrain.std(axis=0, keepdims=True)
scaled_Xtrain, scaled_Xtest = (utility.scaler(X, mean, std) for X in (npXtrain, npXtest))

The shape of Xtrain is (2000, 784)
The shape of Xtest is (500, 784)


In [4]:
# linear kernel
# Group the scaled training rows by digit label so that any two classes can be
# stacked into their own binary problem.
dataList = [scaled_Xtrain[np.where(npytrain == i)[0]] for i in range(10)]
clfMat = np.zeros((10,10), dtype=object) # objects are SVM classifier
# one vs one approach: only the upper triangle (j > i) of clfMat holds a classifier
for i in range(10):
    for j in range(i + 1, 10):
        # prepare data and target for i vs j: class i is labelled +1, class j is labelled -1
        targetI = np.ones(dataList[i].shape[0]).reshape((-1,1))
        targetJ = -np.ones(dataList[j].shape[0]).reshape((-1,1))
        data = np.vstack((dataList[i], dataList[j]))
        target = np.vstack((targetI, targetJ))
        # Train class i vs class j
        clfMat[i,j] = utility.SVM(kernel = "linear", iterations = 50)
        clfMat[i,j].fit(data, target)
        # compute training accuracy
        predLabelsIJ = clfMat[i,j].predict(data)
        scoreIJ = utility.accuracy(target, predLabelsIJ)
        print("Training class {} vs class {} is complete. The training accuracy is {:.2f}%".format(i,j,scoreIJ*100))

# one vs one classification
# Every pairwise classifier labels the whole test set in a single predict() call
# instead of one call per test row, then casts one vote per row.
# NOTE(review): assumes predict() labels each row independently of the rest of
# the batch (the training-accuracy call above already passes a batch) — confirm
# against utility.SVM.
voteCounts = np.zeros((npytest.shape[0], 10))
for i in range(10):
    for j in range(i + 1, 10):
        # ravel() accepts both (n,) and (n, 1) prediction shapes
        votesForI = np.ravel(clfMat[i,j].predict(scaled_Xtest)) == 1
        voteCounts[votesForI, i] += 1    # predicted +1 -> vote for class i
        voteCounts[~votesForI, j] += 1   # anything else -> vote for class j
# argmax returns the lowest class index when several classes tie on votes
predLabels = np.argmax(voteCounts, axis=1).astype(float).reshape((-1, 1))

score = utility.accuracy(npytest, predLabels)
print("Using linear kernel, the accuracy of multiclass classification is {:.2f}%".format(score*100))

Training class 0 vs class 1 is complete. The training accuracy is 100.00%
Training class 0 vs class 2 is complete. The training accuracy is 100.00%
Training class 0 vs class 3 is complete. The training accuracy is 100.00%
Training class 0 vs class 4 is complete. The training accuracy is 100.00%
Training class 0 vs class 5 is complete. The training accuracy is 100.00%
Training class 0 vs class 6 is complete. The training accuracy is 100.00%
Training class 0 vs class 7 is complete. The training accuracy is 100.00%
Training class 0 vs class 8 is complete. The training accuracy is 100.00%
Training class 0 vs class 9 is complete. The training accuracy is 100.00%
Training class 1 vs class 2 is complete. The training accuracy is 100.00%
Training class 1 vs class 3 is complete. The training accuracy is 100.00%
Training class 1 vs class 4 is complete. The training accuracy is 100.00%
Training class 1 vs class 5 is complete. The training accuracy is 100.00%
Training class 1 vs class 6 is complet

In [5]:
# poly kernel
# Group the scaled training rows by digit label so that any two classes can be
# stacked into their own binary problem.
dataList = [scaled_Xtrain[np.where(npytrain == i)[0]] for i in range(10)]
clfMat = np.zeros((10,10), dtype=object) # objects are SVM classifier
# one vs one approach: only the upper triangle (j > i) of clfMat holds a classifier
for i in range(10):
    for j in range(i + 1, 10):
        # prepare data and target for i vs j: class i is labelled +1, class j is labelled -1
        targetI = np.ones(dataList[i].shape[0]).reshape((-1,1))
        targetJ = -np.ones(dataList[j].shape[0]).reshape((-1,1))
        data = np.vstack((dataList[i], dataList[j]))
        target = np.vstack((targetI, targetJ))
        # Train class i vs class j
        clfMat[i,j] = utility.SVM(kernel = "poly", gamma = 1, iterations = 50)
        clfMat[i,j].fit(data, target)
        # compute training accuracy
        predLabelsIJ = clfMat[i,j].predict(data)
        scoreIJ = utility.accuracy(target, predLabelsIJ)
        print("Training class {} vs class {} is complete. The training accuracy is {:.2f}%".format(i,j,scoreIJ*100))

# one vs one classification
# Every pairwise classifier labels the whole test set in a single predict() call
# instead of one call per test row, then casts one vote per row.
# NOTE(review): assumes predict() labels each row independently of the rest of
# the batch (the training-accuracy call above already passes a batch) — confirm
# against utility.SVM.
voteCounts = np.zeros((npytest.shape[0], 10))
for i in range(10):
    for j in range(i + 1, 10):
        # ravel() accepts both (n,) and (n, 1) prediction shapes
        votesForI = np.ravel(clfMat[i,j].predict(scaled_Xtest)) == 1
        voteCounts[votesForI, i] += 1    # predicted +1 -> vote for class i
        voteCounts[~votesForI, j] += 1   # anything else -> vote for class j
# argmax returns the lowest class index when several classes tie on votes
predLabels = np.argmax(voteCounts, axis=1).astype(float).reshape((-1, 1))

score = utility.accuracy(npytest, predLabels)
print("Using poly kernel, the accuracy of multiclass classification is {:.2f}%".format(score*100))

Training class 0 vs class 1 is complete. The training accuracy is 97.52%
Training class 0 vs class 2 is complete. The training accuracy is 100.00%
Training class 0 vs class 3 is complete. The training accuracy is 99.76%
Training class 0 vs class 4 is complete. The training accuracy is 100.00%
Training class 0 vs class 5 is complete. The training accuracy is 100.00%
Training class 0 vs class 6 is complete. The training accuracy is 100.00%
Training class 0 vs class 7 is complete. The training accuracy is 100.00%
Training class 0 vs class 8 is complete. The training accuracy is 100.00%
Training class 0 vs class 9 is complete. The training accuracy is 100.00%
Training class 1 vs class 2 is complete. The training accuracy is 100.00%
Training class 1 vs class 3 is complete. The training accuracy is 99.77%
Training class 1 vs class 4 is complete. The training accuracy is 100.00%
Training class 1 vs class 5 is complete. The training accuracy is 100.00%
Training class 1 vs class 6 is complete. 

In [6]:
# rbf kernel
# Group the scaled training rows by digit label so that any two classes can be
# stacked into their own binary problem.
dataList = [scaled_Xtrain[np.where(npytrain == i)[0]] for i in range(10)]
clfMat = np.zeros((10,10), dtype=object) # objects are SVM classifier
# one vs one approach: only the upper triangle (j > i) of clfMat holds a classifier
for i in range(10):
    for j in range(i + 1, 10):
        # prepare data and target for i vs j: class i is labelled +1, class j is labelled -1
        targetI = np.ones(dataList[i].shape[0]).reshape((-1,1))
        targetJ = -np.ones(dataList[j].shape[0]).reshape((-1,1))
        data = np.vstack((dataList[i], dataList[j]))
        target = np.vstack((targetI, targetJ))
        # Train class i vs class j
        clfMat[i,j] = utility.SVM(kernel = "rbf", iterations = 20)
        clfMat[i,j].fit(data, target)
        # compute training accuracy
        predLabelsIJ = clfMat[i,j].predict(data)
        scoreIJ = utility.accuracy(target, predLabelsIJ)
        print("Training class {} vs class {} is complete. The training accuracy is {:.2f}%".format(i,j,scoreIJ*100))

# one vs one classification
# Every pairwise classifier labels the whole test set in a single predict() call
# instead of one call per test row, then casts one vote per row.
# NOTE(review): assumes predict() labels each row independently of the rest of
# the batch (the training-accuracy call above already passes a batch) — confirm
# against utility.SVM.
voteCounts = np.zeros((npytest.shape[0], 10))
for i in range(10):
    for j in range(i + 1, 10):
        # ravel() accepts both (n,) and (n, 1) prediction shapes
        votesForI = np.ravel(clfMat[i,j].predict(scaled_Xtest)) == 1
        voteCounts[votesForI, i] += 1    # predicted +1 -> vote for class i
        voteCounts[~votesForI, j] += 1   # anything else -> vote for class j
# argmax returns the lowest class index when several classes tie on votes
predLabels = np.argmax(voteCounts, axis=1).astype(float).reshape((-1, 1))

score = utility.accuracy(npytest, predLabels)
print("Using rbf kernel, the accuracy of multiclass classification is {:.2f}%".format(score*100))

Training class 0 vs class 1 is complete. The training accuracy is 100.00%
Training class 0 vs class 2 is complete. The training accuracy is 100.00%
Training class 0 vs class 3 is complete. The training accuracy is 100.00%
Training class 0 vs class 4 is complete. The training accuracy is 100.00%
Training class 0 vs class 5 is complete. The training accuracy is 99.74%
Training class 0 vs class 6 is complete. The training accuracy is 100.00%
Training class 0 vs class 7 is complete. The training accuracy is 100.00%
Training class 0 vs class 8 is complete. The training accuracy is 99.74%
Training class 0 vs class 9 is complete. The training accuracy is 100.00%
Training class 1 vs class 2 is complete. The training accuracy is 99.55%
Training class 1 vs class 3 is complete. The training accuracy is 98.86%
Training class 1 vs class 4 is complete. The training accuracy is 99.54%
Training class 1 vs class 5 is complete. The training accuracy is 99.76%
Training class 1 vs class 6 is complete. The

In [7]:
# sigmoid
# Group the scaled training rows by digit label so that any two classes can be
# stacked into their own binary problem.
dataList = [scaled_Xtrain[np.where(npytrain == i)[0]] for i in range(10)]
clfMat = np.zeros((10,10), dtype=object) # objects are SVM classifier
# one vs one approach: only the upper triangle (j > i) of clfMat holds a classifier
for i in range(10):
    for j in range(i + 1, 10):
        # prepare data and target for i vs j: class i is labelled +1, class j is labelled -1
        targetI = np.ones(dataList[i].shape[0]).reshape((-1,1))
        targetJ = -np.ones(dataList[j].shape[0]).reshape((-1,1))
        data = np.vstack((dataList[i], dataList[j]))
        target = np.vstack((targetI, targetJ))
        # Train class i vs class j
        clfMat[i,j] = utility.SVM(kernel = "sigmoid", iterations = 20)
        clfMat[i,j].fit(data, target)
        # compute training accuracy
        predLabelsIJ = clfMat[i,j].predict(data)
        scoreIJ = utility.accuracy(target, predLabelsIJ)
        print("Training class {} vs class {} is complete. The training accuracy is {:.2f}%".format(i,j,scoreIJ*100))

# one vs one classification
# Every pairwise classifier labels the whole test set in a single predict() call
# instead of one call per test row, then casts one vote per row.
# NOTE(review): assumes predict() labels each row independently of the rest of
# the batch (the training-accuracy call above already passes a batch) — confirm
# against utility.SVM.
voteCounts = np.zeros((npytest.shape[0], 10))
for i in range(10):
    for j in range(i + 1, 10):
        # ravel() accepts both (n,) and (n, 1) prediction shapes
        votesForI = np.ravel(clfMat[i,j].predict(scaled_Xtest)) == 1
        voteCounts[votesForI, i] += 1    # predicted +1 -> vote for class i
        voteCounts[~votesForI, j] += 1   # anything else -> vote for class j
# argmax returns the lowest class index when several classes tie on votes
predLabels = np.argmax(voteCounts, axis=1).astype(float).reshape((-1, 1))

score = utility.accuracy(npytest, predLabels)
print("Using sigmoid kernel, the accuracy of multiclass classification is {:.2f}%".format(score*100))

Training class 0 vs class 1 is complete. The training accuracy is 100.00%
Training class 0 vs class 2 is complete. The training accuracy is 98.10%
Training class 0 vs class 3 is complete. The training accuracy is 98.30%
Training class 0 vs class 4 is complete. The training accuracy is 99.75%
Training class 0 vs class 5 is complete. The training accuracy is 98.71%
Training class 0 vs class 6 is complete. The training accuracy is 99.00%
Training class 0 vs class 7 is complete. The training accuracy is 99.76%
Training class 0 vs class 8 is complete. The training accuracy is 98.94%
Training class 0 vs class 9 is complete. The training accuracy is 99.49%
Training class 1 vs class 2 is complete. The training accuracy is 98.89%
Training class 1 vs class 3 is complete. The training accuracy is 98.64%
Training class 1 vs class 4 is complete. The training accuracy is 99.77%
Training class 1 vs class 5 is complete. The training accuracy is 99.76%
Training class 1 vs class 6 is complete. The train

In [10]:
import numpy as np
# Scratch vector for the dot-product experiments in the following cells.
A = np.array((1, 2, 3))
# Transposing a 1-D array leaves it unchanged, so this prints the vector as-is.
print(A.transpose())

[1 2 3]


In [11]:
# Inner product of the 1-D vector with itself (sum of squared entries).
A.dot(A)

14

In [7]:
# B is not defined in any surviving cell, so this cell raises NameError on a
# fresh kernel (Restart & Run All).
# NOTE(review): an all-ones 3x3 float matrix reproduces every recorded output
# that involves B (both products print [6., 6., 6.] and inv(B) reports a
# singular matrix) — confirm this matches the B that was originally used.
B = np.ones((3, 3))
np.dot(B,A)

array([6., 6., 6.])

In [8]:
# Same product with the operands swapped: the 1-D vector A on the left of B.
A.dot(B)

array([6., 6., 6.])

In [9]:
# The recorded output of this cell is a LinAlgError, so the failure is the
# point of the demonstration. Catch it and print it so the notebook still runs
# to completion under Restart & Run All.
try:
    B_inv = np.linalg.inv(B)
except np.linalg.LinAlgError as err:
    print("LinAlgError: {}".format(err))
else:
    print(B_inv)

LinAlgError: Singular matrix