# LIBSVM options

In [1]:
# 'options':
#     -s svm_type : set type of SVM (default 0)
#         0 -- C-SVC
#         1 -- nu-SVC
#         2 -- one-class SVM
#         3 -- epsilon-SVR
#         4 -- nu-SVR
#     -t kernel_type : set type of kernel function (default 2)
#         0 -- linear: u'*v
#         1 -- polynomial: (gamma*u'*v + coef0)^degree
#         2 -- radial basis function: exp(-gamma*|u-v|^2)
#         3 -- sigmoid: tanh(gamma*u'*v + coef0)
#         4 -- precomputed kernel (kernel values in training_set_file)
#     -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
#     -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
#     -e epsilon : set tolerance of termination criterion (default 0.001)
#     -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)


In [2]:
#importing necessary packages
import numpy as np
import PIL
from PIL import Image
import os
import ntpath
import math
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from libsvm.svmutil import *

# Train Data

In [3]:
#Placing the input images into numpy array
# Every training image is resized to 100x100, flattened, and stored as one
# column of happyData / sadData; allData holds the happy columns followed by
# the sad columns.
directory = r"C:\Users\ujjaw\Desktop\MLSP_Assignments\Ass4\Q4\Data\emotion_classification\train"
happyData = []
sadData = []
# Sort the entries so the column order does not depend on the order in which
# the operating system happens to list the directory.
for entry in sorted(os.scandir(directory), key=lambda e: e.name):
    if not entry.is_file():
        continue
    # The context manager releases the file handle as soon as the pixels
    # have been read and resized.
    with Image.open(entry.path) as img:
        flatArray = np.ravel(np.array(img.resize((100,100))))
    # The label is the second dot-separated token of the file name;
    # anything other than "happy" is treated as sad.
    # NOTE(review): assumes names look like "<subject>.<emotion>.<ext>" - confirm.
    if entry.name.split(".")[1] == "happy":
        happyData.append(flatArray)
    else:
        sadData.append(flatArray)
nHappy = len(happyData)
nSad = len(sadData)
N = nHappy + nSad
# Transpose so that each column is one image (pixels x images).
happyData = np.transpose(np.array(happyData))
sadData = np.transpose(np.array(sadData))
allData = np.concatenate((happyData,sadData),axis = 1)

print(allData.shape)
print(happyData.shape)
print(sadData.shape)

(10000, 20)
(10000, 9)
(10000, 11)


# Test Data

In [4]:
#Placing the test images into numpy array
# Every test image is resized to 100x100, flattened, and stored as one column
# of happyDataTest / sadDataTest; allDataTest holds the happy columns followed
# by the sad columns.
directory = r"C:\Users\ujjaw\Desktop\MLSP_Assignments\Ass4\Q4\Data\emotion_classification\test"
happyDataTest = []
sadDataTest = []
# Sort the entries so the column order does not depend on the order in which
# the operating system happens to list the directory.
for entry in sorted(os.scandir(directory), key=lambda e: e.name):
    if not entry.is_file():
        continue
    # The context manager releases the file handle as soon as the pixels
    # have been read and resized.
    with Image.open(entry.path) as img:
        # Dividing by 1 promotes the pixel values to floating point.
        flat_array = np.ravel(np.array(img.resize((100,100))) / 1)
    # The label is the second dot-separated token of the file name;
    # anything other than "happy" is treated as sad.
    # NOTE(review): assumes names look like "<subject>.<emotion>.<ext>" - confirm.
    if entry.name.split(".")[1] == "happy":
        happyDataTest.append(flat_array)
    else:
        sadDataTest.append(flat_array)
nHappyTest = len(happyDataTest)
nSadTest = len(sadDataTest)
N_test = nHappyTest + nSadTest
# Transpose so that each column is one image (pixels x images).
happyDataTest = np.transpose(np.array(happyDataTest))
sadDataTest = np.transpose(np.array(sadDataTest))
allDataTest = np.concatenate((happyDataTest,sadDataTest),axis = 1)

print(allDataTest.shape)
print(happyDataTest.shape)
print(sadDataTest.shape)

(10000, 10)
(10000, 6)
(10000, 4)


# PART A: SVM Modelling

In [5]:
#Using SVM
# Ground-truth labels for the training images: +1 for every happy image,
# followed by -1 for every sad image (happy block first, then sad block).
y_happy = [1] * nHappy
y_sad = [-1] * nSad
y = [*y_happy, *y_sad]

In [6]:
# Ground-truth labels for the test images: +1 for every happy image,
# followed by -1 for every sad image (happy block first, then sad block).
y_happyTest = [1] * nHappyTest
y_sadTest = [-1] * nSadTest
y_test = [*y_happyTest, *y_sadTest]

In [7]:
class usingLIBSVM:
    """PCA dimensionality reduction followed by a LIBSVM model.

    The constructor reads the notebook-level ``allData`` / ``allDataTest``
    matrices (one image per column), projects them onto ``K`` principal
    components and keeps the reduced train and test sets. ``fit`` trains a
    LIBSVM model on the reduced training set with the notebook-level labels
    ``y``; ``predict`` labels the reduced test set and lets LIBSVM score the
    result against ``y_test``.
    """

    def __init__(self, K, kernel, C, epsilon):
        """Build the LIBSVM option string and the PCA-reduced data.

        Parameters
        ----------
        K : int
            Number of principal components to keep.
        kernel : int
            LIBSVM ``-t`` kernel code (0 linear, 1 polynomial, 2 RBF,
            3 sigmoid).
        C : float
            LIBSVM ``-c`` cost parameter.
        epsilon : float
            LIBSVM ``-p`` value. Per the LIBSVM option list this is the
            epsilon of the epsilon-SVR loss; no ``-s`` option is passed, so
            the default C-SVC type is used and this value is not expected to
            influence the classifier.
        """
        self.parameters = '-t ' + str(kernel) + ' -c ' + str(C) + ' -p ' + str(epsilon)

        # NOTE(review): the PCA basis is fitted on the train and test images
        # together, so the test set influences the features the classifier is
        # trained on. Fitting on the training images only would avoid this
        # leakage, but would also limit K to the number of training images.
        completeData = np.concatenate((allData,allDataTest),axis = 1)
        # A fixed random_state makes the projection repeatable from run to
        # run in case scikit-learn selects a randomized SVD solver.
        pca = PCA(n_components = K, random_state = 0)
        reducedCompleteData = pca.fit_transform(completeData.T)

        # The first N rows are the training images, the rest the test images.
        self.reducedAllData = reducedCompleteData[:N,]
        self.reducedAllDataTest = reducedCompleteData[N:,]

        self.SVMmodel = None

    def fit(self):
        """Train the LIBSVM model on the reduced training data and labels ``y``."""
        self.SVMmodel = svm_train(y,self.reducedAllData, self.parameters)

    def predict(self):
        """Return the predicted labels for the reduced test data.

        Raises
        ------
        RuntimeError
            If ``fit`` has not been called yet.
        """
        if self.SVMmodel is None:
            raise RuntimeError("fit() must be called before predict()")
        p_labels, p_acc, p_vals = svm_predict(y_test,self.reducedAllDataTest, self.SVMmodel)
        return p_labels

    def getReducedData(self):
        """Return the PCA-reduced ``(train, test)`` matrices, one image per row."""
        return self.reducedAllData, self.reducedAllDataTest

In [8]:
def findAccuracy(labels, y_true=None):
    """Return the percentage of predicted labels that match the ground truth.

    Parameters
    ----------
    labels : sequence
        Predicted labels, one per sample.
    y_true : sequence, optional
        Ground-truth labels in the same order as ``labels``. When omitted,
        the notebook-level ``y_test`` list is used.

    Returns
    -------
    float
        Accuracy in percent (0 to 100).

    Raises
    ------
    ValueError
        If there are no ground-truth labels, or if ``labels`` and the ground
        truth differ in length.
    """
    if y_true is None:
        y_true = y_test
    total = len(y_true)
    if total == 0:
        raise ValueError("cannot compute accuracy on an empty label set")
    if len(labels) != total:
        raise ValueError("labels and ground truth must have the same length")
    correct = sum(1 for truth, guess in zip(y_true, labels) if truth == guess)
    return correct / total * 100

In [9]:
# Exhaustive sweep over the PCA size K, the kernel type, C and epsilon.
# Every configuration that matches or beats the running best accuracy is
# recorded; the ones that reach the overall best are printed in bold at the end.
kernelType = ['linear', 'polynomial', 'RBF', 'Sigmoid']
gridValues = [10 ** power / 100 for power in range(4)]   # 0.01, 0.1, 1.0, 10.0
maxAcc = 0
maxAccStr = []
maxAccValue = []
for k in range(14, 19):
    for kernel, kernelName in enumerate(kernelType):
        for C in gridValues:
            for epsilon in gridValues:
                tempObj = usingLIBSVM(k, kernel, C, epsilon)
                tempObj.fit()
                tempStr = f"Kernel = {kernelName}; C = {C}; epsilon ={epsilon}; K = {k}"
                print(tempStr)
                # svm_predict prints its own accuracy line right after the settings
                labels = tempObj.predict()
                print()
                accuracy = findAccuracy(labels)
                if accuracy >= maxAcc:
                    maxAcc = accuracy
                    maxAccStr.append(tempStr)
                    maxAccValue.append(accuracy)
                del tempObj
finalMaxAcc = max(maxAccValue)
print("Max Accuracy at the following settings with Accuracy = " + str(maxAcc))
# Earlier entries may belong to a lower running best, so keep only the
# settings whose accuracy equals the final maximum.
for setting, value in zip(maxAccStr, maxAccValue):
    if value == finalMaxAcc:
        print('\033[1m' + setting + '\033[0m')


Kernel = linear; C = 0.01; epsilon =0.01; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.01; epsilon =0.1; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.01; epsilon =1.0; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.01; epsilon =10.0; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.1; epsilon =0.01; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.1; epsilon =0.1; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.1; epsilon =1.0; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 0.1; epsilon =10.0; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 1.0; epsilon =0.01; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 1.0; epsilon =0.1; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 1.0; epsilon =1.0; K = 14
Accuracy = 70% (7/10) (classification)

Kernel = linear; C = 1.0; epsilon 

Accuracy = 50% (5/10) (classification)

Kernel = RBF; C = 0.01; epsilon =0.01; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.01; epsilon =0.1; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.01; epsilon =1.0; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.01; epsilon =10.0; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =0.01; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =0.1; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =1.0; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =10.0; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 1.0; epsilon =0.01; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 1.0; epsilon =0.1; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 1.0; epsilon =1.0; K = 15
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 1.0; epsi

Kernel = Sigmoid; C = 10.0; epsilon =10.0; K = 16
Accuracy = 80% (8/10) (classification)

Kernel = linear; C = 0.01; epsilon =0.01; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.01; epsilon =0.1; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.01; epsilon =1.0; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.01; epsilon =10.0; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.1; epsilon =0.01; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.1; epsilon =0.1; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.1; epsilon =1.0; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 0.1; epsilon =10.0; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 1.0; epsilon =0.01; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 1.0; epsilon =0.1; K = 17
Accuracy = 90% (9/10) (classification)

Kernel = linear; C = 1.0; epsil

Kernel = polynomial; C = 10.0; epsilon =0.1; K = 18
Accuracy = 50% (5/10) (classification)

Kernel = polynomial; C = 10.0; epsilon =1.0; K = 18
Accuracy = 50% (5/10) (classification)

Kernel = polynomial; C = 10.0; epsilon =10.0; K = 18
Accuracy = 50% (5/10) (classification)

Kernel = RBF; C = 0.01; epsilon =0.01; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.01; epsilon =0.1; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.01; epsilon =1.0; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.01; epsilon =10.0; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =0.01; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =0.1; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =1.0; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 0.1; epsilon =10.0; K = 18
Accuracy = 40% (4/10) (classification)

Kernel = RBF; C = 1.0; epsilon =0.01; K = 1

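With kernel SVM, the highest accuracy obtained is 90%, at the settings printed above in bold. In summary, the maximum accuracy is reached when the kernel type is LINEAR and the value of K ranges from 15 to 18. However, for this data set, different values of C and epsilon do not change the accuracy for the LINEAR kernel with K in [15, 18]. For epsilon this is expected: the `-p` option sets the epsilon of the epsilon-SVR loss function, while the models here use the default C-SVC type, so it has no effect on classification.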

# PART B: LDA vs SVM

In [10]:
# For every PCA size K, fit an LDA classifier on the reduced training images
# and report its accuracy on the reduced test images; results of at least 90%
# are printed in bold.
for k in range(1, 30):
    # The SVM settings are irrelevant here: the object is created only to
    # obtain the PCA-reduced train and test matrices.
    tempObj = usingLIBSVM(k, 0, 0.1, 0.1)
    trainData, testData = tempObj.getReducedData()
    lda = LinearDiscriminantAnalysis(n_components = 1).fit(trainData, y)
    predicted = lda.predict(testData)
    acc = findAccuracy(list(predicted))
    if acc < 90.0:
        print("Accuracy = ",acc, "% at K =", k)
    else:
        print('\033[1m'+"Accuracy = ",acc, "% at K =", k,'\033[0m')

Accuracy =  40.0 % at K = 1
Accuracy =  40.0 % at K = 2
Accuracy =  60.0 % at K = 3
Accuracy =  60.0 % at K = 4
Accuracy =  80.0 % at K = 5
[1mAccuracy =  100.0 % at K = 6 [0m
Accuracy =  70.0 % at K = 7
Accuracy =  80.0 % at K = 8
Accuracy =  80.0 % at K = 9
[1mAccuracy =  90.0 % at K = 10 [0m
Accuracy =  80.0 % at K = 11
Accuracy =  70.0 % at K = 12
Accuracy =  60.0 % at K = 13
Accuracy =  60.0 % at K = 14
Accuracy =  60.0 % at K = 15
Accuracy =  60.0 % at K = 16
Accuracy =  60.0 % at K = 17
Accuracy =  60.0 % at K = 18
Accuracy =  70.0 % at K = 19
Accuracy =  80.0 % at K = 20
Accuracy =  40.0 % at K = 21
Accuracy =  60.0 % at K = 22
Accuracy =  60.0 % at K = 23
Accuracy =  60.0 % at K = 24
Accuracy =  40.0 % at K = 25
Accuracy =  50.0 % at K = 26
Accuracy =  20.0 % at K = 27
Accuracy =  40.0 % at K = 28
Accuracy =  40.0 % at K = 29


The maximum performance obtained by SVM is 90%, whereas with LDA I am getting a maximum of 100% accuracy at K = 6.

LDA separates the data such that the class means are well separated, whereas SVM chooses a hyperplane that maximizes the margin. SVM with a kernel performs very well on higher-dimensional data, whereas LDA does not perform as well.