In [1]:
from libsvm.svm import *
from libsvm.svmutil import *

In [2]:
import numpy as np
import os
import matplotlib.pyplot as plt
import random
from PIL import Image

In [9]:
def load_image_data(data_path):
    """Load every image file in ``data_path`` as a flattened float64 vector.

    The label comes from the file name: when the second dot-separated field
    of the name is ``happy`` the sample is labelled ``1``, otherwise ``-1``.

    Parameters
    ----------
    data_path : str
        Directory that contains the image files.

    Returns
    -------
    X : numpy.ndarray
        One entry per image, each entry being the flattened pixel values
        (an N x size_of_image array when all images have the same size).
    labels : numpy.ndarray
        The ``1`` / ``-1`` label of each entry of ``X``, in the same order.
    """
    X=[]
    labels=[]

    print('image data loading from ',data_path)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and everything computed from it) reproducible across runs.
    for file_name in sorted(os.listdir(data_path)):
        individual_image_path=os.path.join(data_path,file_name)

        # Skip sub-directories (e.g. .ipynb_checkpoints) instead of crashing
        # when trying to open them as images.
        if not os.path.isfile(individual_image_path):
            continue

        # A name without a second field cannot be 'happy'; the length check
        # avoids the IndexError that plain indexing would raise.
        name_fields=file_name.split('.')
        is_happy=len(name_fields)>1 and name_fields[1]=='happy'
        labels.append(1 if is_happy else -1)

        # The context manager closes the underlying file once the pixels
        # have been copied into a numpy array.
        with Image.open(individual_image_path) as individual_image_file:
            pixels=np.array(individual_image_file,dtype='float64')
        X.append(np.reshape(pixels,-1))

    # made into single numpy array of N*size_of_image
    X=np.array(X)
    labels=np.array(labels)

    print('image data loaded.')
    return X,labels

In [10]:
# Training split: one flattened image per row plus its +1/-1 label.
train_data_path='Data/emotion_classification/train'
train_X,train_labels=load_image_data(train_data_path)
train_N,train_D=train_X.shape   # (number of samples, values per sample)

# Test split, loaded through the same helper.
test_data_path='Data/emotion_classification/test'
test_X,test_labels=load_image_data(test_data_path)
test_N,test_D=test_X.shape

image data loading from  Data/emotion_classification/train
image data loaded.
image data loading from  Data/emotion_classification/test
image data loaded.


In [11]:
# Stack the training rows on top of the test rows; later cells rely on this
# ordering when they slice the projected data back into the two splits.
full_data=np.vstack((train_X,test_X))
full_data.shape

(30, 10201)

In [12]:
K=8

from sklearn.decomposition import PCA
# For a wide matrix like this one scikit-learn's default solver choice may be
# the randomized SVD, so random_state is fixed to keep the projection (and the
# accuracies below) repeatable from run to run.
pca = PCA(n_components=K, random_state=0)
# NOTE(review): PCA is fitted on train and test rows together, so the test
# images influence the projection. Consider pca.fit(train_X) followed by
# pca.transform(test_X) if a leakage-free evaluation is required.
projected_full_data = pca.fit_transform(full_data)

# full_data is the training rows followed by the test rows, so the split point
# is the number of training samples rather than a hard-coded 20/30.
n_train = train_X.shape[0]
print(projected_full_data[0:n_train,:].shape)

projected_trained_data=projected_full_data[:n_train,:]
projected_tested_data=projected_full_data[n_train:,:]

print(projected_trained_data.shape)
print(projected_tested_data.shape)

(20, 8)
(20, 8)
(10, 8)


In [13]:
# Baseline: linear-kernel SVM trained on the PCA-projected training data.
param = svm_parameter()
param.kernel_type, param.C, param.eps = LINEAR, 0.000010, 0.5

prob = svm_problem(train_labels,projected_trained_data)
m = svm_train(prob, param)

# svm_predict prints the accuracy line itself.
print("For training:")
svm_predict(train_labels,projected_trained_data,m)

print("For testing:")
# Kept as the last expression of the cell so the notebook also displays the
# tuple returned for the test split.
svm_predict(test_labels,projected_tested_data,m)


For training:
Accuracy = 100% (20/20) (classification)
For testing:
Accuracy = 90% (9/10) (classification)


([1.0, 1.0, -1.0, -1.0, -1.0, 1.0, -1.0, 1.0, 1.0, -1.0],
 (90.0, 0.4, 0.6666666666666666),
 [[6.320250273694218],
  [0.9277095814932129],
  [-7.292812055407042],
  [-0.02590085980697221],
  [-1.6959412675825565],
  [17.7548548188702],
  [-0.18332828890534703],
  [0.5357312509963957],
  [5.118488849453515],
  [-7.599157487128016]])

In [14]:
def svm_cusomized(train_data,labels,C,epsilon,kernel_type):
    """Train a libsvm model with the requested kernel.

    Parameters
    ----------
    train_data :
        Training feature vectors, handed to ``svm_problem`` unchanged.
    labels :
        Class labels aligned with ``train_data``.
    C :
        Value assigned to ``svm_parameter.C``.
    epsilon :
        Value assigned to ``svm_parameter.eps``.
    kernel_type : str
        One of ``'LINEAR'``, ``'POLY'``, ``'RBF'`` or ``'SIGMOID'``.

    Returns
    -------
    The model object returned by ``svm_train``.

    Raises
    ------
    ValueError
        If ``kernel_type`` is not one of the supported names.
    """
    # Built inside the function so the libsvm constants are looked up at call
    # time, exactly as the original if/elif chain did.
    kernels={'LINEAR':LINEAR,'POLY':POLY,'RBF':RBF,'SIGMOID':SIGMOID}
    if kernel_type not in kernels:
        # An unrecognised name used to fall through every branch and train
        # with whatever kernel svm_parameter() defaults to, which silently
        # produced results for the wrong kernel.
        raise ValueError(
            "unknown kernel_type %r; expected one of %s"
            % (kernel_type, sorted(kernels))
        )

    param = svm_parameter()
    param.C = C
    param.eps=epsilon
    param.kernel_type = kernels[kernel_type]

    prob = svm_problem(labels,train_data)
    m = svm_train(prob, param)

    return m


# LINEAR KERNEL:

In [15]:
# Linear kernel on the PCA-projected training data.
m=svm_cusomized(
    train_data=projected_trained_data,
    labels=train_labels,
    C=0.000010,
    epsilon=0.5,
    kernel_type='LINEAR',
)

# svm_predict prints the accuracy for each split.
print("For training:")
svm_predict(train_labels,projected_trained_data,m)

print("For testing:")
# The whole tuple returned by svm_predict is kept, not just the labels.
predicted_labels=svm_predict(test_labels,projected_tested_data,m)

For training:
Accuracy = 100% (20/20) (classification)
For testing:
Accuracy = 90% (9/10) (classification)


# POLYNOMIAL KERNEL:

In [16]:
# Polynomial kernel on the PCA-projected training data.
m=svm_cusomized(
    train_data=projected_trained_data,
    labels=train_labels,
    C=0.000010,
    epsilon=0.5,
    kernel_type='POLY',
)

# svm_predict prints the accuracy for each split.
print("For training:")
svm_predict(train_labels,projected_trained_data,m)

print("For testing:")
# The whole tuple returned by svm_predict is kept, not just the labels.
predicted_labels=svm_predict(test_labels,projected_tested_data,m)

For training:
Accuracy = 100% (20/20) (classification)
For testing:
Accuracy = 60% (6/10) (classification)


# RBF-KERNEL:

In [17]:
# RBF kernel on the PCA-projected training data (note the larger C used here).
m=svm_cusomized(
    train_data=projected_trained_data,
    labels=train_labels,
    C=1,
    epsilon=0.5,
    kernel_type='RBF',
)

# svm_predict prints the accuracy for each split.
print("For training:")
svm_predict(train_labels,projected_trained_data,m)

print("For testing:")
# The whole tuple returned by svm_predict is kept, not just the labels.
predicted_labels=svm_predict(test_labels,projected_tested_data,m)

For training:
Accuracy = 100% (20/20) (classification)
For testing:
Accuracy = 40% (4/10) (classification)


# SIGMOID-KERNEL:

In [18]:
# Sigmoid kernel on the PCA-projected training data (C=10 for this run).
m=svm_cusomized(
    train_data=projected_trained_data,
    labels=train_labels,
    C=10,
    epsilon=0.5,
    kernel_type='SIGMOID',
)

# svm_predict prints the accuracy for each split.
print("For training:")
svm_predict(train_labels,projected_trained_data,m)

print("For testing:")
# The whole tuple returned by svm_predict is kept, not just the labels.
predicted_labels=svm_predict(test_labels,projected_tested_data,m)

For training:
Accuracy = 70% (14/20) (classification)
For testing:
Accuracy = 50% (5/10) (classification)


In [19]:
# predicted_labels holds the whole tuple returned by the most recent
# svm_predict call; element 1 is the metrics tuple whose first entry is the
# accuracy in percent (it matches the "Accuracy = ..." line printed above).
# NOTE(review): the remaining two entries are presumably libsvm's regression
# metrics (mean squared error, squared correlation) - confirm against libsvm.
print(predicted_labels[1])

(50.0, 2.0, 0.0)


The linear kernel gives the best accuracy compared with the other kernels. After the linear kernel, the polynomial kernel gives the next-best accuracy.

The following table explains the performance of the various kernels on the SVM Model.


<table>
  
<tr>
    <td>KERNEL</td>
    <td>Training Accuracy</td>
    <td>Testing Accuracy</td>
</tr>  
  
<tr>
    <td>LINEAR-KERNEL</td>
    <td>100%</td>
    <td>90%</td>
</tr>

<tr>
<td>POLYNOMIAL-KERNEL</td>
<td>100%</td>
<td>60%</td>
</tr>

<tr>
<td>RBF-KERNEL</td>
<td>100%</td>
<td>40%</td>
</tr>

<tr>
<td>SIGMOID-KERNEL</td>
<td>70%</td>
<td>50%</td>
</tr>
</table>

<b>How does the performance change as a function of K ?</b>

In [36]:
from sklearn.decomposition import PCA

def projected_data(full_data,K,n_train=20,random_state=0):
    """Project the stacked train+test data onto K principal components.

    Parameters
    ----------
    full_data : numpy.ndarray
        Training rows followed by test rows.
    K : int
        Number of principal components to keep.
    n_train : int, optional
        Number of leading rows of ``full_data`` that belong to the training
        split. Defaults to 20, the split that used to be hard-coded.
    random_state : int or None, optional
        Seed forwarded to ``PCA`` so that a randomized solver, when
        scikit-learn selects one, gives repeatable projections.

    Returns
    -------
    projected_trained_data, projected_tested_data : numpy.ndarray
        The first ``n_train`` projected rows and all remaining projected rows.
    """
    # NOTE(review): PCA is fitted on train and test rows together, so the test
    # samples influence the projection - confirm this is intended.
    pca = PCA(n_components=K, random_state=random_state)
    projected_full_data = pca.fit_transform(full_data)

    # Split at n_train instead of the fixed 0:20 / 20:30 slices, so no test
    # row is dropped when full_data has more than 30 rows.
    projected_trained_data=projected_full_data[:n_train,:]
    projected_tested_data=projected_full_data[n_train:,:]
    print(projected_trained_data.shape)

    return projected_trained_data,projected_tested_data

In [29]:
def svm_experiment(training_data,testing_data,kernel_type,C_start=0.000001,n_steps=14,epsilon=0.5):
    """Sweep C over successive powers of ten and report the accuracies.

    Parameters
    ----------
    training_data : tuple
        ``(train_X, train_labels)``.
    testing_data : tuple
        ``(test_X, test_labels)``.
    kernel_type : str
        Kernel name forwarded to ``svm_cusomized``.
    C_start : float, optional
        First value of C; each following step multiplies it by 10.
    n_steps : int, optional
        Number of C values to try.
    epsilon : float, optional
        Value forwarded to ``svm_cusomized`` as ``epsilon``.

    Returns
    -------
    list of list
        One row per C value:
        ``[kernel_type, C, epsilon, training accuracy, testing accuracy,
        number of support vectors]``. The same rows are also printed.
    """
    train_X,train_labels=training_data
    test_X,test_labels=testing_data

    statastics=[]
    C=C_start
    for _ in range(n_steps):
        m=svm_cusomized(train_X,train_labels,C,epsilon,kernel_type)

        # svm_predict returns a tuple whose element [1][0] is the accuracy.
        traing_accuracy=svm_predict(train_labels,train_X,m)[1][0]
        testing_accuracy=svm_predict(test_labels,test_X,m)[1][0]

        statastics.append([kernel_type,C,epsilon,traing_accuracy,testing_accuracy,m.get_nr_sv()])
        # Repeated multiplication is kept so the C values stay bit-identical
        # to the ones produced by the original sweep.
        C=C*10

    for row in statastics:
        print(row)

    # Returned as well as printed so callers can tabulate or plot the sweep.
    return statastics

In [None]:
# Repeat the C sweep for every number of principal components from 1 to 10.
# NOTE: the loop rebinds projected_trained_data / projected_tested_data, so
# after it finishes they hold the K=10 projections.
for K in range(1,11):
    print(K)
    projected_trained_data,projected_tested_data=projected_data(full_data,K)
    training_data,testing_data=(projected_trained_data,train_labels),(projected_tested_data,test_labels)
    svm_experiment(training_data,testing_data,'LINEAR')

The following are the results of the SVM experiments on data of different dimensionality (K) with different kernel functions.


<table>
<tr><td>K</td><td>Kernel</td><td>C</td><td>Epsilon</td><td>Training Accuracy</td><td>Testing Accuracy</td><td>No. of Support Vectors</td></tr>

<tr><td>1</td><td>'LINEAR'</td><td> 100.0</td><td> 0.5</td><td> 50.0</td><td> 60.0</td><td> 7</td></tr>
<tr><td>2</td><td>'LINEAR'</td><td> 10.0</td><td> 0.5</td><td> 65.0</td><td> 50.0</td><td> 12</td></tr>
<tr><td>3</td><td>'LINEAR'</td><td> 1.0</td><td> 0.5</td><td> 65.0</td><td> 50.0</td><td> 18</td></tr>
<tr><td>4</td><td>'LINEAR'</td><td> 1.0</td><td> 0.5</td><td> 50.0</td><td> 60.0</td><td> 18</td></tr>
<tr><td>5</td><td>'LINEAR'</td><td>0.0001</td><td> 0.5</td><td> 65.0</td><td> 70.0</td><td> 17</td></tr>
<tr><td>6</td><td>'LINEAR'</td><td> 0.001</td><td> 0.5</td><td> 90.0</td><td> 90.0</td><td> 9</td></tr>
<tr><td>7</td><td>'LINEAR'</td><td> 1.0</td><td> 0.5</td><td> 90.0</td><td> 90.0</td><td> 10</td></tr>
<tr><td>8</td><td>'LINEAR'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 90.0</td><td> 10</td></tr>
<tr><td>9</td><td>'LINEAR'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 90.0</td><td> 10</td></tr>
<tr><td>10</td><td>'LINEAR'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 90.0</td><td> 11</td></tr>


</table>
<table>
<tr><td>K</td><td>Kernel</td><td>C</td><td>Epsilon</td><td>Training Accuracy</td><td>Testing Accuracy</td><td>No. of Support Vectors</td></tr>

<tr><td>1</td><td>'POLY'</td><td> 0.1</td><td> 0.5</td><td> 55.00000000000001</td><td> 40.0</td><td> 7</td></tr>
<tr><td>2</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 50.0</td><td> 70.0</td><td> 7</td></tr>
<tr><td>3</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 50.0</td><td> 11</td></tr>
<tr><td>4</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 60.0</td><td> 13</td></tr>
<tr><td>5</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 50.0</td><td> 16</td></tr>
<tr><td>6</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 70.0</td><td> 13</td></tr>
<tr><td>7</td><td>'POLY'</td><td> 0.1</td><td> 0.5</td><td> 100.0</td><td> 70.0</td><td> 15</td></tr>
<tr><td>8</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 50.0</td><td> 15</td></tr>
<tr><td>9</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 50.0</td><td> 15</td></tr>
<tr><td>10</td><td>'POLY'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 50.0</td><td> 15</td></tr>
</table>
<table>

<tr><td>K</td><td>Kernel</td><td>C</td><td>Epsilon</td><td>Training Accuracy</td><td>Testing Accuracy</td><td>No. of Support Vectors</td></tr>

<tr><td>1</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 55.00000000000001</td><td> 40.0</td><td> 18</td></tr>
<tr><td>2</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 60.0</td><td> 40.0</td><td> 10</td></tr>
<tr><td>3</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 55.00000000000001</td><td> 60.0</td><td> 13</td></tr>
<tr><td>4</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 50.0</td><td> 60.0</td><td> 12</td></tr>
<tr><td>5</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 65.0</td><td> 60.0</td><td> 10</td></tr>
<tr><td>6</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 60.0</td><td> 80.0</td><td> 8</td></tr>
<tr><td>7</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 60.0</td><td> 70.0</td><td> 10</td></tr>
<tr><td>8</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 70.0</td><td> 50.0</td><td> 10</td></tr>
<tr><td>9</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 70.0</td><td> 60.0</td><td> 10</td></tr>
<tr><td>10</td><td>'SIGMOID'</td><td> 1.0</td><td> 0.5</td><td> 70.0</td><td> 60.0</td><td> 10</td></tr>

</table>
<table>


<tr><td>K</td><td>Kernel</td><td>C</td><td>Epsilon</td><td>Training Accuracy</td><td>Testing Accuracy</td><td>No. of Support Vectors</td></tr>

<tr><td>1</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>2</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>3</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>4</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>5</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>6</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>7</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>8</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>9</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
<tr><td>10</td><td>'RBF'</td><td> 1.0</td><td> 0.5</td><td> 100.0</td><td> 40.0</td><td> 20</td></tr>
</table>


From the above results, we make the following observations.

Initially, as K increases, the model accuracy also increases.

A larger dimension (K) means that the projected data retains more of the variance of the original data.

However, beyond a certain K (7 or more here), most of the variance of the data is already captured. Therefore, beyond that K, the model accuracy is no longer affected.



From assignment_01, the following things are noticed.

SVM with a linear kernel performs better than LDA for the same value of K.

The following are some of the results.

<table>
    <tr><td>K</td><td>Testing Accuracy</td></tr>
    <tr><td>5</td><td>50</td></tr>
    <tr><td>10</td><td>60</td></tr>
    <tr><td>15</td><td>70</td></tr>
    <tr><td>20</td><td>80</td></tr>
</table>

<div>
    <p style="background-color: #ffe5e2; ">
LDA is a supervised dimensionality reduction technique that minimizes the intra-class variance and maximizes the inter-class variance in the lower-dimensional space. An SVM finds an optimal separating hyperplane, possibly in a non-linear, higher-dimensional feature space reached through the kernel trick.
    </p>

<p style="background-color: #ffe5e2; ">
LDA has an analytical (closed-form) solution.
An SVM, in contrast, is solved iteratively.
</p >
    
<p style="background-color: #ffe5e2; ">
LDA can handle multiple classes simultaneously. 
SVM is a two-class classifier. Therefore, it handles multi-class classification as multiple one-vs-rest problems.
</p>

<p style="background-color: #ffe5e2; ">
In LDA, we use the covariance matrices of the whole data and of each class. Therefore, all of the data is used.
In an SVM, only a subset of the data, called the support vectors, determines the model. The SVM solution depends only on these support vectors, which are the points lying on or within the margin of the classifier.
</p>


<p style="background-color: #ffe5e2; ">
SVM is very sensitive to the data. Small variations in the data can lead to big changes in the model.
</p>
</div>