In [1]:
import numpy as np
import scipy
from numpy.linalg import norm

In [2]:
def hinge_loss(x,y):
    return max(0, 1- x*y)

In [3]:
import cvxopt
class SVM:
    
    def __init__(self, C = 1., kernel = 'rbf', gamma = None):
        self.C = C
        if gamma == None:
            self.gamma = 0.01
        else:
            self.gamma = gamma
        if kernel =='min':
            self.kernel_function = lambda a,b : np.sum(np.minimum(a,b))
        if kernel =='linear':
            self.kernel_function = lambda a,b : np.inner(a,b)
        
        if kernel =='rbf':
            self.kernel_function = lambda a,b : np.exp(- self.gamma * np.linalg.norm( a-b)**2)
    
    def fit(self, X, y):
        
        n_samples = X.shape[0]
        n_features = X.shape[1]
        self.classes = np.unique(y)
        
        self.y_train = np.array([-1 if label == self.classes[0] else 1 for label in y])
        self.X_train = X
        y= self.y_train
        if (X.shape[0] != y.shape[0]):
            print "X and y don't have the same size :",X.shape, y.shape
        
        #compute the kernel matrix
        K = np.array([np.array([self.kernel_function(x,x2) for x2 in X])for x in X])
        self.K = K
        # Solves
        # min 1/2 x^T P x + q^T x
        # s.t.
        #  Gx \coneleq h
        #  Ax = b
        P = cvxopt.matrix(np.outer(y, y) * K)
        q = cvxopt.matrix(-1 * np.ones(n_samples))

        # -a_i \leq 0
        # TODO(tulloch) - modify G, h so that we have a soft-margin classifier
        G_std = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
        h_std = cvxopt.matrix(np.zeros(n_samples))

        # a_i \leq c
        G_slack = cvxopt.matrix(np.diag(np.ones(n_samples)))
        h_slack = cvxopt.matrix(np.ones(n_samples) * self.C)

        G = cvxopt.matrix(np.vstack((G_std, G_slack)))
        h = cvxopt.matrix(np.vstack((h_std, h_slack)))
        
        
        A = cvxopt.matrix(y, (1, n_samples),'d')
        b = cvxopt.matrix(0.0)

        solution = cvxopt.solvers.qp(P, q, G, h, A, b)
        self.alpha = solution['x']
        self.b = np.mean([y[j] - np.sum([self.alpha[i]*y[i] * K[i,j] for i in range(n_samples)])
                         for j in range(n_samples)]).mean()
        return self
        
            
    def predict(self, X):
        prediction = []
        K_test = np.array([np.array([self.kernel_function(x,x2) for x2 in X])for x in self.X_train])
        for i,x in enumerate(X):
            result = self.b
            for j in range(self.X_train.shape[0]):
                result+= self.alpha[j]*self.y_train[j]*K_test[j,i]
            if(np.sign(result) <0):
                prediction.append(self.classes[0])
            else:
                prediction.append(self.classes[1])
                
        return prediction

In [4]:
#from svm import SVM
class multiclass_svm(object):
    def __init__(self, kernel = "rbf", C =1.):
        self.kernel = kernel
        self.C = C
    
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.trained_classifiers = [
            [SVM(C = self.C, kernel = self.kernel).fit(X[(y==label) | (y== label2)], y[(y==label)|(y== label2)])
                                                if label2>label else 0
                                                for label2 in self.classes] 
                                                for label in self.classes ]
        return self
        
    def predict(self, X):
        win_count = np.zeros((X.shape[0],len(self.classes)))
        for i, label1 in enumerate(self.classes):
            for j, label2 in enumerate(self.classes):
                if label2>label1:
                    y_test = self.trained_classifiers[i][j].predict(X)
                    for k, winner in enumerate(y_test):
                        if winner == label1:
                            win_count[k,i]+=1
                        else:
                            win_count[k,j]+=1
                            
        return [self.classes[np.argmax(row)] for row in win_count]

In [5]:
import pandas as pd
path=""
X_train =pd.read_csv(path+"Xtr.csv", header=None)
Y =pd.read_csv(path+"Ytr.csv")
X_test =pd.read_csv(path+"Xte.csv", header=None)

y_train = Y["Prediction"].values
X_train = X_train.values
X_test = X_test.values

from sklearn.decomposition import KernelPCA
kpca = KernelPCA(kernel = 'rbf', degree = 3, n_components= 35, gamma= 0.01)

X_train_kpca = kpca.fit_transform(X_train)
X_test_kpca = kpca.transform(X_test)

In [6]:
def min_kernel(a, b):
    return np.sum(np.minimum(a,b))

In [None]:
svm = multiclass_svm(kernel='rbf',C= 10.)

In [None]:
#svm = SVC(C = 10.)
svm.fit(X_train_kpca,y_train)


     pcost       dcost       gap    pres   dres
 0:  4.1514e+03 -3.3504e+04  4e+04  7e-14  5e-14
 1:  5.6020e+02 -4.2956e+03  5e+03  5e-14  4e-14
 2: -3.0726e+02 -1.9818e+03  2e+03  2e-14  3e-14
 3: -4.9613e+02 -1.5693e+03  1e+03  6e-15  3e-14
 4: -6.0538e+02 -1.3419e+03  7e+02  4e-15  3e-14
 5: -6.7353e+02 -1.2115e+03  5e+02  4e-14  3e-14
 6: -7.3675e+02 -1.0924e+03  4e+02  5e-15  3e-14
 7: -7.7890e+02 -1.0148e+03  2e+02  2e-15  4e-14
 8: -8.0835e+02 -9.6547e+02  2e+02  5e-14  4e-14
 9: -8.2800e+02 -9.3117e+02  1e+02  2e-14  4e-14
10: -8.4303e+02 -9.0733e+02  6e+01  3e-14  4e-14
11: -8.5936e+02 -8.8120e+02  2e+01  1e-14  5e-14
12: -8.6474e+02 -8.7279e+02  8e+00  4e-14  6e-14
13: -8.6777e+02 -8.6944e+02  2e+00  1e-14  5e-14
14: -8.6842e+02 -8.6873e+02  3e-01  1e-14  5e-14
15: -8.6857e+02 -8.6857e+02  6e-03  4e-14  6e-14
16: -8.6857e+02 -8.6857e+02  6e-05  3e-14  6e-14
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  1.1304e+03 -3.1159e+04  3e+04  1e-13  9e-1

In [62]:
from sklearn.metrics import accuracy_score
predicted_label = svm.predict(X_test_kpca[0:100])
#print("SVM - Score on train_data : ", accuracy_score(y_train, svm.predict(X_train_kpca)))

fini
coucou


In [63]:
predicted_label

[7,
 2,
 1,
 0,
 4,
 1,
 4,
 4,
 6,
 9,
 0,
 6,
 9,
 0,
 1,
 5,
 9,
 7,
 2,
 4,
 9,
 6,
 6,
 5,
 4,
 0,
 7,
 4,
 0,
 1,
 3,
 1,
 3,
 6,
 7,
 2,
 7,
 1,
 2,
 1,
 1,
 7,
 4,
 2,
 3,
 5,
 1,
 2,
 4,
 4,
 6,
 3,
 5,
 5,
 6,
 0,
 4,
 1,
 9,
 5,
 7,
 8,
 9,
 1,
 9,
 4,
 6,
 4,
 3,
 0,
 7,
 0,
 2,
 8,
 1,
 7,
 3,
 7,
 9,
 7,
 7,
 6,
 2,
 7,
 8,
 4,
 7,
 3,
 6,
 1,
 3,
 6,
 8,
 3,
 1,
 4,
 1,
 8,
 6,
 9]