In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import os
import pandas as pd
import time
from tqdm import tqdm, tqdm_notebook 
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split

In [2]:
class SVM_cuda(nn.Module):
    #列出需要哪些層
    def __init__(self, feature_num, cls_num):
        super(SVM_cuda, self).__init__()
        self.fc = nn.Linear(feature_num, cls_num)     
    #列出forward的路徑，將init列出的層代入
    def forward(self, x):
        out = self.fc(x) 
        return out
    
class HingeLoss(torch.nn.Module):

    def __init__(self):
        super(HingeLoss, self).__init__()
        self.relu = nn.ReLU()

    def forward(self, output, target):
        all_ones = torch.ones_like(target)
        labels = 2 * target - all_ones
        losses = all_ones - torch.mul(output, labels)

        return torch.norm(self.relu(losses))

In [3]:
def SVM_train(training_data, val_data, test_data, config):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # device = 'cpu'
    training_data = training_data.to(device)
    val_data = val_data.to(device)
    test_data = test_data.to(device)
    
    svm = SVM_cuda(config.feature_num, config.cls_num).to(device)
    optimizer = optim.Adam(svm.parameters(), lr=0.000001)
    # criterion = torch.nn.CrossEntropyLoss()
    criterion = HingeLoss()
    
    best_acc = 0
    best_model = None
#     early_stop = 50
    
    for epoch in tqdm_notebook(range(config.epoch)):
        training_data = training_data[torch.randperm(training_data.size()[0])].float()
        val_data = val_data[torch.randperm(val_data.size()[0])].float()
        test_data = test_data[torch.randperm(test_data.size()[0])].float()
        
        sum_loss = 0
        train_total = 0
        train_true = 0
        val_total = 0
        val_true = 0

        ########################                    
        # train the model      #
        ########################
        svm.train()
        for i in range(0, len(training_data), config.batch_size):
            x = training_data[i:i+config.batch_size, :-1]
            y = training_data[i:i+config.batch_size, -1].long()

            optimizer.zero_grad()
            
            output = svm(x)
            prob, pred = torch.relu(output).max(1)

            train_true += torch.sum(pred == y).item()
            # loss = criterion(prob, y)
            loss = criterion(output, y)
            # loss = torch.mean(torch.clamp(1 - y * prob, min=0))
            # loss += config.c / 2.0
            
            loss.backward()
            optimizer.step()
            
            sum_loss += float(loss)
            train_total += len(y)
        # print("train: epoch: {:4d}, loss: {:.3f}, accuracy: {}".format(epoch, sum_loss / train_total, train_true/ train_total))

        ########################
        # validate the model   #
        ########################
        svm.eval()
        for i in range(0, len(val_data), config.batch_size):
            x = val_data[i:i+config.batch_size, :-1]
            y = val_data[i:i+config.batch_size, -1]

            optimizer.zero_grad()
            
            with torch.no_grad():
                prob, pred = torch.relu(svm(x)).max(1)

            val_true += torch.sum(pred == y).item()
            val_total += len(y)
        # print("validation: epoch: {:4d}, loss: {:.3f}, accuracy: {}".format(epoch, sum_loss / val_total, val_true/ val_total))
        if best_acc <= val_true/ val_total:
            best_acc = val_true/ val_total
            best_model = copy.deepcopy(svm)
        

#     evaluation(best_model, test_data)
    return best_model

In [4]:
def evaluation(svm, test_data, config):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    test_data = test_data.float().to(device)
    svm = svm.to(device)
    svm.eval()
    test_true = [0 for i in range(config.cls_num)]
    test_total = [0 for i in range(config.cls_num)]

    for i in range(0, len(test_data), config.batch_size):
            x = test_data[i:i+config.batch_size, :-1]
            y = test_data[i:i+config.batch_size, -1]
            
            with torch.no_grad():
                prob, pred = torch.relu(svm(x)).max(1)
            
            for i in range(config.cls_num):
                test_true[i] += torch.sum((pred == i) * (i == y)).item()
                test_total[i] += sum(y == i).item()
                #print(y)
                #print(i)
                #print('------------')
                #print(i, (pred == y), sum(y == i))
            
            #for i in range(7):
             #   print( test_true[i], test_total[i])
    
    print('Apple_pie accuracy: %f %%\n' % (test_true[0] /test_total[0]*100))
    print('Chocolate_cake accuracy: %f %%\n' % (test_true[1] /test_total[1]*100))
    print('Donuts accuracy: %f %%\n' % (test_true[2] /test_total[2]*100))
    print('Hamburger accuracy: %f %%\n' % (test_true[3] /test_total[3]*100))
    print('Hot_dog accuracy: %f %%\n' % (test_true[4] /test_total[4]*100))
    print('Ice_cream accuracy: %f %%\n' % (test_true[5] /test_total[5]*100))
    print('Pizza accuracy: %f %%\n' % (test_true[6] /test_total[6]*100))
    

In [5]:
class Config():
    def __init__(self):
        self.feature_num = 0
        self.cls_num = 7
        # self.c = 1
        self.batch_size = 2500
        self.epoch = 5000

In [6]:
config = Config()

train_path = os.path.join('.', 'Train_food7.csv')
# train_path = os.path.join('..', 'dataset', 'training.csv')
# test_path = os.path.join('..', 'dataset', 'Train_food7.csv')
start = time.time()
train_pd = pd.read_csv(train_path)
end = time.time()
print(end - start)
# test_pd = pd.read_csv(test_path)

config.feature_num = train_pd.shape[1]-1


training_data = torch.tensor(train_pd.values)
training_data, test_data = train_test_split(training_data.numpy(), random_state=777, train_size=0.7)
training_data, test_data = torch.tensor(training_data), torch.tensor(test_data)
# val_data = torch.tensor(test_pd.values)
# test_data = torch.tensor(test_pd.values)

48.70829224586487


In [7]:
start = time.time()
svm = SVM_train(training_data, test_data, test_data, config)
end = time.time()
evaluation(svm, test_data, config)
print(end - start)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))


Apple_pie accuracy: 63.779528 %

Chocolate_cake accuracy: 31.058824 %

Donuts accuracy: 40.000000 %

Hamburger accuracy: 24.047619 %

Hot_dog accuracy: 7.281553 %

Ice_cream accuracy: 25.708061 %

Pizza accuracy: 20.092379 %

49.0735297203064


In [8]:
from sklearn.svm import SVC
import numpy as np
svm = SVC(kernel='linear', probability=True)
start = time.time()
svm.fit(training_data[:,:-1], training_data[:,-1])
end = time.time()
print(end - start)
for i in range(7):
    print('class', i , ':', sum((np.array(svm.predict(test_data[:,:-1])) == np.array(test_data[:,-1])) * 
          (np.array(test_data[:,-1] == i)) / sum(np.array(test_data[:,-1] == i))))

4284.485014915466
class 0 : 0.816272965879262
class 1 : 0.7929411764705868
class 2 : 0.814634146341464
class 3 : 0.7666666666666645
class 4 : 0.7766990291262134
class 5 : 0.7559912854030507
class 6 : 0.7505773672055414


In [9]:
from sklearn.svm import SVC
import numpy as np
svm = SVC(kernel='rbf', probability=True)
start = time.time()
svm.fit(training_data[:,:-1], training_data[:,-1])
end = time.time()
print(end - start)
sum((np.array(svm.predict(test_data[:,:-1])) == np.array(test_data[:,-1])) * 
    (np.array(test_data[:,-1] == 0)) / sum(np.array(test_data[:,-1] == 0)))



7576.523428201675


0.6929133858267705

In [None]:
for i in range(7):
    print('class', i , ':', sum((np.array(svm.predict(test_data[:,:-1])) == np.array(test_data[:,-1])) * 
          (np.array(test_data[:,-1] == i)) / sum(np.array(test_data[:,-1] == i))))