In [224]:
import torch
import numpy as np
import torch.nn.functional as F
import torch.nn as nn
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from collections import Counter

In [225]:
class LibSVM:
    def __init__(self, C = 1, lr = 0.01):
        '''
        超参数包括松弛变量C和学习率lr
        要学习的参数是一个线性超平面的权重和偏置
        '''
        self.C = C
        self.lr = lr
        self.weights = None
        self.bias = None
    
    def train(self):
        self.weights.requires_grad = True
        self.bias.requires_grad = True
        
    def eval(self):
        self.weights.requires_grad = False
        self.bias.requires_grad = False
    
    def fit(self, X, y, max_iters = 1000):
        '''
        X是数据张量, size(n,m)
        数据维度m, 数据数目n
        y是二分类标签, 只能是1或-1
        '''
        n,m = X.shape
        y = y.reshape(-1,1)
        self.weights = torch.randn(m,1)
        self.bias = torch.randn(1)
        self.train()
        
        for step in range(max_iters):
            out = X.mm(self.weights)+self.bias # 前向计算
            # 损失计算
            loss = 0.5*self.weights.T.mm(self.weights)+\
            self.C*torch.sum(F.relu(-y*out+1))
            # 自动求导
            loss.backward()
            # 梯度下降
            self.weights.data -= self.lr*self.weights.grad.data
            self.bias.data -= self.lr*self.bias.grad.data
            self.weights.grad.data.zero_()
            self.bias.grad.data.zero_()
            
        return loss
    
    def predict(self, x, raw = False):
        self.eval()
        out = x.mm(self.weights)+self.bias
        if raw: return out
        else: return torch.sign(out)

In [226]:
%cd ..\wine

C:\Users\chenyizhou\wine


In [227]:
wine_datas = pd.read_csv('winequality-white.csv',sep=';')
wine_datas = wine_datas.values
x = wine_datas[:,:11]
y = wine_datas[:,11]

In [228]:
Counter(y)

Counter({6.0: 2198, 5.0: 1457, 7.0: 880, 8.0: 175, 4.0: 163, 3.0: 20, 9.0: 5})

In [229]:
y

array([6., 6., 6., ..., 6., 7., 6.])

In [230]:
y[y>5.5]=10
y[y<5.5]=2
y[y==10]=1
y[y==2]=-1

In [231]:
Counter(y)

Counter({1.0: 3258, -1.0: 1640})

In [232]:
standar_data = StandardScaler()
standar_data.fit(x)
standar_datax = standar_data.transform(x)

In [233]:
x_train,x_test,y_train,y_test = train_test_split(standar_datax,y,test_size=0.3)

In [234]:
x_train = torch.tensor(x_train,dtype=torch.float32)
x_test = torch.tensor(x_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.float32)
y_test = torch.tensor(y_test,dtype=torch.float32)

In [235]:
model = LibSVM()

In [236]:
model.fit(x_train,y_train)

tensor([[10189.3867]], grad_fn=<AddBackward0>)

In [237]:
y_predict = model.predict(x_test)

In [238]:
y_predict = torch.squeeze(y_predict)
print(sum(y_predict == y_test).item()/len(y_predict))

0.7136054421768707


## SVM线性模型

In [239]:
from sklearn.svm import LinearSVC

In [240]:
svml_sk_model = LinearSVC()
svml_sk_model.fit(x_train,y_train)



LinearSVC()

In [241]:
svml_sk_model.score(x_test,y_test)

0.7612244897959184

## SVM非线性模型

In [242]:
from sklearn.svm import SVC

In [243]:
svm_sk_model = SVC()
svm_sk_model.fit(x_train,y_train)

SVC()

In [244]:
svm_sk_model.score(x_test,y_test)

0.7863945578231293