In [127]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
import matplotlib
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [128]:
class LibSVM:
    def __init__(self, C = 1, lr = 0.01):
        '''
        超参数包括松弛变量C和学习率lr
        要学习的参数是一个线性超平面的权重和偏置
        '''
        self.C = C
        self.lr = lr
        self.weights = None
        self.bias = None
    
    def train(self):
        self.weights.requires_grad = True
        self.bias.requires_grad = True
        
    def eval(self):
        self.weights.requires_grad = False
        self.bias.requires_grad = False
    
    def fit(self, X, y, max_iters = 1000):
        '''
        X是数据张量, size(n,m)
        数据维度m, 数据数目n
        y是二分类标签, 只能是1或-1
        '''
        n,m = X.shape
        y = y.reshape(-1,1)
        self.weights = torch.randn(m,1)
        self.bias = torch.randn(1)
        self.train()
        
        for step in range(max_iters):
            out = X.mm(self.weights)+self.bias # 前向计算
            # 损失计算
            loss = 0.5*self.weights.T.mm(self.weights)+\
            self.C*torch.sum(F.relu(-y*out+1))
            # 自动求导
            loss.backward()
            # 梯度下降
            self.weights.data -= self.lr*self.weights.grad.data
            self.bias.data -= self.lr*self.bias.grad.data
            self.weights.grad.data.zero_()
            self.bias.grad.data.zero_()
            
        return loss
    
    def predict(self, x, raw = False):
        self.eval()
        out = x.mm(self.weights)+self.bias
        if raw: return out
        else: return torch.sign(out)

In [129]:
%cd 

C:\Users\chenyizhou


In [130]:
epi_datas = pd.read_csv('epi_r_filtered_5.csv')
epi_datas = epi_datas.values
x = epi_datas[:,1:]
y = epi_datas[:,0]

In [131]:
y[y==0.0]=1
y[y==1.25]=1
y[y==1.875]=1
y[y==2.5]=1
y[y==3.125]=1
y[y==3.75]=1
y[y==4.375]=-1
y[y==5.0]=-1

In [132]:
standar_data = StandardScaler()
standar_data.fit(x)
standar_datax = standar_data.transform(x)

In [133]:
x_train,x_test,y_train,y_test = train_test_split(standar_datax,y,test_size=0.3)

In [134]:
x_train = torch.tensor(x_train,dtype=torch.float32)
x_test = torch.tensor(x_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.float32)
y_test = torch.tensor(y_test,dtype=torch.float32)

In [135]:
model = LibSVM()

In [136]:
model.fit(x_train,y_train)

tensor([[172639.5156]], grad_fn=<AddBackward0>)

In [137]:
y_predict = model.predict(x_test)

In [138]:
def get_metrics(y_true, y_pred):
    # 获取常见的4个值，用于系列指标计算
    TN, FP, FN, TP = np.fromiter((sum(
        bool(j >> 1) == bool(y_true[i]) and
        bool(j & 1) == bool(y_pred[i])
        for i in range(len(y_true))
    ) for j in range(4)), float)

    Accuracy = (TN + TP) / (TN + FP + FN + TP + 1e-8)
    Precision = TP / (TP + FP + 1e-8)
    # True Positive Rate
    Recall = TP / (TP + FN + 1e-8)
    # False Positive Rate
    FPR = FP / (FP + TN + 1e-8)

    print("Precision", Precision)
    print("Recall", Recall)
    print('Accuracy',Accuracy)

    # F_measure = 2 * Recall * Precision / (Recall + Precision + 1e-8)
    # g_mean = np.sqrt((TN / (TN + FP + 1e-8)) * (TP / (TP + FN + 1e-8)))
    # Balance = 1 - np.sqrt((0 - FPR) ** 2 + (1 - Recall) ** 2) / np.sqrt(2)
    MCC = (TP * TN - FN * FP) / np.sqrt((TP + FN) * (TP + FP) * (FN + TN) * (FP + TN) + 1e-8)

    # 当F_measure中θ值为2时
    F_2 = 5 * Recall * Precision / (4 * Recall + Precision + 1e-8)
    # G_measure = 2 * Recall * (1 - FPR) / (Recall + (1 - FPR) + 1e-8)
    # NMI = normalized_mutual_info_score(y_true, y_pred, average_method="arithmetic")

    # 返回所有指标值 vars() 函数返回对象object的属性和属性值的字典对象。
    y_pred = vars()
    # 该字典不返回'y_true', 'y_pred', "TN", "FP", "FN", "TP"这些key值
    return {k: y_pred[k] for k in reversed(list(y_pred)) if k not in ['y_true', 'y_pred', "TN", "FP", "FN", "TP", "FPR"]}

In [139]:
get_metrics(y_test,y_predict)

Precision 0.9999999999976191
Recall 0.9999999999976191
Accuracy 0.9999999999976191


{'F_2': 0.9999999979976192,
 'MCC': 0.0,
 'Recall': 0.9999999999976191,
 'Precision': 0.9999999999976191,
 'Accuracy': 0.9999999999976191}