# 1. 准备数据

In [None]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from pprint import pprint
from matplotlib import pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from scipy.signal import savgol_filter #滤波
from sklearn.preprocessing import MinMaxScaler  
from torch.utils.data import Dataset,DataLoader
import torchkeras 

In [None]:
from argparse import Namespace
from torch.optim import lr_scheduler

# Central run configuration, exposed as attributes (config.lr, config.epochs, ...).
config = Namespace(**{
    "project_name": "two-class",
    "file_path": "../alldata.csv",
    "batch_size": 64,
    "dropout_p": 0.1,
    "lr": 1e-4,
    "optim_type": 'Adam',
    "epochs": 100,
    "ckpt_path": 'checkpoint',
    "num_workers": 0,
})

# Fix the seed of every random generator used in this file so runs are repeatable.
torch.manual_seed(17)       # PyTorch CPU generator
torch.cuda.manual_seed(17)  # PyTorch GPU generator
np.random.seed(17)          # NumPy global generator

In [None]:
class MyDataset(Dataset):
    """Dataset backed by a header-less CSV file of float rows.

    Every column except the last two is treated as a feature; the
    second-to-last column is the binary label. The last column is read
    but not used by this class.

    Attributes:
        alldata: the whole CSV as a float32 NumPy array.
        len: number of rows (samples).
        xdata: feature tensor, shape (rows, columns - 2).
        ydata: label tensor, shape (rows, 1).
    """

    def __init__(self, filepath):
        """Load ``filepath`` and split it into feature and label tensors."""
        frame = pd.read_csv(filepath, header=None)
        self.len = frame.shape[0]
        self.alldata = np.array(frame, dtype='float32')
        # torch.from_numpy shares memory with the NumPy slices it wraps.
        self.xdata = torch.from_numpy(self.alldata[:, :-2])
        self.ydata = torch.from_numpy(self.alldata[:, [-2]])  # binary label

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return ``(signal, label)`` for the sample at ``index``.

        The feature row is smoothed with a Savitzky-Golay filter
        (window 7, polynomial order 2) and then min-max scaled using that
        sample's own minimum and maximum. ``signal`` is a NumPy array of
        shape (1, n_features); ``label`` is the matching row of ``ydata``.
        """
        smoothed = savgol_filter(self.xdata[index], window_length=7, polyorder=2)
        # MinMaxScaler works column-wise, so present the sample as one column.
        as_column = smoothed.reshape(-1, 1)
        scaled = MinMaxScaler().fit_transform(as_column)
        # Back to a single-channel row: (1, n_features).
        return scaled.reshape(1, -1), self.ydata[index]
# Load the full dataset from the CSV path given in the run configuration.
dfdata = MyDataset(filepath=config.file_path)

In [None]:
# Decide the train/val/test membership on indices first. Splitting an index
# array with the same random_state and test_size selects the same samples, in
# the same order, as splitting the dataset object itself.
_tmp_idx, _test_idx = train_test_split(
    np.arange(len(dfdata)), random_state=40, test_size=0.1
)
_train_pos, _val_pos = train_test_split(
    np.arange(len(_tmp_idx)), random_state=40, test_size=0.2
)

# Per-feature standardisation statistics come from the training rows only, so
# no information from the validation or test samples leaks into preprocessing.
_train_rows = torch.as_tensor(_tmp_idx[_train_pos], dtype=torch.long)
temp_b = torch.mean(dfdata.xdata[_train_rows], dim=0)
temp_c = torch.std(dfdata.xdata[_train_rows], dim=0)
# A constant (or undefined) column would divide by zero / NaN; leave it unscaled.
temp_c = torch.where(temp_c > 0, temp_c, torch.ones_like(temp_c))
dfdata.xdata = (dfdata.xdata - temp_b) / temp_c

# Materialise the splits as lists of (signal, label) pairs, the same container
# type train_test_split returns for a plain indexable object.
dftest_raw = [dfdata[int(i)] for i in _test_idx]
dftmp = [dfdata[int(i)] for i in _tmp_idx]
dftrain_raw = [dftmp[int(i)] for i in _train_pos]
dfval_raw = [dftmp[int(i)] for i in _val_pos]

In [None]:
# Report how many samples ended up in each split.
for caption, split in (
    ("len(dftrain) = ", dftrain_raw),
    ("len(dfval) = ", dfval_raw),
    ("len(dftest) = ", dftest_raw),
):
    print(caption, len(split))

# Shapes of the full feature and label tensors.
for tensor in (dfdata.xdata, dfdata.ydata):
    print(tensor.shape)

# Python type of a single label value.
first_label = dfdata.ydata[0].item()
print(type(first_label))

In [None]:
# Build one DataLoader per split; only the training loader shuffles.
dl_train, dl_val, dl_test = (
    DataLoader(
        split,
        batch_size=config.batch_size,
        shuffle=reshuffle,
        num_workers=config.num_workers,
    )
    for split, reshuffle in (
        (dftrain_raw, True),
        (dfval_raw, False),
        (dftest_raw, False),
    )
)

In [None]:
# Pull a single batch from the validation loader to inspect tensor shapes.
# (`features` is reused later as the example input for the model summary.)
features, labels = next(iter(dl_val))
print(features.shape)
print(labels.shape)
# Number of batches per training epoch.
print(len(dl_train))

In [None]:
# Matplotlib font setup (Windows): SimHei renders the Chinese labels, and
# disabling unicode_minus keeps the minus sign displayable with that font.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

In [None]:
# Plot up to nine training samples in a 3x3 grid, titled with their label.
fig = plt.figure(figsize=(8, 8))
for i in range(min(9, len(dftrain_raw))):
    img, label = dftrain_raw[i]
    img = img.squeeze(0)  # (1, length) -> (length,)
    plt.subplot(3, 3, i + 1)
    # Derive the x axis from the signal itself instead of assuming 5000 points.
    plt.plot(np.arange(img.shape[0]), img)
    plt.title(label.item())
    plt.xlabel('time')
    # NOTE(review): 'Sales' looks copied from another example - confirm the intended y label.
    plt.ylabel('Sales')
plt.subplots_adjust(hspace=0.5, wspace=0.4)
plt.show()


In [None]:
# Plot up to nine training samples in a 3x3 grid, titled with their label.
# NOTE(review): this cell repeats the previous plotting cell verbatim.
fig = plt.figure(figsize=(8, 8))
for i in range(min(9, len(dftrain_raw))):
    img, label = dftrain_raw[i]
    img = img.squeeze(0)  # (1, length) -> (length,)
    plt.subplot(3, 3, i + 1)
    # Derive the x axis from the signal itself instead of assuming 5000 points.
    plt.plot(np.arange(img.shape[0]), img)
    plt.title(label.item())
    plt.xlabel('time')
    # NOTE(review): 'Sales' looks copied from another example - confirm the intended y label.
    plt.ylabel('Sales')
plt.subplots_adjust(hspace=0.5, wspace=0.4)
plt.show()


In [None]:
# Count how many samples carry label 0 and label 1, plus the total.
flat_labels = dfdata.ydata.reshape(-1)
num0 = int((flat_labels == 0).sum())
num1 = int((flat_labels == 1).sum())
numall = len(dfdata.ydata)

# Pie chart of the class balance.
xx = np.array(['class0', 'class1'])
yy = np.array([num0, num1])
fig = plt.figure(figsize=(6, 6))
plt.pie(yy, labels=xx, autopct='%1.1f%%')
plt.show()

In [None]:
# Bar chart of the per-class sample counts.
plt.bar(x=xx, height=yy, facecolor='#1f77b4', edgecolor='k')
plt.xlabel('类别', fontsize=20)
plt.ylabel('数量', fontsize=20)
plt.xticks(rotation=90)
plt.tick_params(labelsize=15)

In [None]:
# Stacked bar chart of the approximate per-class size of each split.
plt.figure(figsize=(20, 7))
xp = xx
# Nominal split fractions: 10% test, then 20% / 80% of the remaining 90%.
ytr = yy * 0.72
yva = yy * 0.18
yte = yy * 0.1

width = 0.5  # bar width

plt.bar(xp, yte, width, label='测试集')
plt.bar(xp, yva, width, label='验证集', bottom=yte)
# The training segment must start on top of BOTH lower segments; starting it
# at `yva` alone would draw it over the validation segment.
plt.bar(xp, ytr, width, label='训练集', bottom=yte + yva)

plt.xlabel('类别', fontsize=20)
plt.ylabel('数量', fontsize=20)
plt.tick_params(labelsize=20)  # tick label font size

plt.legend(fontsize=16, loc='upper right')

# Optionally save as a high-resolution PDF:
# plt.savefig('各类别图像数量.pdf', dpi=120, bbox_inches='tight')

plt.show()

# 创建模型

In [None]:

class create_net(nn.Module):
    """1-D CNN binary classifier that outputs one raw logit per sample.

    Three stages of Conv1d(kernel 5, stride 2, padding 2) -> ReLU ->
    MaxPool1d(2) are followed by four fully connected layers. ``fc1``
    expects 64 * 78 flattened features, which is what the convolutional
    stack yields for single-channel signals of length 5000.

    No sigmoid is applied; the output is meant for a logit-based loss such
    as ``nn.BCEWithLogitsLoss``.
    """

    def __init__(self):
        super(create_net, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=8, kernel_size=5, stride=2, padding=2)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(2)  # same as nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=5, stride=2, padding=2)
        self.conv3 = nn.Conv1d(in_channels=16, out_channels=64, kernel_size=5, stride=2, padding=2)
        self.fc1 = nn.Linear(64 * 78, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        """Map ``x`` of shape (batch, 1, length) to logits of shape (batch, 1).

        Raises:
            RuntimeError: from ``fc1`` when ``length`` does not reduce to the
                64 * 78 features the dense head expects.
        """
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.maxpool(self.relu(conv(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 64*78),
        # this can never silently fold extra features into the batch
        # dimension when the input length is wrong; fc1 fails loudly instead.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.fc4(x)  # raw logit, no activation

In [None]:
class AUC(nn.Module):
    """Approximate, streaming AUC for a binary-classification task.

    Predicted probabilities (sigmoid of the logits) are quantised into
    ``num_bins + 1`` integer buckets. Two histograms, one for positive and
    one for negative targets, are accumulated across calls; the AUC is
    computed from those histograms rather than from the raw scores.

    Args:
        num_bins: quantisation resolution; scores map to buckets
            ``0 .. num_bins``. Defaults to 10000.
    """

    def __init__(self, num_bins=10000):
        super().__init__()
        # Frozen Parameters hold the running histograms, so they follow the
        # module across devices and appear in its state_dict.
        self.tp = nn.Parameter(torch.zeros(num_bins + 1), requires_grad=False)
        self.fp = nn.Parameter(torch.zeros(num_bins + 1), requires_grad=False)

    def eval_auc(self, tp, fp):
        """Return the AUC implied by positive (``tp``) and negative (``fp``) histograms.

        Buckets are walked from the highest score downwards. For each bucket
        the fraction of positives scored above it (counting half of the
        positives tied in the same bucket) is weighted by the fraction of
        negatives in that bucket. The result is NaN when either histogram is
        empty, because the class totals are then zero.
        """
        tp_total = torch.sum(tp)
        fp_total = torch.sum(fp)
        tp_reverse = torch.flip(tp, dims=[0])
        fp_reverse = torch.flip(fp, dims=[0])
        # Subtracting half the bucket's own count gives ties half credit.
        tp_reverse_cum = torch.cumsum(tp_reverse, dim=0) - tp_reverse / 2.0
        auc = torch.sum(torch.true_divide(tp_reverse_cum, tp_total)
                        * torch.true_divide(fp_reverse, fp_total))
        return auc

    def forward(self, preds: torch.Tensor, targets: torch.Tensor):
        """Accumulate one batch and return the AUC of that batch alone.

        Args:
            preds: raw logits; flattened to one score per sample.
            targets: labels; values ``>= 0.5`` count as positive.
        """
        num_buckets = self.tp.numel()
        y_pred = ((num_buckets - 1) * torch.sigmoid(preds)).reshape(-1).long()
        y_true = targets.reshape(-1)
        assert y_pred.shape == y_true.shape

        # Histogram each class in a single vectorised call instead of a
        # per-sample Python loop.
        is_positive = y_true >= 0.5
        tpi = torch.bincount(y_pred[is_positive], minlength=num_buckets).to(
            device=self.tp.device, dtype=self.tp.dtype)
        fpi = torch.bincount(y_pred[~is_positive], minlength=num_buckets).to(
            device=self.fp.device, dtype=self.fp.dtype)

        self.tp += tpi
        self.fp += fpi
        return self.eval_auc(tpi, fpi)

    def compute(self):
        """Return the AUC over everything accumulated since the last reset."""
        return self.eval_auc(self.tp, self.fp)

    def reset(self):
        """Clear both running histograms."""
        self.tp.zero_()
        self.fp.zero_()

In [None]:
from torchkeras import summary

# Build a fresh network and print torchkeras' model summary for it, using the
# sample batch fetched earlier as the example input. The trailing semicolon
# keeps the notebook from echoing the call's return value.
net = create_net()
summary(net, input_data=features);

# 训练

In [None]:
from torchkeras import KerasModel
from torchkeras.metrics import Accuracy
from torchkeras.metrics import Precision
from torchkeras.metrics import Recall

from torchkeras.kerascallbacks import WandbCallback

# Fresh network for training (separate from the one used for the summary).
net2 = create_net()
# The network emits raw logits, so use the logit-based binary cross-entropy.
loss_fn = nn.BCEWithLogitsLoss()
# Resolve the optimizer class from the run configuration ('Adam' by default)
# so that changing config.optim_type actually takes effect.
optimizer = getattr(torch.optim, config.optim_type)(net2.parameters(), lr=config.lr)
metric_dict = {
    "acc": Accuracy(),
    "pre": Precision(),
    "recall": Recall(),
    "auc": AUC(),
}
model = KerasModel(
    net2,
    loss_fn=loss_fn,
    metrics_dict=metric_dict,
    optimizer=optimizer,
)



In [None]:
# Train on the training loader, validating on the validation loader each epoch.
# The checkpoint location comes from the run configuration rather than a
# duplicated literal. NOTE(review): patience/monitor/mode presumably select
# early stopping on the best 'val_acc' - confirm against the torchkeras docs.
dfhistory = model.fit(
    train_data=dl_train,
    val_data=dl_val,
    epochs=config.epochs,
    ckpt_path=config.ckpt_path,
    patience=30,
    monitor='val_acc',
    mode='max',
)

# 训练集、测试集评估

In [None]:
# Show the last rows of the history object returned by model.fit.
dfhistory.tail()

In [None]:
# Best values reached during training: lowest losses and highest validation accuracy.
best_val_loss = dfhistory['val_loss'].min()
print(best_val_loss)
best_train_loss = dfhistory['train_loss'].min()
print(best_train_loss)
best_val_acc = dfhistory['val_acc'].max()
print(best_val_acc)

In [None]:
# Evaluate the trained model on the training DataLoader.
model.evaluate(dl_train)

In [None]:
# Evaluate the trained model on the validation DataLoader.
model.evaluate(dl_val)