# 1. 准备数据

In [None]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from pprint import pprint
from matplotlib import pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from scipy.signal import savgol_filter #滤波
from sklearn.preprocessing import MinMaxScaler  
from torch.utils.data import Dataset,DataLoader
import torchkeras 

In [None]:
# Log in to Weights & Biases up front; the training cell below reports to it
# through torchkeras' WandbCallback.
import wandb 
wandb.login() 

In [None]:
from argparse import Namespace
from torch.optim import lr_scheduler
# Hyper-parameters and paths shared by the cells below, exposed as attributes
# of `config` (e.g. `config.batch_size`).
_SETTINGS = {
    "project_name": "multipleclass",
    "file_path": "../alldata.csv",
    "batch_size": 64,
    "dropout_p": 0.1,
    "lr": 1e-4,
    "optim_type": 'Adam',
    "epochs": 100,
    "ckpt_path": 'checkpoint',
    "num_workers": 0,
    # name
}
config = Namespace(**_SETTINGS)

# Seed the torch CPU generator, the torch CUDA generator and NumPy with the
# same value, in that order.
_SEED = 17
for _seed_rng in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    _seed_rng(_SEED)

In [None]:
class MyDataset(Dataset):
    """Map-style dataset backed by a header-less CSV file.

    Each CSV row is one sample: every column except the last two forms the
    signal, and the last column is the label used for multi-class
    classification. The second-to-last column is not read.
    """

    def __init__(self, filepath):
        """Load ``filepath`` into memory and split it into signal/label tensors."""
        table = pd.read_csv(filepath, header=None)
        self.len = table.shape[0]
        self.alldata = np.array(table, dtype='float32')
        signals, targets = self.alldata[:, 0:-2], self.alldata[:, [-1]]
        self.xdata = torch.from_numpy(signals)
        # list index keeps the label column two-dimensional: (rows, 1)
        self.ydata = torch.from_numpy(targets)

    def __len__(self):
        """Return the number of CSV rows."""
        return self.len

    def __getitem__(self, index):
        """Return ``(signal, label)`` for row ``index``.

        The signal is smoothed with a Savitzky-Golay filter and then min-max
        scaled using only this sample's own values; it comes back with a
        leading axis of size 1.
        """
        smoothed = savgol_filter(self.xdata[index], window_length=7, polyorder=2)
        # The scaler works column-wise, so present the sample as one column
        # (per-sample min-max scaling), then restore the (1, length) layout.
        as_column = smoothed.reshape(-1, 1)
        scaled = MinMaxScaler().fit_transform(as_column)
        return scaled.reshape(1, -1), self.ydata[index]
# Read the whole CSV once; per-sample filtering/scaling happens in __getitem__.
dfdata = MyDataset(config.file_path)

In [None]:
# dataset
# Labels: drop the singleton column axis and cast to int64 class indices,
# the target format used with the cross-entropy loss.
dfdata.ydata = dfdata.ydata.squeeze(1).to(dtype=torch.int64)

# Two-stage split with a fixed seed: 10 % held out for testing, then 20 % of
# the remainder for validation (72 % / 18 % / 10 % overall).
_split_seed = 40
dftmp, dftest_raw = train_test_split(dfdata, test_size=0.1, random_state=_split_seed)
dftrain_raw, dfval_raw = train_test_split(dftmp, test_size=0.2, random_state=_split_seed)



In [None]:
# Sanity checks: size of each split, shapes of the full feature/label tensors,
# and the Python type of a single label value.
print("len(dftrain) = ",len(dftrain_raw))
print("len(dfval) = ",len(dfval_raw))
print("len(dftest) = ",len(dftest_raw))
print(dfdata.xdata.shape)
print(dfdata.ydata.shape)
print(type(dfdata.ydata[0].item()))

In [None]:
# dataloader
# Batch size and worker count are common to all three loaders; only the
# training loader shuffles its samples.
_loader_opts = dict(batch_size=config.batch_size, num_workers=config.num_workers)
dl_train = DataLoader(dftrain_raw, shuffle=True, **_loader_opts)
dl_val = DataLoader(dfval_raw, shuffle=False, **_loader_opts)
dl_test = DataLoader(dftest_raw, shuffle=False, **_loader_opts)

In [None]:
# Fetch the first validation batch to inspect tensor shapes. `features` is
# reused further down as the example input for the model summary.
features, labels = next(iter(dl_val))
print(features.shape)
print(labels.shape)
print(len(dl_train))  # number of training batches per epoch

In [None]:
# Windows operating system
plt.rcParams['font.sans-serif']=['SimHei']  # font used so Chinese labels render correctly
plt.rcParams['axes.unicode_minus']=False  # render the minus sign correctly

In [None]:
# Preview up to nine training samples in a 3x3 grid, each titled with its
# class label.
fig = plt.figure(figsize=(8,8))
for i in range(min(9, len(dftrain_raw))):  # tolerate very small training sets
    img,label = dftrain_raw[i]
    img = img.squeeze(0)  # drop the leading size-1 axis added by the dataset
    plt.subplot(3,3,i+1)
    # x axis is derived from the sample itself rather than a hard-coded 5000,
    # so signals of any length can be plotted
    plt.plot(np.arange(img.shape[0]),img)
    plt.title(label.item())  # subplot title
    plt.xlabel('time')  # x-axis label
    plt.ylabel('Sales')  # y-axis label
plt.subplots_adjust(hspace=0.5,wspace=0.4)
plt.show()


In [None]:
# Class distribution over the whole dataset, shown as a pie chart.
# `xx` (class names) and `yy` (samples per class) are reused by the bar-chart
# cells that follow.
_num_classes = 14  # same as the width of the network's final layer (fc5)

print(dfdata.ydata[1].item())

_all_labels = dfdata.ydata.numpy()
numall = len(_all_labels)  # total number of samples
print(numall)

fig = plt.figure(figsize=(6,6))
xx = np.array(['class' + str(k) for k in range(_num_classes)])
# One vectorised count per class; labels outside 0..13 are ignored, exactly
# as the former if-chain did.
yy = np.array([int((_all_labels == k).sum()) for k in range(_num_classes)])
plt.pie(yy,labels=xx,autopct='%1.1f%%')
plt.show()

In [None]:
# Bar chart of the number of samples per class (uses xx / yy from the
# class-distribution cell)
plt.bar(xx, yy, facecolor='#1f77b4', edgecolor='k')
plt.xticks(rotation=90)
plt.tick_params(labelsize=15)
plt.xlabel('类别', fontsize=20)
plt.ylabel('数量', fontsize=20)

In [None]:
# Stacked bar chart: per class, how many samples fall into the test,
# validation and training splits.
plt.figure(figsize=(20, 7))
xp = xx
# Nominal shares implied by the split ratios (0.9*0.8, 0.9*0.2, 0.1) applied
# to the per-class totals; these are not counted from the actual subsets.
ytr = yy*0.72
yva = yy*0.18
yte = yy*0.1

width = 0.5  # bar width

plt.xticks()  # called without arguments, so tick labels are left unrotated

plt.bar(xp, yte, width, label='测试集')
plt.bar(xp, yva, width, label='验证集', bottom=yte)
# The training segment has to start on top of BOTH lower segments; starting
# it at `yva` alone made it overlap the validation bar and shortened the stack.
plt.bar(xp, ytr, width, label='训练集', bottom=yte + yva)

plt.xlabel('类别', fontsize=20)
plt.ylabel('数量', fontsize=20)
plt.tick_params(labelsize=20)  # tick label font size

plt.legend(fontsize=16,loc='upper right')  # legend

# Save as a high-resolution PDF file
# plt.savefig('各类别图像数量.pdf', dpi=120, bbox_inches='tight')

plt.show()

# 创建模型

In [None]:
class SELayer(nn.Module):
    """Squeeze-and-excitation channel gate for ``(batch, channel, length)`` input.

    Each channel is averaged over its length, the averages pass through a
    bias-free two-layer bottleneck ending in a sigmoid, and the resulting
    per-channel weights rescale the input.

    Args:
        channel: number of input (and output) channels.
        reduction: bottleneck factor; the hidden width is ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        hidden = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate (same shape as ``x``)."""
        # unpacking three values also rejects inputs that are not 3-D
        batch, channels, _length = x.size()
        squeezed = self.avg_pool(x).view(batch, channels)
        gate = self.fc(squeezed).view(batch, channels, 1)
        # the trailing size-1 axis broadcasts over the length dimension
        return x * gate

class create_net(nn.Module):
    """1-D CNN classifier with 14 outputs.

    Five ``conv -> ReLU -> max-pool(2)`` stages (1 -> 8 -> 16 -> 32 -> 64 -> 128
    channels) feed a five-layer fully connected head. The flattened feature
    size is fixed at ``128*78``; a single-channel signal of length 5000
    produces exactly that (5000 -> 2500 -> 1250 -> 625 -> 156 -> 78).
    ``forward`` returns log-probabilities.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the same order and under the same names as
        # before, so state dicts and seeded initialisation are unaffected.
        self.conv1 = nn.Conv1d(1, 8, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(2)  # same as MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(8, 16, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv1d(16, 32, kernel_size=5, stride=1, padding=2)
        # the only strided convolution: roughly halves the length on its own
        self.conv4 = nn.Conv1d(32, 64, kernel_size=5, stride=2, padding=1)
        self.conv5 = nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2)
        self.fc1 = nn.Linear(128*78, 5000)
        self.fc2 = nn.Linear(5000, 2500)
        self.fc3 = nn.Linear(2500, 1000)
        self.fc4 = nn.Linear(1000, 64)
        self.fc5 = nn.Linear(64, 14)

    def forward(self, x):
        """Map a ``(batch, 1, length)`` signal to ``(batch, 14)`` log-probabilities."""
        conv_stages = (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5)
        for conv in conv_stages:
            x = self.maxpool(self.relu(conv(x)))
        x = x.view(-1, 128*78)
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(dense(x))
        return F.log_softmax(self.fc5(x), dim=1)

In [None]:
class multiAccuracy(nn.Module):
    """Running top-1 accuracy for multi-class classification.

    ``forward`` scores one batch and adds it to the running totals,
    ``compute`` returns the accuracy accumulated so far and ``reset`` clears
    the totals.
    """

    def __init__(self):
        """Create the two running counters, both starting at zero."""
        super().__init__()
        # Counters are stored as non-trainable parameters so they are part of
        # the module's parameter list.
        self.correct = nn.Parameter(torch.tensor(0.0), requires_grad=False)
        self.total = nn.Parameter(torch.tensor(0.0), requires_grad=False)

    def forward(self, preds: torch.Tensor, targets: torch.Tensor):
        """Accumulate one batch and return that batch's accuracy.

        ``preds`` holds per-class scores on its last axis; ``targets`` holds
        class indices and is flattened before comparison.
        """
        flat_targets = targets.reshape(-1)
        hits = (preds.argmax(dim=-1) == flat_targets).sum()
        batch_size = flat_targets.shape[0]
        self.correct.add_(hits)
        self.total.add_(batch_size)
        return hits / batch_size

    def compute(self):
        """Return correct / total over everything seen since the last reset."""
        return self.correct.float() / self.total

    def reset(self):
        """Set both running counters back to zero."""
        self.correct.zero_()
        self.total.zero_()

In [None]:
from torchkeras import summary
# Build the network and print its summary, using the validation batch
# `features` fetched earlier as the example input.
net = create_net()
summary(net,input_data=features);

# 训练

In [None]:
#训练2
from torchkeras import KerasModel
#对多分类模型来说，要用Macro Average（宏平均）或Micro Average（微平均）规则来进行F1（或者P、R）的计算。
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score#使用函数调转真实值与预测值的位置precision_score(labels, preds, average='macro')
# net = create_net()
# NOTE: `net` already returns log-probabilities (log_softmax in
# create_net.forward). CrossEntropyLoss applies log_softmax once more, which
# leaves log-probabilities unchanged, so the loss value is still correct.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=config.lr)
# StepLR multiplies the learning rate by 0.5 every 5 scheduler steps. It is
# bound to its own name: assigning it to `lr_scheduler` overwrote the imported
# `torch.optim.lr_scheduler` module and made this cell fail when re-run.
# The scheduler is currently NOT handed to KerasModel (see the commented
# argument below).
step_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
metric_dict = {"acc": multiAccuracy()}
model = KerasModel(net,
                   loss_fn = loss_fn,
                   metrics_dict= metric_dict,
                   optimizer = optimizer
#                    ,
#                    lr_scheduler=step_scheduler
                  )



In [None]:
from torchkeras.kerascallbacks import WandbCallback
# Weights & Biases callback for the training run. The project name comes from
# `config`, and the whole `config` namespace is passed along with it.
# NOTE(review): the exact effect of name/save_code/save_ckpt is defined by
# torchkeras' WandbCallback — confirm against its documentation.
wandb_cb = WandbCallback(project=config.project_name,
                         config=config,
                         name=None,
                         save_code=True,
                         save_ckpt=True)

In [None]:
# Train the model and keep the per-epoch history for the evaluation cells.
# Validation accuracy is the monitored quantity (higher is better), with a
# patience of 40 epochs.
dfhistory = model.fit(
      train_data=dl_train,
      val_data=dl_val,
      epochs=config.epochs,
      # taken from the config instead of repeating the literal, so changing
      # `config.ckpt_path` actually takes effect
      ckpt_path=config.ckpt_path,
      patience=40,
      monitor='val_acc',
      mode='max',
      callbacks = [wandb_cb]
)

# 训练集、验证集评估

In [None]:
# Show the last rows of the training history returned by model.fit.
dfhistory.tail()

In [None]:
# Best values reached during training: lowest validation loss, lowest
# training loss, highest validation accuracy.
print(dfhistory['val_loss'].min())
print(dfhistory['train_loss'].min())
print(dfhistory['val_acc'].max())

In [None]:
# Evaluate the trained model on the training loader.
model.evaluate(dl_train)

In [None]:
# Evaluate the trained model on the validation loader.
# NOTE(review): `dl_test` is built earlier but is not evaluated in the cells
# visible here — confirm whether a held-out test evaluation is intended.
model.evaluate(dl_val)