In [1]:
# 原始模型

import torch
from torch import nn
from torchvision import datasets, transforms
from torch import optim
from torch.utils.data import DataLoader, Dataset, random_split, Subset
from tqdm import tqdm,trange
import torch.nn.functional as F

In [2]:
class ActionDatasets(Dataset):
    def __init__(self, csv_path, transform=None, target_transform=None, pick_path = "data.pkl"):
        super(ActionDatasets, self).__init__()
        self.transform = transform
        self.target_transform = target_transform
        import pandas as pd
        from glob import glob
        import os
        csvs = glob(os.path.join(csv_path, "*.csv"))

        if len(csvs) == 0:
            raise ValueError("路径下不存在csv文件")
            return
        df = []

        if os.path.exists(pick_path):
            df = pd.read_pickle(pick_path)
        else:
            for label, csv in enumerate(csvs):
                if type(df) == list:
                    df = pd.read_csv(csv)
                    df['label'] = label
                else:
                    df_tmp = pd.read_csv(csv)
                    df_tmp['label'] = label
                    df = pd.concat([df, df_tmp])
            df.to_pickle(pick_path)
        self.data = df
        self.values = self.data.values
        
    def __getitem__(self, idx):
        train_data, label_data = torch.tensor(self.values[idx*40:(idx+1)*40, 1:-2],dtype=torch.float32) ,torch.tensor(self.values[idx*40:(idx+1)*40, -1][0], dtype=torch.long)

        if self.transform:
            train_data = self.transform(train_data)
        if self.target_transform:
            label_data = self.target_transform(label_data)

        return train_data ,label_data

    def __len__(self):
        return len(self.data)//40


In [3]:
classes = 5  #分类
hidden_dim = 64 # rnn隐藏单元数
lr = 0.001 # 学习率
epoches = 20 #训练次数
batch_size = 128 # 每一个训练批次数量
input_dim= 42
device = "cuda" if torch.cuda.is_available() else "cpu"

print(device)

cuda


In [4]:
def create_data_loader():
    train_data_path = "D:\\temp\\augment_action_windows"
    # train_data_path = "D:\\temp\\augment_action_windows"
    datasets = ActionDatasets(train_data_path, transform=torch.tensor, target_transform=torch.tensor, pick_path='train.pkl')
    test_datasets = ActionDatasets("D:\\temp\\yan1_action_windows\\action_windows", transform=torch.tensor, target_transform=torch.tensor, pick_path='test.pkl')
    split_rate = 0.8  # 训练集占整个数据集的比例
    train_len = int(split_rate * len(datasets))
    valid_len = len(datasets) - train_len

    train_sets, valid_sets = random_split(datasets, [train_len, valid_len], generator=torch.Generator().manual_seed(42))

    train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True,drop_last=True,pin_memory=True)
    test_loader = DataLoader(test_datasets, batch_size=batch_size, shuffle=True,drop_last=True,pin_memory=True)
    valid_loader = DataLoader(valid_sets, batch_size=batch_size, shuffle=True,drop_last=True,pin_memory=True)

    print(f"训练集大小{len(train_sets)}， 验证集大小{len(valid_sets)}")
    return train_loader, test_loader, valid_loader
train_loader, test_loader, valid_loader = create_data_loader()

训练集大小63980， 验证集大小15996


In [5]:
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, out_dim):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(input_dim, hidden_dim, 3, batch_first=True)
        self.dropout1 = nn.Dropout(p=0.3)
        
        self.linear1 = nn.Linear(hidden_dim, 64)
        self.linear2 = nn.Linear(64, out_dim)
    def forward(self, X):
        out, status = self.rnn(X)
        out = F.relu(out[:,-1,:])
        out = F.relu(self.linear1(out))
        out = self.dropout1(out)
        out = self.linear2(out)

        return out

In [6]:
rnn = RNN(input_dim, hidden_dim, classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), lr=lr)

In [7]:
def GETACC(loader=valid_loader):
    rnn.eval()
    cnt = 0
    sum_valid_acc = 0
    sum_valid_loss = 0
    for data, label in loader:
        data = data.to(device)
        label = label.to(device)
        out = rnn(data)
        
        _, predict = torch.max(out, 1)
        
        loss = criterion(out, label)
        sum_valid_loss += loss.item()
        acc = torch.sum((predict == label).int()) / batch_size
        sum_valid_acc += acc
        cnt+=1
    
    return sum_valid_loss/cnt, sum_valid_acc/cnt

In [8]:
for epoch in range(epoches):
    i = 0
    loss_sum = 0
    bar = tqdm(train_loader)
    for ii, (data , label) in enumerate(bar):
        rnn.train()
        data = data.to(device)
        label = label.to(device)
        out = rnn(data)
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        i+=1
        loss_sum += loss.item()

        if ii == 0:
            valid_loss,valid_acc = GETACC(valid_loader)
            
            bar.set_description(f"epoch = {epoch} train_loss = {loss_sum/i} valid_loss = {valid_loss} valid_acc= {valid_acc}")


epoch = 0 train_loss = 1.6120792627334595 valid_loss = 1.607880014565683 valid_acc= 0.19959676265716553: 100%|██████████| 499/499 [00:10<00:00, 47.94it/s]
epoch = 1 train_loss = 0.3518986403942108 valid_loss = 0.3434682425952727 valid_acc= 0.8690776228904724: 100%|██████████| 499/499 [00:09<00:00, 52.60it/s]
epoch = 2 train_loss = 0.2689221203327179 valid_loss = 0.25528301845394796 valid_acc= 0.8986895084381104: 100%|██████████| 499/499 [00:09<00:00, 52.62it/s]
epoch = 3 train_loss = 0.25424203276634216 valid_loss = 0.2247106836688134 valid_acc= 0.9061239361763: 100%|██████████| 499/499 [00:09<00:00, 53.16it/s]
epoch = 4 train_loss = 0.19590070843696594 valid_loss = 0.20892722862622432 valid_acc= 0.9157635569572449: 100%|██████████| 499/499 [00:09<00:00, 52.95it/s]
epoch = 5 train_loss = 0.11097963154315948 valid_loss = 0.1984835509210825 valid_acc= 0.9165826439857483: 100%|██████████| 499/499 [00:09<00:00, 52.18it/s]
epoch = 6 train_loss = 0.10062102228403091 valid_loss = 0.1875666415

In [9]:
test_loss,test_acc = GETACC(test_loader)
print(f"test_loss = {test_loss}, test_acc = {test_acc}")

test_loss = 0.8597071766853333, test_acc = 0.7957589626312256
