In [1]:
# 原始模型

import torch
from torch import nn
from torchvision import datasets, transforms
from torch import optim
from torch.utils.data import DataLoader, Dataset, random_split, Subset
from tqdm import tqdm,trange
import torch.nn.functional as F

In [2]:
class ActionDatasets(Dataset):
    def __init__(self, csv_path, transform=None, target_transform=None, pick_path = "data.pkl"):
        super(ActionDatasets, self).__init__()
        self.transform = transform
        self.target_transform = target_transform
        import pandas as pd
        from glob import glob
        import os
        csvs = glob(os.path.join(csv_path, "*.csv"))

        if len(csvs) == 0:
            raise ValueError("路径下不存在csv文件")
            return
        df = []

        if os.path.exists(pick_path):
            df = pd.read_pickle(pick_path)
        else:
            for label, csv in enumerate(csvs):
                if type(df) == list:
                    df = pd.read_csv(csv)
                    df['label'] = label
                else:
                    df_tmp = pd.read_csv(csv)
                    df_tmp['label'] = label
                    df = pd.concat([df, df_tmp])
            df.to_pickle(pick_path)
            
        self.A_values = df[['aAX', 'aAY', 'aAZ','bAX', 'bAY', 'bAZ', 'cAX', 'cAY', 'cAZ', 'dAX', 'dAY', 'dAZ', 'eAX', 'eAY', 'eAZ', 'fAX', 'fAY', 'fAZ', 'gAX', 'gAY', 'gAZ']].values

        self.B_values = df[['aWX', 'aWY', 'aWZ','bWX', 'bWY', 'bWZ', 'cWX', 'cWY', 'cWZ', 'dWX', 'dWY', 'dWZ', 'eWX', 'eWY', 'eWZ', 'fWX', 'fWY', 'fWZ', 'gWX', 'gWY', 'gWZ']].values
        self.labels_values = df[['label']].values
        
    def __getitem__(self, idx):
            
        A_data = torch.tensor(self.A_values[idx*40:(idx+1)*40, :],dtype=torch.float32)
        B_data = torch.tensor(self.B_values[idx*40:(idx+1)*40, :],dtype=torch.float32)
        label_data = torch.tensor(self.labels_values[idx*40:(idx+1)*40, -1][0], dtype=torch.long)

        if self.transform:
            A_data = self.transform(A_data)
            B_data = self.transform(B_data)
        if self.target_transform:
            label_data = self.target_transform(label_data)

        return (A_data, B_data), label_data

    def __len__(self):
        return self.labels_values.shape[0]//40


In [3]:
classes = 5  #分类
hidden_dim = 64 # rnn隐藏单元数
lr = 0.001 # 学习率
epoches = 20 #训练次数
batch_size = 128 # 每一个训练批次数量
input_dim= 7 * 3 
device = "cuda" if torch.cuda.is_available() else "cpu"
time_step = 40
print(device)

cuda


In [4]:
def create_data_loader():
    train_data_path = "D:\\temp\\augment_action_windows"
    # train_data_path = "D:\\temp\\augment_action_windows"
    datasets = ActionDatasets(train_data_path, transform=torch.tensor, target_transform=torch.tensor, pick_path='train.pkl')
    test_datasets = ActionDatasets("D:\\temp\\yan1_action_windows\\action_windows", transform=torch.tensor, target_transform=torch.tensor, pick_path='test.pkl')
    split_rate = 0.8  # 训练集占整个数据集的比例
    train_len = int(split_rate * len(datasets))
    valid_len = len(datasets) - train_len

    train_sets, valid_sets = random_split(datasets, [train_len, valid_len], generator=torch.Generator().manual_seed(42))

    train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True,drop_last=True,pin_memory=True)
    test_loader = DataLoader(test_datasets, batch_size=batch_size, shuffle=True,drop_last=True,pin_memory=True)
    valid_loader = DataLoader(valid_sets, batch_size=batch_size, shuffle=True,drop_last=True,pin_memory=True)

    print(f"训练集大小{len(train_sets)}， 验证集大小{len(valid_sets)}")
    return train_loader, test_loader, valid_loader
train_loader, test_loader, valid_loader = create_data_loader()

训练集大小63980， 验证集大小15996


In [5]:
class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, out_dim, time_step):
        super(RNN, self).__init__()
        self.time_step = time_step
        self.hidden_dim2 = hidden_dim2
        self.hidden_dim1 = hidden_dim1
        self.linear1 = nn.Linear(input_dim, hidden_dim1)
        self.linear2 = nn.Linear(input_dim, hidden_dim1)
        self.linear3 = nn.Linear(hidden_dim2, time_step)
        self.linear4 = nn.Linear(hidden_dim2, out_dim)

        self.rnn1 = nn.LSTM(hidden_dim1, hidden_dim2, batch_first=True)
        self.rnn2 = nn.LSTM(hidden_dim2, hidden_dim2, batch_first=True)
        self.dp1 = nn.Dropout(p=0.5)
        self.dp2 = nn.Dropout(p=0.5)
        self.dp3 = nn.Dropout(p=0.5)
        
    def forward(self, X):
        # X.shape = batch_size, time_step, feature_num
        A, B = X
        batch_size = A.shape[0]
        # out.shape = (batch_size, time_step, hidden_dim1)
        out1 = self.dp1(F.relu(self.linear1(B)))
        out2 = self.dp2(F.relu(self.linear2(A)))

        # out1.shape = out.shape,  status1 = (h, c)
        # h.shape = c.shape = (方向* 层数, batch_size, hidden_dim2)
        out1, (h1, c1) = self.rnn1(out1)

        h1 = h1.view(batch_size, self.hidden_dim2)
        out11 = self.dp3(F.relu(self.linear3(h1)))

        out11 = out11.unsqueeze(2).expand(batch_size, self.time_step, self.hidden_dim1)

        out3 = out2 + out11
        h2 = out3.mean(dim=1).unsqueeze(0)
        out, _ = self.rnn2(out1, (h2, torch.zeros_like(h2)))

        out = self.linear4(out[:,-1,:])

        return out

In [6]:
rnn = RNN(input_dim, hidden_dim, hidden_dim, classes, time_step).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), lr=lr)

In [7]:
def GETACC(loader=valid_loader):
    rnn.eval()
    cnt = 0
    sum_valid_acc = 0
    sum_valid_loss = 0
    for data, label in loader:
        data = [item.to(device) for item in data]
        label = label.to(device)
        out = rnn(data)
        
        _, predict = torch.max(out, 1)
        
        loss = criterion(out, label)
        sum_valid_loss += loss.item()
        acc = torch.sum((predict == label).int()) / batch_size
        sum_valid_acc += acc
        cnt+=1
    
    return sum_valid_loss/cnt, sum_valid_acc/cnt

In [8]:
for epoch in range(epoches):
    i = 0
    loss_sum = 0
    bar = tqdm(train_loader)
    for ii, (data , label) in enumerate(bar):
        rnn.train()
    
        data = [item.to(device) for item in data]
        label = label.to(device)
        out = rnn(data)
        loss = criterion(out, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        i+=1
        loss_sum += loss.item()

        if ii == 0:
            valid_loss,valid_acc = GETACC(valid_loader)
            
            bar.set_description(f"epoch = {epoch} train_loss = {loss_sum/i} valid_loss = {valid_loss} valid_acc= {valid_acc}")


epoch = 0 train_loss = 1.61309814453125 valid_loss = 1.6148927567466613 valid_acc= 0.1846018135547638: 100%|██████████| 499/499 [00:10<00:00, 46.15it/s]
epoch = 1 train_loss = 0.43142569065093994 valid_loss = 0.43095781041249154 valid_acc= 0.8332282900810242: 100%|██████████| 499/499 [00:09<00:00, 50.03it/s]
epoch = 2 train_loss = 0.19101852178573608 valid_loss = 0.2991094304428947 valid_acc= 0.8884198069572449: 100%|██████████| 499/499 [00:09<00:00, 50.22it/s]
epoch = 3 train_loss = 0.168981671333313 valid_loss = 0.22897810573058744 valid_acc= 0.9103452563285828: 100%|██████████| 499/499 [00:09<00:00, 50.87it/s]
epoch = 4 train_loss = 0.21631953120231628 valid_loss = 0.1919516451176136 valid_acc= 0.9179057478904724: 100%|██████████| 499/499 [00:09<00:00, 50.43it/s]
epoch = 5 train_loss = 0.18143948912620544 valid_loss = 0.1723446006616277 valid_acc= 0.9287424087524414: 100%|██████████| 499/499 [00:09<00:00, 50.39it/s]
epoch = 6 train_loss = 0.09578686952590942 valid_loss = 0.156006814

In [9]:
test_loss,test_acc = GETACC(test_loader)
print(f"test_loss = {test_loss}, test_acc = {test_acc}")

test_loss = 0.8903724891798837, test_acc = 0.7511160969734192
