In [18]:
import os
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm.notebook import tqdm 
from sklearn.metrics import accuracy_score, classification_report

import torch
import torch.nn.functional as F
from torch.utils import data
from torchinfo import summary
import torch.nn as nn
import torch.optim as optim

In [19]:
# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# torch.cuda.get_device_name() raises for a non-CUDA device, so only query
# it on the GPU path and report a plain "CPU" otherwise.
device_name = torch.cuda.get_device_name(device) if device.type == "cuda" else "CPU"
print(f"Using device: {device_name}")

cuda:0
Using device: NVIDIA GeForce RTX 4070


In [20]:
# Names of the label sets; one pickled label array exists per entry.
label_list = ['label_5', 'label_10', 'label_20', 'label_40', 'label_60']


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at files produced by a trusted pipeline.
    with open(path, 'rb') as f:
        return pickle.load(f)


nparray_all_data = _load_pickle('./np_data/nparray_all_data.pkl')

np_label_5 = _load_pickle('./np_data/nparray_all_label_label_5.pkl')
np_label_10 = _load_pickle('./np_data/nparray_all_label_label_10.pkl')
np_label_20 = _load_pickle('./np_data/nparray_all_label_label_20.pkl')
np_label_40 = _load_pickle('./np_data/nparray_all_label_label_40.pkl')
np_label_60 = _load_pickle('./np_data/nparray_all_label_label_60.pkl')

n = len(nparray_all_data)
# Train / validation / test split sizes: 80% / 10% / remainder.
train_nums = int(n*0.8)
val_nums = int(n*0.1)
print(f"All data shape: {nparray_all_data.shape}, Lebel data shape: {np_label_5.shape}")
print(f'train_nums: {train_nums}, val_nums: {val_nums}, test_nums: {n-train_nums-val_nums}')

# The splits below slice features and labels by position, so every label
# array must have exactly one entry per sample or the pairs would misalign.
for _label_name, _label_array in zip(
    label_list, (np_label_5, np_label_10, np_label_20, np_label_40, np_label_60)
):
    assert len(_label_array) == n, (
        f"{_label_name}: {len(_label_array)} labels for {n} samples"
    )


def _split_train_val_test(array):
    """Slice ``array`` into contiguous (train, val, test) parts, in that order.

    Uses the module-level ``train_nums`` and ``val_nums``; no shuffling is done.
    """
    val_end = train_nums + val_nums
    return array[:train_nums], array[train_nums:val_end], array[val_end:]


train_data, val_data, test_data = _split_train_val_test(nparray_all_data)

train_label_5, val_label_5, test_label_5 = _split_train_val_test(np_label_5)
train_label_10, val_label_10, test_label_10 = _split_train_val_test(np_label_10)
train_label_20, val_label_20, test_label_20 = _split_train_val_test(np_label_20)
train_label_40, val_label_40, test_label_40 = _split_train_val_test(np_label_40)
train_label_60, val_label_60, test_label_60 = _split_train_val_test(np_label_60)

All data shape: (2964051, 100, 5), Lebel data shape: (2964051,)
train_nums: 2371240, val_nums: 296405, test_nums: 296406


In [21]:
class Dataset(data.Dataset):
    """Map-style dataset that keeps all samples and labels as device tensors.

    Both arrays are converted to tensors and moved to the target device once,
    at construction time; ``__getitem__`` then only indexes into them.

    Args:
        data: Array-like of samples, indexed along its first axis. Stored as a
            ``float32`` tensor in ``self.x``.
        label: Array-like of class labels, one per sample. Cast to ``int64``
            and stored in ``self.y``.
        num_classes: Accepted for call-site compatibility; not used here.
        T: Stored as ``self.T`` but not otherwise used by this class
            (presumably the sequence length - TODO confirm).
        target_device: Device that should hold the tensors. ``None`` (the
            default) uses the module-level ``device``.

    Raises:
        ValueError: If ``data`` and ``label`` have different lengths.
    """

    def __init__(self, data, label,  num_classes, T, target_device=None):
        # Fail early, before any host-to-device copy, if samples and labels
        # cannot be paired one-to-one.
        if len(data) != len(label):
            raise ValueError(
                f"data and label must have the same length, "
                f"got {len(data)} and {len(label)}"
            )

        self.T = T

        dev = device if target_device is None else target_device

        # Convert straight to the final dtype/device instead of chaining
        # .to() calls, which materialises intermediate copies.
        self.x = torch.tensor(data, dtype=torch.float32, device=dev)
        self.y = torch.tensor(np.asarray(label).astype(np.int64), device=dev)

        self.length = len(self.x)

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair at ``index``."""
        return self.x[index], self.y[index]

batch_size = 128


def _make_val_dataset(label):
    """Wrap the validation samples with one label array."""
    return Dataset(data=val_data, label=label, num_classes=3, T=100)


def _make_val_loader(dataset):
    """Build an unshuffled loader over ``dataset`` using ``batch_size``."""
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)


# One validation dataset per label set, all built from the same val_data.
# NOTE(review): Dataset.__init__ converts its input with torch.tensor(), so
# each of these holds its own copy of the validation samples.
dataset_val_5, dataset_val_10, dataset_val_20, dataset_val_40, dataset_val_60 = (
    _make_val_dataset(label)
    for label in (val_label_5, val_label_10, val_label_20, val_label_40, val_label_60)
)

# Loaders are listed in the same order as the entries of label_list.
val_loader_list = [
    _make_val_loader(dataset)
    for dataset in (dataset_val_5, dataset_val_10, dataset_val_20, dataset_val_40, dataset_val_60)
]
val_loader_5, val_loader_10, val_loader_20, val_loader_40, val_loader_60 = val_loader_list

In [22]:
class LSTMModel(nn.Module):
    """LSTM classifier: LSTM -> dropout on the last time step -> linear -> softmax.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM applies its dropout only between stacked layers and warns
        # when a non-zero value is given with a single layer, so request it
        # only when there is more than one layer.
        lstm_dropout = 0.1 if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=lstm_dropout)
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class probabilities of shape ``(batch, num_classes)``.

        ``x`` is indexed as ``(batch, time, feature)`` (``batch_first=True``).
        """
        # No explicit (h0, c0): nn.LSTM defaults both to zeros created on the
        # input's own device and dtype, so the model no longer depends on a
        # module-level device variable.
        out, _ = self.lstm(x)

        # Classify from the hidden output at the final time step only.
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)

        # NOTE(review): this returns probabilities, not logits. If training
        # uses nn.CrossEntropyLoss, softmax is effectively applied twice -
        # confirm against the training code before changing.
        return F.softmax(out, dim=1)

# Model hyperparameters
input_size, hidden_size = 5, 64
num_layers, num_classes = 1, 3

# Build the network, then move its parameters to the selected device.
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)

In [23]:
# Evaluate the saved best checkpoint of every label set on its validation
# loader. val_loader_list is ordered like label_list, so the two are zipped
# instead of looking the index up on every iteration.
for name, val_loader in zip(label_list, val_loader_list):
    print(f"{'='*10} {name} {'='*10}")
    # map_location lets a checkpoint saved on a GPU load on whatever device
    # was selected for this run (including CPU-only machines).
    model.load_state_dict(torch.load(f'best_val_model_lstm_{name}.pth', map_location=device))
    model.eval()
    correct = 0
    correct_tf = 0
    total = 0
    total_tf = 0
    predictions = []
    labels = []
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            hits = predicted == targets

            # "True_False" subset: only samples whose target is class 0 or 2.
            # Counted with a boolean mask rather than a per-sample Python
            # loop, which forced a device sync for every element.
            tf_mask = (targets == 0) | (targets == 2)
            total_tf += tf_mask.sum().item()
            correct_tf += (hits & tf_mask).sum().item()

            total += targets.size(0)
            correct += hits.sum().item()
            predictions.append(predicted)
            labels.append(targets)

    # Avoid ZeroDivisionError when the split has no class-0/2 targets.
    accuracy_tf = correct_tf / total_tf if total_tf else float('nan')

    print(f'Label: {name}')
    print(f'Accuracy: {correct/total}')
    print(f'correct: {correct}, total: {total}')
    print(f'Accuracy_True_False: {accuracy_tf}')
    print(f'correct_True_False: {correct_tf}, total_tf: {total_tf}')
    print(classification_report(torch.cat(labels).cpu().numpy(), torch.cat(predictions).cpu().numpy()))

Label: label_5
Accuracy: 0.43231052107757967
correct: 128139, total: 296405
Accuracy_True_False: 0.6339140254739336
correct_True_False: 68483, total_tf: 108032
              precision    recall  f1-score   support

           0       0.30      0.60      0.40     54571
           1       0.84      0.32      0.46    188373
           2       0.31      0.67      0.42     53461

    accuracy                           0.43    296405
   macro avg       0.48      0.53      0.43    296405
weighted avg       0.64      0.43      0.44    296405

Label: label_10
Accuracy: 0.4436193721428451
correct: 131491, total: 296405
Accuracy_True_False: 0.5767164788780338
correct_True_False: 84711, total_tf: 146885
              precision    recall  f1-score   support

           0       0.36      0.55      0.44     74039
           1       0.70      0.31      0.43    149520
           2       0.37      0.60      0.46     72846

    accuracy                           0.44    296405
   macro avg       0.48    