In [1]:
import torch 
from torch import nn
from torch.utils import data # 获取迭代数据
from torch.autograd import Variable # 获取变量
import torchvision
from torchvision.datasets import mnist # 获取数据集

from torch.utils.data import random_split
import torch.utils.data as data
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, RandomSampler,WeightedRandomSampler
from torch.utils.data import random_split
from torch.utils.data import SequentialSampler
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
import csv
import math
import matplotlib.pyplot as plt
import os
# Working directory of the kernel; not referenced again in the visible notebook.
pwd = os.getcwd()
#print(pwd)

import os
import sys

# Project root is specified directly.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
project_root = "C:/Users/Admin/Mouse"  # replace with your own project root path
datautils_path = os.path.join(project_root, 'datautils')
model_path = os.path.join(project_root, 'model')

# Make the project's helper packages importable.
sys.path.extend([datautils_path, model_path])

# 导入模块
from data_utils import load_mouse_data, process_mouse_data

# 然后再导入其他模块
from resnet_mouse import resnet50_1d
from mouse_traj_classification import MouseNeuralNetwork, MouseNeuralNetwork2
from new_optim import SWATS

from torch.optim.lr_scheduler import CosineAnnealingLR
from warmup_scheduler import GradualWarmupScheduler

In [2]:
import random

# One seed shared by every random number generator used in this notebook.
random_seed = 3407

# Seed PyTorch, Python's `random` and NumPy, in that order.
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(random_seed)

# Request deterministic cuDNN kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# User whose data is modelled, and the trajectory window length.
user_id = 23
window_size = 400

# Locations of the pre-processed positive / negative sample files.
positive_samples_path = f'D:/论文数据/mouse/data/processed_data_user{user_id}/positive_samples_user{user_id}_{window_size}.json'
negative_samples_path = f'D:/论文数据/mouse/data/processed_data_user{user_id}/negative_samples_user{user_id}_{window_size}.json'

# Load and process both files into the feature tensor `X` and the label tensor `label`.
X, label = process_mouse_data(positive_samples_path, negative_samples_path)

# Split sizes: 70 % for training, the remainder for testing.
train_len = int(len(X) * 0.7)
test_len = len(X) - train_len


In [4]:

# 1. Sanity-check the dataset size and the planned split sizes.
for _caption, _count in (
    ("数据集总大小:", len(X)),
    ("训练集大小:", train_len),
    ("测试集大小:", test_len),
):
    print(_caption, _count)

数据集总大小: 2627
训练集大小: 1838
测试集大小: 789


In [5]:
# Build a dataset over all samples.
# NOTE(review): `dataset` is not referenced again in the visible notebook.
dataset = torch.utils.data.TensorDataset(
    X.float(),  # make sure the inputs are float
    label.long() # make sure the labels are long
)

# Separate the samples by class. In this notebook label 0 is called "positive"
# and label 1 is called "negative".
pos_indices = (label == 0)  # boolean mask of positive samples
neg_indices = (label == 1)  # boolean mask of negative samples
pos_data = X[pos_indices]
neg_data = X[neg_indices]
# The label slices are taken from `label` itself (not from `label.long()`), so they
# keep its original dtype.
pos_labels = label[pos_indices]
neg_labels = label[neg_indices]

print(f"\nOriginal distribution:")
print(f"Positive samples: {len(pos_data)}")
print(f"Negative samples: {len(neg_data)}")

# Per-class train / test sizes (70 % train, 30 % test) so both splits keep the
# class ratio of the full data.
n_pos = len(pos_data)
n_neg = len(neg_data)

train_pos_len = math.floor(n_pos * 0.7)
test_pos_len = n_pos - train_pos_len

train_neg_len = math.floor(n_neg * 0.7)
test_neg_len = n_neg - train_neg_len

# Random permutation of each class. The order of these randperm calls determines
# the split for a given seed.
pos_shuffle = torch.randperm(n_pos)
neg_shuffle = torch.randperm(n_neg)

# Positive class: first part goes to training, the rest to testing.
train_pos = pos_data[pos_shuffle[:train_pos_len]]
test_pos = pos_data[pos_shuffle[train_pos_len:]]

train_pos_labels = pos_labels[pos_shuffle[:train_pos_len]]
test_pos_labels = pos_labels[pos_shuffle[train_pos_len:]]

# Negative class: first part goes to training, the rest to testing.
train_neg = neg_data[neg_shuffle[:train_neg_len]]
test_neg = neg_data[neg_shuffle[train_neg_len:]]

train_neg_labels = neg_labels[neg_shuffle[:train_neg_len]]
test_neg_labels = neg_labels[neg_shuffle[train_neg_len:]]

# Merge both classes into the training and test sets.
train_data = torch.cat([train_pos, train_neg])
test_data = torch.cat([test_pos, test_neg])

train_labels = torch.cat([train_pos_labels, train_neg_labels])
test_labels = torch.cat([test_pos_labels, test_neg_labels])

# Shuffle each set so the classes are interleaved; data and labels share the
# same permutation.
train_indices = torch.randperm(len(train_data))
test_indices = torch.randperm(len(test_data))

train_data = train_data[train_indices]
train_labels = train_labels[train_indices]

test_data = test_data[test_indices]
test_labels = test_labels[test_indices]

# Wrap the splits as datasets.
train_dataset = torch.utils.data.TensorDataset(train_data, train_labels)
test_dataset = torch.utils.data.TensorDataset(test_data, test_labels)
# print(len(test_dataset))
# Report the class distribution after the split.
print("\nDistribution after split:")
print("Training set:")
print(f"Positive: {(train_labels == 0).sum().item()}, Negative: {(train_labels == 1).sum().item()}")
print("Test set:")
print(f"Positive: {(test_labels == 0).sum().item()}, Negative: {(test_labels == 1).sum().item()}")



Original distribution:
Positive samples: 2389
Negative samples: 238

Distribution after split:
Training set:
Positive: 1672, Negative: 166
Test set:
Positive: 717, Negative: 72


In [6]:
# Class counts over the full data set (informational only).
class_count = [len(label[label == t]) for t in torch.unique(label)]

# Per-sample weights for an optional class-balancing sampler: samples with label 1
# are drawn twice as often as samples with label 0.
# The weights are derived from `train_labels` (not from the full `label` tensor) so
# that every index the sampler can draw is a valid index into `train_dataset`.
weights = [2.0 if t == 1 else 1.0 for t in train_labels.cpu().numpy()]
sample_weights = torch.FloatTensor(weights)
# NOTE: the sampler is not attached to the loader below, which uses plain shuffling.
# To use it, pass `sampler=sampler` and remove `shuffle=True` (the two are mutually
# exclusive in DataLoader).
sampler = WeightedRandomSampler(sample_weights, len(sample_weights))


# Batch sizes: mini-batches for training, the whole test set in a single batch.
train_batch_size = 100
test_batch_size = len(test_dataset)

# Data loaders.
X_train_loader = torch.utils.data.DataLoader(
    train_dataset, 
    batch_size=train_batch_size,
    shuffle=True,
    drop_last=False,
    pin_memory=True
)


X_test_loader = torch.utils.data.DataLoader(
    test_dataset, 
    batch_size=test_batch_size,
    shuffle=False,
    drop_last=False,
    pin_memory=True
)

In [7]:
# Display the shape of the full feature tensor.
X.shape

torch.Size([2627, 11, 400])

In [8]:
# Persist both data loaders so later cells can reuse exactly the same split.
import pickle

# Target directory; create it on first use.
base_path = f'D:/论文数据/mouse/data_pkl/processed_data_user{user_id}'
if os.path.exists(base_path):
    print(f"Directory already exists: {base_path}")
else:
    os.makedirs(base_path)
    print(f"Created directory: {base_path}")

train_path = f'{base_path}/train_loader_user{user_id}_{window_size}.pkl'
test_path = f'{base_path}/test_loader_user{user_id}_{window_size}.pkl'

# Training loader first, then test loader.
for _loader, _target in ((X_train_loader, train_path), (X_test_loader, test_path)):
    with open(_target, 'wb') as f:
        pickle.dump(_loader, f)

print(f"Train loader saved to: {train_path}")
print(f"Test loader saved to: {test_path}")

Directory already exists: D:/论文数据/mouse/data_pkl/processed_data_user23
Train loader saved to: D:/论文数据/mouse/data_pkl/processed_data_user23/train_loader_user23_400.pkl
Test loader saved to: D:/论文数据/mouse/data_pkl/processed_data_user23/test_loader_user23_400.pkl


In [9]:
# Read the two data loaders back from their pickle files.
# NOTE(review): pickle.load can execute arbitrary code - only load files you created.
_restored = {}
for _split, _source in (("train", train_path), ("test", test_path)):
    with open(_source, 'rb') as f:
        _restored[_split] = pickle.load(f)
X_train_loader, X_test_loader = _restored["train"], _restored["test"]

print(f"Loaded train loader from: {train_path}")
print(f"Loaded test loader from: {test_path}")

Loaded train loader from: D:/论文数据/mouse/data_pkl/processed_data_user23/train_loader_user23_400.pkl
Loaded test loader from: D:/论文数据/mouse/data_pkl/processed_data_user23/test_loader_user23_400.pkl


In [10]:
# Read the sequence length and the number of features from the data.
sequence_length = X.shape[2]  # last axis of X
num_features = X.shape[1]     # middle axis of X

# Build the model for the actual sequence length of the data (previously a
# hard-coded 50 that did not match the loaded windows) and move it to the device.
model = MouseNeuralNetwork2(length_single_mouse_traj=sequence_length)
model = model.to(device)

# Learning-rate schedule: linear warm-up from min_lr to max_lr, then cosine decay.
max_lr = 1e-3
min_lr = 1e-7
warmup_epochs = 10
total_epochs = 300

# Optimizers are created after the model has been moved to the device.
optimizer = SWATS(model.parameters(), lr=1e-3)
optim_SGD = torch.optim.SGD(
    model.parameters(), 
    lr=1e-4, 
    momentum=0.80, 
    dampening=0, 
    weight_decay=1e-3, 
    nesterov=False
)

# Adam starts at min_lr; the warm-up scheduler below raises it to max_lr.
optim_ADAM = torch.optim.Adam(
    model.parameters(), 
    lr=min_lr
)

# After warmup, use cosine annealing for (total_epochs - warmup_epochs) steps
cosine_scheduler = CosineAnnealingLR(optim_ADAM, T_max=(total_epochs - warmup_epochs), eta_min=min_lr)

# The multiplier is how much you multiply the initial LR to get the target LR at the end of warmup.
# If initial LR = 1e-7 and we want 1e-3 after warmup:
# multiplier = (desired_lr_after_warmup) / (initial_lr) = 1e-3 / 1e-7 = 10,000
multiplier = max_lr / min_lr

# NOTE: both schedulers are bound to this particular `optim_ADAM` instance. If the
# optimizer is re-created later, the schedulers must be re-created as well.
warmup_scheduler = GradualWarmupScheduler(optim_ADAM, multiplier=multiplier, total_epoch=warmup_epochs, after_scheduler=cosine_scheduler)

# Inspect the first training batch (shapes and dtypes).
for batch_data, batch_labels in X_train_loader:
    print("数据维度:", batch_data.shape)
    print("标签维度:", batch_labels.shape)
    print("数据类型:", batch_data.dtype)
    print("标签类型:", batch_labels.dtype)
    break

数据维度: torch.Size([100, 11, 400])
标签维度: torch.Size([100])
数据类型: torch.float32
标签类型: torch.float32


In [11]:
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import torch
import torch.nn as nn
from IPython.display import clear_output
import matplotlib.pyplot as plt  # 导入matplotlib

def train_ADAM(model, X_train_loader, optimizer=None, epoch=300, scheduler=None):
    """Train `model` with cross-entropy loss and plot the training curves.

    Args:
        model: network to train; it is moved to CUDA when available, else CPU.
        X_train_loader: iterable yielding `(inputs, labels)` batches.
        optimizer: optimizer that owns `model`'s parameters (required).
        epoch: number of passes over `X_train_loader`.
        scheduler: learning-rate scheduler stepped once per epoch. When omitted,
            the notebook-level `warmup_scheduler` is used, as before.

    Returns:
        `(optimizer, epoch, losslist, correctlist, train_correctlist)` where
        `losslist` holds the mean loss of every epoch and the two accuracy lists
        hold the training accuracy recorded every 100 epochs.

    Raises:
        ValueError: if no optimizer is given or the loader yields no batches.
    """
    import warnings

    if optimizer is None:
        raise ValueError("train_ADAM requires an optimizer")
    if scheduler is None:
        # Backward-compatible default: the scheduler defined at notebook level.
        scheduler = warmup_scheduler
    # A scheduler only changes the learning rate of the optimizer it was built on.
    bound_optimizer = getattr(scheduler, 'optimizer', None)
    if bound_optimizer is not None and bound_optimizer is not optimizer:
        warnings.warn(
            "The scheduler is bound to a different optimizer than the one used for "
            "training; its steps will not change the training learning rate."
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loss_function = nn.CrossEntropyLoss().to(device)
    model = model.to(device)

    losslist = []
    correctlist = []
    train_correctlist = []
    logged_epochs = []  # epochs (1-based) at which the accuracy was recorded

    writer = SummaryWriter('/root/tf-logs')
    # Single progress bar covering all epochs.
    pbar = tqdm(total=epoch, desc='Training Progress')

    try:
        for i in range(epoch):
            model.train()
            totalloss = 0.0
            correct = 0
            total = 0
            num_batches = 0

            for X, label in X_train_loader:
                X = X.float().to(device)
                label = label.long().to(device)

                optimizer.zero_grad()
                pred = model(X).squeeze()
                if pred.dim() == 1:
                    # squeeze() also removed the batch axis of a one-sample batch.
                    pred = pred.unsqueeze(0)
                loss = loss_function(pred, label)

                totalloss += loss.item()
                num_batches += 1
                _, pred_id = torch.max(pred, dim=1)
                correct += (pred_id == label).sum().item()
                total += label.size(0)

                loss.backward()
                optimizer.step()

            if num_batches == 0:
                raise ValueError("X_train_loader yielded no batches")

            # Step the scheduler at the end of each epoch
            scheduler.step()

            # Log the current LR of the finished epoch to verify the schedule.
            current_lr = optimizer.param_groups[0]['lr']
            print(f"Epoch {i+1}: LR = {current_lr}")

            train_acc = correct / total
            avg_loss = totalloss / num_batches
            losslist.append(avg_loss)

            # Update the progress bar.
            pbar.update(1)
            pbar.set_postfix({
                'loss': f'{avg_loss:.4f}',
                'acc': f'{train_acc:.4f}'
            })

            # Record metrics every 100 epochs.
            if (i + 1) % 100 == 0:
                print(f'Epoch {i+1}/{epoch} - Loss: {avg_loss:.4f}, Accuracy: {train_acc:.4f}')
                correctlist.append(train_acc)
                train_correctlist.append(train_acc)
                logged_epochs.append(i + 1)
                writer.add_scalar('Training loss', avg_loss, i)
                writer.add_scalar('Training accuracy', train_acc, i)
    finally:
        # Release the progress bar and the TensorBoard writer even on failure.
        pbar.close()
        writer.close()

    # Loss curve: one point per epoch.
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, epoch + 1), losslist, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss over Epochs')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Accuracy curve: plotted against the epochs at which it was recorded, so the
    # x and y sequences always have the same length.
    plt.figure(figsize=(10, 5))
    plt.plot(logged_epochs, correctlist, label='Training Accuracy', color='orange', marker='o')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy over Epochs')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return optimizer, epoch, losslist, correctlist, train_correctlist


In [12]:
# Fresh model and optimizers for the training run.
model = MouseNeuralNetwork2(length_single_mouse_traj=window_size)
optimizer = SWATS(model.parameters(), lr=1e-3)

# Learning rate at the start of the warm-up phase.
warmup_start_lr = 1e-6
optim_ADAM = torch.optim.Adam(model.parameters(), lr=warmup_start_lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-2)

# The schedulers created earlier are bound to the previous Adam instance, so they
# would leave the learning rate of this new optimizer untouched. Rebuild them on
# the optimizer that is actually used for training.
cosine_scheduler = CosineAnnealingLR(optim_ADAM, T_max=(total_epochs - warmup_epochs), eta_min=min_lr)
# Factor that takes the LR from warmup_start_lr to max_lr by the end of the warm-up.
multiplier = max_lr / warmup_start_lr
warmup_scheduler = GradualWarmupScheduler(optim_ADAM, multiplier=multiplier, total_epoch=warmup_epochs, after_scheduler=cosine_scheduler)



In [None]:
# Train on the training set only, then unpack the returned history.
_pretrain_result = train_ADAM(model, X_train_loader, optimizer=optim_ADAM, epoch=total_epochs)
optim_ADAM, epoch, losslist, correctlist, train_correctlist = _pretrain_result

Training Progress:   2%|▉                                   | 50/2000 [00:28<16:32,  1.96it/s, loss=0.4109, acc=0.9211]

Epoch 50/2000 - Loss: 0.4109, Accuracy: 0.9211


Training Progress:   5%|█▊                                 | 100/2000 [00:55<16:41,  1.90it/s, loss=0.3869, acc=0.9369]

Epoch 100/2000 - Loss: 0.3869, Accuracy: 0.9369


Training Progress:   8%|██▋                                | 150/2000 [01:22<16:23,  1.88it/s, loss=0.3682, acc=0.9483]

Epoch 150/2000 - Loss: 0.3682, Accuracy: 0.9483


Training Progress:  10%|███▌                               | 200/2000 [01:50<16:25,  1.83it/s, loss=0.3522, acc=0.9652]

Epoch 200/2000 - Loss: 0.3522, Accuracy: 0.9652


Training Progress:  12%|████▍                              | 250/2000 [02:17<15:16,  1.91it/s, loss=0.3491, acc=0.9679]

Epoch 250/2000 - Loss: 0.3491, Accuracy: 0.9679


Training Progress:  15%|█████▎                             | 300/2000 [02:44<14:55,  1.90it/s, loss=0.3449, acc=0.9701]

Epoch 300/2000 - Loss: 0.3449, Accuracy: 0.9701


Training Progress:  18%|██████▏                            | 350/2000 [03:11<14:37,  1.88it/s, loss=0.3395, acc=0.9782]

Epoch 350/2000 - Loss: 0.3395, Accuracy: 0.9782


Training Progress:  20%|███████                            | 400/2000 [03:45<19:16,  1.38it/s, loss=0.3317, acc=0.9820]

Epoch 400/2000 - Loss: 0.3317, Accuracy: 0.9820


Training Progress:  22%|███████▉                           | 450/2000 [04:20<18:15,  1.42it/s, loss=0.3308, acc=0.9837]

Epoch 450/2000 - Loss: 0.3308, Accuracy: 0.9837


Training Progress:  25%|████████▌                          | 491/2000 [04:50<18:20,  1.37it/s, loss=0.3275, acc=0.9869]

In [None]:
# Save the trained weights under the 'model' key of a checkpoint dictionary.
state = dict(model=model.state_dict())
_pretrain_save_path = f'D:/论文数据/mouse/model_pt/resnet/only-adam-user{user_id}_{window_size}-path.pt'
torch.save(state, _pretrain_save_path)

In [None]:
import torch

# Count the trainable parameters on a throw-away instance so that the trained
# `model` in the notebook namespace is not replaced by an untrained one.
param_probe_model = MouseNeuralNetwork2(X.shape[2])
params = sum(p.numel() for p in param_probe_model.parameters() if p.requires_grad)
print(f"Model has {params:,} trainable parameters")

In [None]:
# Instantiate the two networks used for transfer learning. Construction order is
# kept as is because weight initialisation draws from the seeded RNG.
model_transfer = MouseNeuralNetwork(window_size)  # new model that receives the pretrained weights
model_pretrain = MouseNeuralNetwork2(window_size)  # same architecture as the saved pretrained model

In [None]:
# Load the pretrained checkpoint. weights_only=True restricts unpickling to tensors
# and plain containers, which is safer than the default.
pretrain_path = f'D:/论文数据/mouse/model_pt/resnet/only-adam-user{user_id}_{window_size}-path.pt'
checkpoint = torch.load(pretrain_path, weights_only=True)  # weights_only=True for safety
print(f"Loading pretrained model from: {pretrain_path}")
# NOTE(review): `model_pretrain` is not referenced again in the visible notebook.
model_pretrain.load_state_dict(checkpoint['model'])

In [None]:
# Copy the pretrained weights into the ResNet1D sub-module of the transfer model.
# strict=False silently skips keys that do not match, so check what was really
# loaded instead of reporting success unconditionally.
load_result = model_transfer.resnet.load_state_dict(checkpoint['model'], strict=False)
target_key_count = len(model_transfer.resnet.state_dict())
matched_key_count = target_key_count - len(load_result.missing_keys)
print(
    f"Matched {matched_key_count}/{target_key_count} ResNet1D tensors "
    f"({len(load_result.unexpected_keys)} checkpoint keys unused)."
)
if matched_key_count == 0:
    print("WARNING: no checkpoint key matched model_transfer.resnet; pretrained weights were NOT loaded.")
else:
    print("Pretrained ResNet1D weights loaded successfully.")

# Keep the ResNet1D parameters trainable (fine-tuning).
# Set requires_grad = False here instead to freeze them.
for param in model_transfer.resnet.parameters():
    param.requires_grad = True

In [None]:
# Restore the saved data loaders for the transfer-learning stage.
# NOTE(review): pickle.load can execute arbitrary code - only load files you created.
import pickle

base_path = f'D:/论文数据/mouse/data_pkl/processed_data_user{user_id}'
train_path = f'{base_path}/train_loader_user{user_id}_{window_size}.pkl'
test_path = f'{base_path}/test_loader_user{user_id}_{window_size}.pkl'

print(f"Loading data loaders for user {user_id}:")
_restored = {}
for _split, _source in (("train", train_path), ("test", test_path)):
    with open(_source, 'rb') as f:
        _restored[_split] = pickle.load(f)
X_train_loader, X_test_loader = _restored["train"], _restored["test"]


In [None]:
# Move the model to the device selected when the notebook was set up (GPU when
# available, otherwise CPU) instead of unconditionally requiring CUDA.
model_transfer = model_transfer.to(device)
print(f"Model moved to: {next(model_transfer.parameters()).device}")

In [None]:
# Learning-rate bounds for the transfer stage (peak after warm-up, floor of the decay).
max_lr_transfer, min_lr_transfer = 1e-3, 1e-7
# Length of the warm-up phase and of the whole transfer training, in epochs.
warmup_epochs_transfer, total_epochs_transfer = 10, 300

In [None]:
# Optimizers over the parameters of the transfer model.
optimizer = SWATS(model_transfer.parameters(), lr=1e-3)
optim_SGD = torch.optim.SGD(
    model_transfer.parameters(),
    lr=1e-4,
    momentum=0.8,
    dampening=0,
    weight_decay=1e-3,
    nesterov=False,
)
# Adam starts at the minimum LR; the warm-up scheduler raises it from there.
optim_ADAM_transfer = torch.optim.Adam(model_transfer.parameters(), lr=min_lr_transfer)

# Cosine annealing takes over once the warm-up is finished and runs for the
# remaining (total - warm-up) epochs.
cosine_scheduler_transfer = CosineAnnealingLR(
    optim_ADAM_transfer,
    T_max=(total_epochs_transfer - warmup_epochs_transfer),
    eta_min=min_lr_transfer,
)

# Ratio between the LR reached at the end of the warm-up and the initial LR,
# e.g. 1e-3 / 1e-7 = 10,000.
multiplier_transfer = max_lr_transfer / min_lr_transfer

warmup_scheduler_transfer = GradualWarmupScheduler(
    optim_ADAM_transfer,
    multiplier=multiplier_transfer,
    total_epoch=warmup_epochs_transfer,
    after_scheduler=cosine_scheduler_transfer,
)

In [None]:
def train_ADAM_transfer(model, X_train_loader, optimizer=None, epoch=300, scheduler=None):
    """Fine-tune `model` with cross-entropy loss and plot the training curves.

    Args:
        model: network to train; it is moved to CUDA when available, else CPU.
        X_train_loader: iterable yielding `(inputs, labels)` batches.
        optimizer: optimizer that owns `model`'s parameters (required).
        epoch: number of passes over `X_train_loader`.
        scheduler: learning-rate scheduler stepped once per epoch. When omitted,
            the notebook-level `warmup_scheduler_transfer` is used, as before.

    Returns:
        `(optimizer, epoch, losslist, correctlist, train_correctlist)` where
        `losslist` holds the mean loss of every epoch and the two accuracy lists
        hold the training accuracy recorded every 100 epochs.

    Raises:
        ValueError: if no optimizer is given or the loader yields no batches.
    """
    import warnings

    if optimizer is None:
        raise ValueError("train_ADAM_transfer requires an optimizer")
    if scheduler is None:
        # Backward-compatible default: the scheduler defined at notebook level.
        scheduler = warmup_scheduler_transfer
    # A scheduler only changes the learning rate of the optimizer it was built on.
    bound_optimizer = getattr(scheduler, 'optimizer', None)
    if bound_optimizer is not None and bound_optimizer is not optimizer:
        warnings.warn(
            "The scheduler is bound to a different optimizer than the one used for "
            "training; its steps will not change the training learning rate."
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loss_function = nn.CrossEntropyLoss().to(device)
    model = model.to(device)

    losslist = []
    correctlist = []
    train_correctlist = []
    logged_epochs = []  # epochs (1-based) at which the accuracy was recorded

    writer = SummaryWriter('/root/tf-logs')
    # Single progress bar covering all epochs.
    pbar = tqdm(total=epoch, desc='Training Progress')

    try:
        for i in range(epoch):
            model.train()
            totalloss = 0.0
            correct = 0
            total = 0
            num_batches = 0

            for X, label in X_train_loader:
                X = X.float().to(device)
                label = label.long().to(device)

                optimizer.zero_grad()
                pred = model(X).squeeze()
                if pred.dim() == 1:
                    # squeeze() also removed the batch axis of a one-sample batch.
                    pred = pred.unsqueeze(0)
                loss = loss_function(pred, label)

                totalloss += loss.item()
                num_batches += 1
                _, pred_id = torch.max(pred, dim=1)
                correct += (pred_id == label).sum().item()
                total += label.size(0)

                loss.backward()
                optimizer.step()

            if num_batches == 0:
                raise ValueError("X_train_loader yielded no batches")

            # Step the scheduler at the end of each epoch
            scheduler.step()

            # Log the current LR of the finished epoch to verify the schedule.
            current_lr = optimizer.param_groups[0]['lr']
            print(f"Epoch {i+1}: LR = {current_lr}")

            train_acc = correct / total
            avg_loss = totalloss / num_batches
            losslist.append(avg_loss)

            # Update the progress bar.
            pbar.update(1)
            pbar.set_postfix({
                'loss': f'{avg_loss:.4f}',
                'acc': f'{train_acc:.4f}'
            })

            # Record metrics every 100 epochs.
            if (i + 1) % 100 == 0:
                print(f'Epoch {i+1}/{epoch} - Loss: {avg_loss:.4f}, Accuracy: {train_acc:.4f}')
                correctlist.append(train_acc)
                train_correctlist.append(train_acc)
                logged_epochs.append(i + 1)
                writer.add_scalar('Training loss', avg_loss, i)
                writer.add_scalar('Training accuracy', train_acc, i)
    finally:
        # Release the progress bar and the TensorBoard writer even on failure.
        pbar.close()
        writer.close()

    # Loss curve: one point per epoch.
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, epoch + 1), losslist, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss over Epochs')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # Accuracy curve: plotted against the epochs at which it was recorded, so the
    # x and y sequences always have the same length.
    plt.figure(figsize=(10, 5))
    plt.plot(logged_epochs, correctlist, label='Training Accuracy', color='orange', marker='o')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy over Epochs')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return optimizer, epoch, losslist, correctlist, train_correctlist

In [None]:
# Train the transfer model.
# The optimizer must be the one built over model_transfer.parameters()
# (`optim_ADAM_transfer`), which is also the optimizer driven by
# `warmup_scheduler_transfer`. Passing the pre-training optimizer `optim_ADAM`
# would step the parameters of a different model and leave `model_transfer`
# unchanged.
optim_ADAM_transfer, epoch, losslist, correctlist, train_correctlist = train_ADAM_transfer(
    model_transfer, 
    X_train_loader, 
    optimizer=optim_ADAM_transfer, 
    epoch=total_epochs_transfer
)

In [None]:
# Save the fine-tuned weights together with the number of training epochs.
transfer_save_path = f'D:/论文数据/mouse/model_pt/ending_model/gru-transfer-user{user_id}_{window_size}-path.pt'  # adjust to your own path
state = dict(model=model_transfer.state_dict(), epoch=epoch)
torch.save(state, transfer_save_path)
print(f"Transfer model saved to: {transfer_save_path}")

In [None]:
import torch

# Count the trainable parameters on a throw-away instance so that `model` in the
# notebook namespace is not replaced by an untrained network of another class.
param_probe_model = MouseNeuralNetwork(X.shape[2])
params = sum(p.numel() for p in param_probe_model.parameters() if p.requires_grad)
print(f"Model has {params:,} trainable parameters")

In [None]:
import os
import sys
import random
import time
import json
import dill
import csv
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import (
    DataLoader,
    RandomSampler,
    WeightedRandomSampler,
    SequentialSampler,
    random_split,
    TensorDataset
)
from torch.optim import Optimizer, SGD, Adam
from torch.backends import cudnn
from torch.autograd import Variable

import torchvision
from torchvision.datasets import mnist

from sklearn import metrics

# Set the random seed for reproducibility: PyTorch, Python's `random` and NumPy
# (NumPy is seeded too, matching the seeding done at the top of the notebook).
random_seed = 3407
torch.manual_seed(random_seed)
cudnn.deterministic = True
cudnn.benchmark = False
random.seed(random_seed)
np.random.seed(random_seed)

# Define project root and add the helper packages to the import path.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
project_root = "C:/Users/Admin/Mouse"  # Replace with your project root path
datautils_path = os.path.join(project_root, 'datautils')
model_path = os.path.join(project_root, 'model')
sys.path.extend([datautils_path, model_path])

# Import custom modules
from data_utils import (
    load_mouse_data,
    process_mouse_data,
    read_test_data_shape,
    insert_new_test_data
)
from testdata_utils import process_predict_data
from resnet_mouse import resnet50_1d
from mouse_traj_classification import MouseNeuralNetwork, MouseNeuralNetwork2
from new_optim import SWATS

# Optional variables
# user_id = 23
# window_size = 10



In [None]:
def save_results_to_csv(user_id, window_size, metrics_dict, csv_path):
    """Append one row of evaluation metrics to a CSV file, creating it if needed.

    Args:
        user_id: user identifier; stored as ``f'user{user_id}'``.
        window_size: window size used for the evaluated model.
        metrics_dict: mapping with the keys 'recall', 'accuracy', 'precision',
            'f1', 'auc' and 'eer'.
        csv_path: path of the CSV file to create or extend.

    Raises:
        KeyError: if `metrics_dict` lacks one of the required keys.
    """
    # Column order of the result file.
    columns = ['user_id', 'window_size', 'recall', 'accuracy', 'precision', 'F1', 'AUC', 'EER']

    new_row = pd.DataFrame([{
        'user_id': f'user{user_id}',
        'window_size': window_size,
        'recall': metrics_dict['recall'],
        'accuracy': metrics_dict['accuracy'],
        'precision': metrics_dict['precision'],
        'F1': metrics_dict['f1'],
        'AUC': metrics_dict['auc'],
        'EER': metrics_dict['eer']
    }], columns=columns)

    if os.path.exists(csv_path):
        existing = pd.read_csv(csv_path)
        if existing.empty:
            # Header-only file: keep its columns but do not concatenate with an
            # empty frame (deprecated in pandas and it degrades the dtypes).
            merged_columns = list(existing.columns) + [c for c in columns if c not in existing.columns]
            df = new_row.reindex(columns=merged_columns)
        else:
            df = pd.concat([existing, new_row], ignore_index=True)
    else:
        # First result: the new row is the whole table.
        df = new_row

    df.to_csv(csv_path, index=False)
    print(f"Results saved to {csv_path}")

In [None]:
# 评估函数
def val(model, data_loader):
    """Run `model` over every batch of `data_loader` in inference mode.

    The model is switched to evaluation mode and gradients are disabled for the
    duration of the call; its previous train/eval mode is restored afterwards.
    Inputs are moved to the device of the model's parameters (CPU if it has none).

    Args:
        model: the network to evaluate.
        data_loader: iterable yielding `(inputs, labels)` batches.

    Returns:
        `(pred_id, pred, label)` as CPU tensors covering ALL batches: predicted
        class indices, raw model outputs and the true labels.

    Raises:
        ValueError: if `data_loader` yields no batches.
    """
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()

    pred_id_batches = []
    pred_batches = []
    label_batches = []
    correct = 0
    fenmu = 0  # number of evaluated samples (denominator of the accuracy)
    total_inference_time = 0.0
    num_inferences = 0

    try:
        with torch.no_grad():
            for X, label in data_loader:
                X = X.to(model_device)

                # Time only the forward pass.
                start_time = time.time()
                pred = model(X).squeeze()
                end_time = time.time()

                total_inference_time += (end_time - start_time)
                num_inferences += X.size(0)

                if pred.dim() == 1:
                    # squeeze() also removed the batch axis of a one-sample batch.
                    pred = pred.unsqueeze(0)
                pred = pred.cpu()
                label = label.cpu()

                _, pred_id = torch.max(pred, dim=1)
                correct += (pred_id == label).sum().item()
                fenmu += label.size(0)

                pred_id_batches.append(pred_id)
                pred_batches.append(pred)
                label_batches.append(label)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    if fenmu == 0:
        raise ValueError("data_loader yielded no batches")

    avg_inference_time = total_inference_time / num_inferences
    print(f"Average inference time: {avg_inference_time:.6f} seconds")
    print(f"Accuracy: {correct / fenmu:.4f}")

    return torch.cat(pred_id_batches), torch.cat(pred_batches), torch.cat(label_batches)

def recall(predictions, labels):
    """Return TP / (TP + FN) for class 1, or 0.0 when there is no class-1 label."""
    TP, _, _, FN = et_TPFN(predictions, labels)
    if (TP + FN) == 0:
        # No sample of class 1 among the labels: recall is undefined, report 0.
        return 0.0
    return TP / (TP + FN)

def et_TPFN(predictions, labels):
    """Count the confusion-matrix cells, treating class 1 as the target class.

    Args:
        predictions: indexable sequence of predicted class ids (0 or 1).
        labels: indexable sequence of true class ids, same length.

    Returns:
        The tuple `(TP, FP, TN, FN)`. Pairs whose values are not 0/1 are ignored.
    """
    TP, FP, TN, FN = 0, 0, 0, 0
    for i in range(len(predictions)):
        predicted, actual = predictions[i], labels[i]
        if predicted == 1 and actual == 1:
            TP += 1
        elif predicted == 1 and actual == 0:
            FP += 1
        elif predicted == 0 and actual == 0:
            TN += 1
        elif predicted == 0 and actual == 1:
            FN += 1
    return TP, FP, TN, FN

def pre(predictions, labels):
    """Return TP / (TP + FP) for class 1, or 0.0 when class 1 is never predicted."""
    TP, FP, _, _ = et_TPFN(predictions, labels)
    if (FP + TP) == 0:
        # Nothing was predicted as class 1: precision is undefined, report 0.
        return 0.0
    return TP / (FP + TP)
# 在评估函数部分添加新的指标计算函数
def calculate_f1(precision, recall):
    """Return the F1 score (harmonic mean of precision and recall); 0 if both are 0."""
    denominator = precision + recall
    return 2 * (precision * recall) / denominator if denominator != 0 else 0

def calculate_eer(fpr, tpr):
    """Return the Equal Error Rate estimate from ROC arrays.

    The EER is read off as the FPR at the ROC point where the false-accept rate
    (FPR) and the false-reject rate (FNR = 1 - TPR) are closest to each other.
    """
    gap = np.abs((1 - tpr) - fpr)
    closest = np.nanargmin(gap)
    return fpr[closest]

In [None]:
import dill
import torch
import json
import numpy as np


# 1. Load the original test data loader from disk.
# NOTE(review): dill.load can execute arbitrary code - only load files you created.
# `user_id` and `window_size` must already be defined in the kernel; their
# assignments in the import cell above are commented out.
test_path = f'D:/论文数据/mouse/data_pkl/processed_data_user{user_id}/test_loader_user{user_id}_{window_size}.pkl'
print(f"\nLoading test data from: {test_path}")
with open(test_path, 'rb') as f:
    X_test_loader = dill.load(f)

# Gather every test batch into two tensors (X_all, y_all).
X_list = []
y_list = []
for data in X_test_loader:
    X_batch, y_batch = data
    X_list.append(X_batch)
    y_list.append(y_batch)

X_all = torch.cat(X_list, dim=0)
y_all = torch.cat(y_list, dim=0)

# Summary of the loaded test set (label 0 = positive, label 1 = negative).
print(f"\nOriginal Test Dataset Statistics:")
print(f"Total number of samples: {len(X_all)}")
print(f"Positive (0): {(y_all == 0).sum().item()}")
print(f"Negative (1): {(y_all == 1).sum().item()}")
print(f"Batch size: {X_test_loader.batch_size}")
print(f"Number of batches: {len(X_test_loader)}")
print(f"Sample shape: {X_all.shape[1:]}")




In [None]:
# 2. Load and process the prediction samples.
predict_path = f'D:/论文数据/mouse/data/processed_data_user{user_id}/predict_samples_user{user_id}_{window_size}.json'
print(f"\nLoading predict data from: {predict_path}")

with open(predict_path, 'r') as f:
    predict_data = json.load(f)

# Convert the prediction samples to a tensor: each sample is a list of steps and
# each step is read feature by feature in the order given by the file's metadata.
trajectories = []
feature_names = predict_data['metadata']['feature_names']
for sample in predict_data['samples']:
    trajectory = np.array([[step[feature] for feature in feature_names] for step in sample])
    trajectories.append(trajectory)

X_predict = np.array(trajectories)
X_predict = torch.FloatTensor(X_predict).transpose(1, 2)  # [N, features, time]

# Make the rank of the prediction tensor match the original test data:
# add an axis before the feature axis if the test data is 4-D, ...
if len(X_all.shape) == 4 and X_predict.dim() == 3:
    X_predict = X_predict.unsqueeze(dim=1)  # insert an axis before the feature axis
elif len(X_all.shape) == 3 and X_predict.dim() == 4:
    # ... or drop that axis if the test data is 3-D but the predictions are 4-D.
    X_predict = X_predict.squeeze(dim=1)

print(f"Predict samples shape after processing: {X_predict.shape}")

# 3. Select a fixed number of samples: 456 positive, 44 negative and 44 prediction samples.
desired_pos = 456
desired_neg = 44
desired_predict = 44

pos_indices = torch.where(y_all == 0)[0]
neg_indices = torch.where(y_all == 1)[0]

# Fail early when the test set cannot provide the requested number of samples.
if len(pos_indices) < desired_pos:
    raise ValueError(f"Not enough positive samples. Required {desired_pos}, got {len(pos_indices)}.")
if len(neg_indices) < desired_neg:
    raise ValueError(f"Not enough negative samples. Required {desired_neg}, got {len(neg_indices)}.")

# Random selection of the requested number of positive / negative samples.
pos_indices = pos_indices[torch.randperm(len(pos_indices))[:desired_pos]]
neg_indices = neg_indices[torch.randperm(len(neg_indices))[:desired_neg]]

X_selected = torch.cat([X_all[pos_indices], X_all[neg_indices]], dim=0)
y_selected = torch.cat([y_all[pos_indices], y_all[neg_indices]], dim=0)

# Random selection of `desired_predict` prediction samples.
if len(X_predict) < desired_predict:
    print(f"Warning: Only {len(X_predict)} predict samples available, less than desired {desired_predict}")
    desired_predict = len(X_predict)  # use all of them when there are too few
indices = torch.randperm(len(X_predict))[:desired_predict]
X_insert = X_predict[indices]

# Labels for the inserted prediction samples (1 = negative).
label_insert = torch.ones(len(X_insert), dtype=torch.int64)

# Merge the prediction samples with the selected original samples.
X_final = torch.cat([X_selected, X_insert], dim=0)
y_final = torch.cat([y_selected, label_insert], dim=0)

print("\nFinal Test Dataset Statistics:")
print(f"Total number of samples: {len(X_final)}")
print(f"Positive (0): {(y_final == 0).sum().item()}")
print(f"Negative (1): {(y_final == 1).sum().item()}")
print(f"Data shape: {X_final.shape[1:]}")

# Shuffle X_final and y_final with a shared permutation.
final_indices = torch.randperm(len(X_final))
X_final = X_final[final_indices]
y_final = y_final[final_indices]

# X_final and y_final now form the final test set
# (desired_pos positive, desired_neg negative, desired_predict prediction samples).

In [None]:
import torch
import numpy as np


print("Creating new test DataLoader from the selected final samples...")

# Wrap X_final / y_final as a dataset and a single-batch, unshuffled DataLoader.
test_dataset = torch.utils.data.TensorDataset(X_final, y_final)
new_test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

# Shape of one sample after the prediction samples were inserted.
shape_new_single_mouse_traj = X_final.shape[1:]
print(f"Shape after insertion: {shape_new_single_mouse_traj}")

# Inspect the new loader. The loop variables use their own names so that the
# notebook-level `X` and `label` tensors are not overwritten by a batch.
print("\nNew Test DataLoader Details:")
total_samples = 0
total_batches = 0
batch_sizes = []

for batch_idx, (batch_X, batch_labels) in enumerate(new_test_dataloader):
    total_samples += len(batch_X)
    total_batches += 1
    batch_sizes.append(len(batch_X))
    if batch_idx == 0:
        print(f"Single batch shape: {batch_X.shape}")
        # Axis 1 is reported as the feature axis and axis 2 as the sequence axis.
        print(f"Features dimension: {batch_X.shape[1]}")
        print(f"Sequence length: {batch_X.shape[2]}")

print(f"\nTotal number of samples: {total_samples}")
print(f"Number of batches: {total_batches}")
print(f"Batch sizes: {batch_sizes}")

# Label distribution over the whole loader.
all_labels = []
for _, batch_labels in new_test_dataloader:
    all_labels.extend(batch_labels.cpu().numpy())
    
unique_labels, counts = np.unique(all_labels, return_counts=True)
print("\nLabel distribution:")
for label_value, count in zip(unique_labels, counts):
    print(f"Label {label_value}: {count} samples")


In [None]:
# 3. Create the model and load the fine-tuned weights.
New_model_new_test = MouseNeuralNetwork(shape_new_single_mouse_traj[-1])
model_path = f'D:/论文数据/mouse/model_pt/ending_model/gru-transfer-user{user_id}_{window_size}-path.pt'
print(f"\nLoading model from: {model_path}")
checkpoint = torch.load(model_path, weights_only=True) 
New_model_new_test.load_state_dict(checkpoint['model'])
# A freshly constructed module is in training mode. Switch to evaluation mode so
# that layers such as dropout / batch normalisation behave deterministically
# during inference.
New_model_new_test.eval()

In [None]:
from sklearn import metrics
import numpy as np
import os
import torch

# 4. Evaluate the original test set
# Runs `n_rounds` evaluation passes of the loaded model over X_test_loader,
# collects recall / accuracy / precision / F1 / AUC / EER and the confusion
# matrix for each pass, prints mean +/- std, and saves the means to CSV.
print("\nEvaluating original test set...")
n_rounds = 1  # number of evaluation passes; also used in the progress message

d_c = []      # recall
d_acc = []    # accuracy
d_precsion = [] # precision
d_f1 = []     # F1 score
d_auc = []    # AUC
d_eer = []    # EER
d_confusion_matrices = []  # confusion matrices

for round_idx in range(n_rounds):
    pred_id_list, pred_list, label_list = val(New_model_new_test, X_test_loader)

    # scikit-learn needs host-side arrays: detach/move tensors to the CPU, and
    # coerce anything else (e.g. a plain list) so the [:, 1] slice below works.
    if isinstance(pred_list, torch.Tensor):
        pred_probs = pred_list.detach().cpu().numpy()
    else:
        pred_probs = np.asarray(pred_list)

    if isinstance(label_list, torch.Tensor):
        labels = label_list.cpu().numpy()
    else:
        labels = np.asarray(label_list)

    # Basic metrics from the notebook's own helpers
    zhaohui = recall(pred_id_list, label_list)
    c = et_TPFN(pred_id_list, label_list)
    # presumably c[0] and c[2] are the correctly classified counts (TP / TN) --
    # verify against et_TPFN's return order
    acc = (c[0]+c[2])/( c[0]+c[1]+c[2]+c[3])
    precision = pre(pred_id_list, label_list)

    # F1 score
    f1 = calculate_f1(precision, zhaohui)

    # ROC and AUC, using column 1 of the predictions as the positive-class score
    fpr, tpr, _ = metrics.roc_curve(labels, pred_probs[:, 1])
    auc = metrics.auc(fpr, tpr)

    # EER
    eer = calculate_eer(fpr, tpr)

    # Confusion matrix
    conf_matrix = metrics.confusion_matrix(labels, pred_id_list)

    # Store every metric for this round
    d_c.append(zhaohui)
    d_acc.append(acc)
    d_precsion.append(precision)
    d_f1.append(f1)
    d_auc.append(auc)
    d_eer.append(eer)
    d_confusion_matrices.append(conf_matrix)

    print(f"Round {round_idx+1}/{n_rounds} completed")

print("\nOriginal Test Set Results:")
print(f"Average Recall: {np.mean(d_c):.4f} ± {np.std(d_c):.4f}")
print(f"Average Accuracy: {np.mean(d_acc):.4f} ± {np.std(d_acc):.4f}")
print(f"Average Precision: {np.mean(d_precsion):.4f} ± {np.std(d_precsion):.4f}")
print(f"Average F1 Score: {np.mean(d_f1):.4f} ± {np.std(d_f1):.4f}")
print(f"Average AUC: {np.mean(d_auc):.4f} ± {np.std(d_auc):.4f}")
print(f"Average EER: {np.mean(d_eer):.4f} ± {np.std(d_eer):.4f}")

# Print the confusion matrix of every round
for i, conf_matrix in enumerate(d_confusion_matrices):
    print(f"\nConfusion Matrix for Round {i+1}:")
    print(conf_matrix)

# Mean metrics for the original test set (also read by the comparison plots)
original_metrics = {
    'recall': np.mean(d_c),
    'accuracy': np.mean(d_acc),
    'precision': np.mean(d_precsion),
    'f1': np.mean(d_f1),
    'auc': np.mean(d_auc),
    'eer': np.mean(d_eer)
}

# Output directory for result files
base_path = 'D:/论文数据/mouse/RESULTS'

# Make sure the directory exists
os.makedirs(base_path, exist_ok=True)

# The file name is keyed by user_id only; window_size is handed to
# save_results_to_csv as an argument rather than encoded in the name.
filename = f'original_test_results_user{user_id}.csv'

# Full path of the output file
full_path = os.path.join(base_path, filename)

# Persist the results
save_results_to_csv(user_id, window_size, original_metrics, full_path)


In [None]:
# 5. Evaluate the test set after the predicted samples were inserted
# Same procedure as the original-test-set evaluation, but over
# new_test_dataloader; results go into the *_2 lists and `merged_metrics`.

# Re-import locally: the plotting cell further down rebinds the global name
# `metrics` to a list, so re-running this cell afterwards would otherwise fail
# on `metrics.roc_curve`.
from sklearn import metrics

print("\nEvaluating test set with predict samples...")
n_rounds = 1  # number of evaluation passes; also used in the progress message

d_c_2 = []          # recall
d_acc_2 = []        # accuracy
d_precsion_2 = []   # precision
d_f1_2 = []         # F1 score
d_auc_2 = []        # AUC
d_eer_2 = []        # EER
d_confusion_matrices2 = []  # confusion matrices

for round_idx in range(n_rounds):
    pred_id_list, pred_list, label_list = val(New_model_new_test, new_test_dataloader)

    # scikit-learn needs host-side arrays: detach/move tensors to the CPU, and
    # coerce anything else (e.g. a plain list) so the [:, 1] slice below works.
    if isinstance(pred_list, torch.Tensor):
        pred_probs = pred_list.detach().cpu().numpy()
    else:
        pred_probs = np.asarray(pred_list)

    if isinstance(label_list, torch.Tensor):
        labels = label_list.cpu().numpy()
    else:
        labels = np.asarray(label_list)

    # Basic metrics from the notebook's own helpers
    zhaohui = recall(pred_id_list, label_list)
    c = et_TPFN(pred_id_list, label_list)
    # presumably c[0] and c[2] are the correctly classified counts (TP / TN) --
    # verify against et_TPFN's return order
    acc = (c[0]+c[2])/( c[0]+c[1]+c[2]+c[3])
    precision = pre(pred_id_list, label_list)

    # F1 score
    f1 = calculate_f1(precision, zhaohui)

    # ROC and AUC, using column 1 of the predictions as the positive-class score
    fpr, tpr, _ = metrics.roc_curve(labels, pred_probs[:, 1])
    auc = metrics.auc(fpr, tpr)

    # EER
    eer = calculate_eer(fpr, tpr)

    # Confusion matrix
    conf_matrix = metrics.confusion_matrix(labels, pred_id_list)

    # Store every metric for this round
    d_c_2.append(zhaohui)
    d_acc_2.append(acc)
    d_precsion_2.append(precision)
    d_f1_2.append(f1)
    d_auc_2.append(auc)
    d_eer_2.append(eer)
    d_confusion_matrices2.append(conf_matrix)
    print(f"Round {round_idx+1}/{n_rounds} completed")

print("\nTest Set with Predict Samples Results:")
print(f"Average Recall: {np.mean(d_c_2):.4f} ± {np.std(d_c_2):.4f}")
print(f"Average Accuracy: {np.mean(d_acc_2):.4f} ± {np.std(d_acc_2):.4f}")
print(f"Average Precision: {np.mean(d_precsion_2):.4f} ± {np.std(d_precsion_2):.4f}")
print(f"Average F1 Score: {np.mean(d_f1_2):.4f} ± {np.std(d_f1_2):.4f}")
print(f"Average AUC: {np.mean(d_auc_2):.4f} ± {np.std(d_auc_2):.4f}")
print(f"Average EER: {np.mean(d_eer_2):.4f} ± {np.std(d_eer_2):.4f}")

# Mean metrics for the test set with predicted samples (also read by the comparison plots)
merged_metrics = {
    'recall': np.mean(d_c_2),
    'accuracy': np.mean(d_acc_2),
    'precision': np.mean(d_precsion_2),
    'f1': np.mean(d_f1_2),
    'auc': np.mean(d_auc_2),
    'eer': np.mean(d_eer_2)
}

# Print the confusion matrix of every round
for i, conf_matrix in enumerate(d_confusion_matrices2):
    print(f"\nConfusion Matrix for Round {i+1}:")
    print(conf_matrix)


# Output directory for result files
base_path = 'D:/论文数据/mouse/RESULTS'

# Make sure the directory exists
os.makedirs(base_path, exist_ok=True)

# The file name is keyed by user_id only; window_size is handed to
# save_results_to_csv as an argument rather than encoded in the name.
filename = f'merged_test_results_user{user_id}.csv'

# Full path of the output file
full_path = os.path.join(base_path, filename)

# Persist the results
save_results_to_csv(user_id, window_size, merged_metrics, full_path)


In [None]:
# 6. Compute and plot ROC curves
from sklearn import metrics


def _as_numpy(values):
    """Return `values` as a NumPy array; tensors are detached and moved to the CPU first."""
    if isinstance(values, torch.Tensor):
        return values.detach().cpu().numpy()
    return np.asarray(values)


def _plot_metric_comparison(names, original_scores, merged_scores, figsize):
    """Draw one grouped bar chart comparing both test sets for each metric in `names`."""
    positions = np.arange(len(names))
    bar_width = 0.35

    plt.figure(figsize=figsize)
    plt.bar(positions - bar_width/2, original_scores, bar_width, label='Original Test Set')
    plt.bar(positions + bar_width/2, merged_scores, bar_width, label='Test Set with Predict Samples')

    plt.ylabel('Scores')
    plt.title('Performance Metrics Comparison')
    plt.xticks(positions, names)
    plt.legend()
    plt.grid(True)
    plt.show()


# ROC for the original test set.
# NOTE(review): `label` here rebinds the dataset-level `label` created when the
# data was first loaded; the name is kept in case later cells read it.
pred_id, pred, label = val(New_model_new_test, X_test_loader)
# Column 1 of the predictions is used as the positive-class score; _as_numpy
# moves tensors to the CPU so this also works when val() returns CUDA tensors.
fpr1, tpr1, _ = metrics.roc_curve(_as_numpy(label), _as_numpy(pred)[:, 1])
auc1 = metrics.auc(fpr1, tpr1)

# ROC for the test set with predicted samples
pred_id2, pred2, label2 = val(New_model_new_test, new_test_dataloader)
fpr2, tpr2, _ = metrics.roc_curve(_as_numpy(label2), _as_numpy(pred2)[:, 1])
auc2 = metrics.auc(fpr2, tpr2)

# Overlay both ROC curves
plt.figure(figsize=(10,8))
plt.plot(fpr1, tpr1, 'b-', label=f'Original Test Set (AUC = {auc1:.4f})')
plt.plot(fpr2, tpr2, 'r--', label=f'Test Set with Predict Samples (AUC = {auc2:.4f})')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title(f'ROC Curves Comparison - User {user_id}')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()

# 7. Bar charts comparing the metrics of both test sets.
# The label list is called `metric_names`, not `metrics`, so the sklearn
# `metrics` module stays usable by the other cells after this one has run.
metric_names = ['Recall', 'Accuracy', 'Precision']
test1_scores = [np.mean(d_c), np.mean(d_acc), np.mean(d_precsion)]
test2_scores = [np.mean(d_c_2), np.mean(d_acc_2), np.mean(d_precsion_2)]
_plot_metric_comparison(metric_names, test1_scores, test2_scores, figsize=(10,6))


# Extended comparison that also includes F1, AUC and EER
metric_names = ['Recall', 'Accuracy', 'Precision', 'F1', 'AUC', 'EER']
test1_scores = [np.mean(d_c), np.mean(d_acc), np.mean(d_precsion),
                np.mean(d_f1), np.mean(d_auc), np.mean(d_eer)]
test2_scores = [np.mean(d_c_2), np.mean(d_acc_2), np.mean(d_precsion_2),
                np.mean(d_f1_2), np.mean(d_auc_2), np.mean(d_eer_2)]
_plot_metric_comparison(metric_names, test1_scores, test2_scores, figsize=(12,6))