In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
# 固定 PyTorch、NumPy 和 random 的随机种子
seed = 2  # 你可以改成任意整数
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# 如果使用了 GPU，还需要固定 CUDA 相关的随机性
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # 多 GPU 训练时
# torch.backends.cudnn.deterministic = True  # 确保 CUDA 计算是确定性的
# torch.backends.cudnn.benchmark = False  # 关闭自动优化

In [3]:
class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, dropout):
        super(TemporalBlock, self).__init__()
        padding = (kernel_size - 1) * dilation  # Correct padding to maintain the output size
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.batchnorm1 = nn.BatchNorm1d(out_channels)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.batchnorm2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.init_weights()

    def init_weights(self):
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.xavier_uniform_(self.conv2.weight)
        if self.downsample is not None:
            nn.init.xavier_uniform_(self.downsample.weight)

    def forward(self, x):
        # Applying convolutions
        out = self.conv1(x)
        out = self.batchnorm1(out)
        out = self.relu1(out)
        out = self.dropout1(out)
        
        out = self.conv2(out)
        out = self.batchnorm2(out)
        out = self.relu2(out)
        out = self.dropout2(out)
        
        # Handling residual connection
        res = x if self.downsample is None else self.downsample(x)
        # Trimming output to match residual size if needed
        out = out[:, :, :x.size(2)]  # Ensure the output size matches the input size
        
        return F.relu(out + res)


In [4]:
class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = num_inputs if i == 0 else num_channels[i - 1]
            out_channels = num_channels[i]
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     dropout=dropout)]
        
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)


class IMTCNModel(nn.Module):
    def __init__(self, input_size, num_channels, output_size, kernel_size=2, dropout=0.2):
        super(IMTCNModel, self).__init__()
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout)
        self.linear = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        y1 = self.tcn(x)  # Multi-scale temporal feature extraction
        y1 = y1[:, :, -1]  # Take the last output along the temporal dimension for classification
        out = self.linear(y1)
        return out


In [5]:
path=r'data'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32).to(device)
        self.y = torch.tensor(y, dtype=torch.long).to(device)  # 确保标签是长整型

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
    


In [12]:
import numpy as np

# 读取原始数据
X_train = np.load(path + "/X_train.npy")
y_train = np.load(path + "/y_train.npy")
X_test = np.load(path + "/X_test.npy")
y_test = np.load(path + "/y_test.npy")

# 合并数据
X = np.concatenate((X_train, X_test), axis=0)  # 合并特征
y = np.concatenate((y_train, y_test), axis=0)  # 合并标签
dataset = TimeSeriesDataset(X, y)

In [8]:
import torch.optim as optim
input_size = 7  # Number of features (e.g., parameters from QAR data)
num_channels = [16, 32, 64]  # Channels for each TCN block
output_size = 2  # For example, binary classification
model = IMTCNModel(input_size, num_channels, output_size)
class_weights = torch.tensor([1., 1.], device=device) 
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
## Model Checkpointing epochs=10000
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, matthews_corrcoef
from sklearn.model_selection import StratifiedKFold
import torch
import numpy as np

# 设定随机种子（以保证可复现性）
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# 设定超参数
num_folds = 5  # 5折交叉验证
best_model_path = 'save/IMTCN_best_model.pth'  # 最佳模型保存路径

# 初始化存储评估指标的列表
accuracy_list = []
f1_list = []
roc_auc_list = []
mcc_list = []

# 创建 5-Fold 交叉验证对象
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)



# 进行五折交叉验证
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f'Fold {fold + 1}/{num_folds}')
    
    # 划分训练集和验证集
    train_subsampler = torch.utils.data.Subset(dataset, train_idx)
    val_subsampler = torch.utils.data.Subset(dataset, val_idx)

    train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

    # 重新初始化模型和优化器
    model = IMTCNModel(input_size, num_channels, output_size).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    best_f1 = 0.0  # 记录当前 fold 内的最佳 F1

    # 训练循环
    num_epochs = 1000  # 训练轮数
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_predictions = []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            # 前向传播
            outputs = model(data)
            loss = criterion(outputs, target.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            probs = torch.sigmoid(outputs.squeeze(1))
            _, predicted = torch.max(probs, 1)
            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)
        f1 = f1_score(all_labels, all_predictions)

        print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, F1: {f1:.4f}')
        
        # 记录最优模型
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), best_model_path)
            print(f'Saved best model for fold {fold + 1} with F1: {best_f1:.4f}')

    # 加载最佳模型进行验证
    model.load_state_dict(torch.load(best_model_path))
    model.eval()
    all_labels = []
    all_predictions = []
    all_probs = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            probabilities = torch.softmax(outputs, dim=1)[:, 1]  # 取正类概率
            preds = torch.argmax(outputs, dim=1)

            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probs.extend(probabilities.cpu().numpy())

    # 计算指标
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_probs)
    mcc = matthews_corrcoef(all_labels, all_predictions)

    print(f'Fold {fold + 1} Evaluation - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}')
    
    # 存储每折的结果
    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_auc_list.append(roc_auc)
    mcc_list.append(mcc)

# 计算五折交叉验证的平均指标
print("\nFinal 5-Fold Cross-Validation Results:")
print(f'Average Accuracy: {np.mean(accuracy_list):.4f} ± {np.std(accuracy_list):.4f}')
print(f'Average F1 Score: {np.mean(f1_list):.4f} ± {np.std(f1_list):.4f}')
print(f'Average ROC AUC: {np.mean(roc_auc_list):.4f} ± {np.std(roc_auc_list):.4f}')
print(f'Average MCC: {np.mean(mcc_list):.4f} ± {np.std(mcc_list):.4f}')


Fold 1/5
Fold 1, Epoch 1/1000, Loss: 3.9631, F1: 0.3346
Saved best model for fold 1 with F1: 0.3346
Fold 1, Epoch 2/1000, Loss: 0.7274, F1: 0.2255
Fold 1, Epoch 3/1000, Loss: 0.6522, F1: 0.2617
Fold 1, Epoch 4/1000, Loss: 0.6076, F1: 0.3625
Saved best model for fold 1 with F1: 0.3625
Fold 1, Epoch 5/1000, Loss: 0.5746, F1: 0.4493
Saved best model for fold 1 with F1: 0.4493
Fold 1, Epoch 6/1000, Loss: 0.6159, F1: 0.4254
Fold 1, Epoch 7/1000, Loss: 0.5821, F1: 0.5103
Saved best model for fold 1 with F1: 0.5103
Fold 1, Epoch 8/1000, Loss: 0.5512, F1: 0.5000
Fold 1, Epoch 9/1000, Loss: 0.5264, F1: 0.5664
Saved best model for fold 1 with F1: 0.5664
Fold 1, Epoch 10/1000, Loss: 0.5766, F1: 0.5283
Fold 1, Epoch 11/1000, Loss: 0.5249, F1: 0.5650
Fold 1, Epoch 12/1000, Loss: 0.5334, F1: 0.5738
Saved best model for fold 1 with F1: 0.5738
Fold 1, Epoch 13/1000, Loss: 0.5120, F1: 0.5613
Fold 1, Epoch 14/1000, Loss: 0.5074, F1: 0.5714
Fold 1, Epoch 15/1000, Loss: 0.5179, F1: 0.5576
Fold 1, Epoch 16

In [None]:
##epoch 500
from sklearn.model_selection import StratifiedKFold
import torch
import numpy as np

# 设定随机种子（保证可复现性）
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# 设定超参数
num_folds = 5  # 5折交叉验证

# 初始化存储评估指标的列表
accuracy_list = []
f1_list = []
roc_auc_list = []
mcc_list = []

# 创建 5-Fold 交叉验证对象
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)

# 进行五折交叉验证
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f'Fold {fold + 1}/{num_folds}')
    
    # 划分训练集和验证集
    train_subsampler = torch.utils.data.Subset(dataset, train_idx)
    val_subsampler = torch.utils.data.Subset(dataset, val_idx)

    train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

    # 重新初始化模型和优化器
    model = IMTCNModel(input_size, num_channels, output_size).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    # 训练循环
    num_epochs = 500  # 训练轮数（固定不变）
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_predictions = []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            # 前向传播
            outputs = model(data)
            loss = criterion(outputs, target.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            probs = torch.sigmoid(outputs.squeeze(1))
            _, predicted = torch.max(probs, 1)
            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)
        f1 = f1_score(all_labels, all_predictions)

        print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, F1: {f1:.4f}')
    
    # 不保存模型，直接使用最后一轮训练的模型进行评估
    model.eval()
    all_labels = []
    all_predictions = []
    all_probs = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            probabilities = torch.softmax(outputs, dim=1)[:, 1]  # 取正类概率
            preds = torch.argmax(outputs, dim=1)

            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probs.extend(probabilities.cpu().numpy())

    # 计算指标
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_probs)
    mcc = matthews_corrcoef(all_labels, all_predictions)

    print(f'Fold {fold + 1} Evaluation - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}')
    
    # 存储每折的结果
    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_auc_list.append(roc_auc)
    mcc_list.append(mcc)

# 计算五折交叉验证的平均指标
print("\nFinal 5-Fold Cross-Validation Results:")
print(f'Average Accuracy: {np.mean(accuracy_list):.4f} ± {np.std(accuracy_list):.4f}')
print(f'Average F1 Score: {np.mean(f1_list):.4f} ± {np.std(f1_list):.4f}')
print(f'Average ROC AUC: {np.mean(roc_auc_list):.4f} ± {np.std(roc_auc_list):.4f}')
print(f'Average MCC: {np.mean(mcc_list):.4f} ± {np.std(mcc_list):.4f}')


Fold 1/5
Fold 1, Epoch 1/500, Loss: 3.9631, F1: 0.3346
Fold 1, Epoch 2/500, Loss: 0.7274, F1: 0.2255
Fold 1, Epoch 3/500, Loss: 0.6522, F1: 0.2617
Fold 1, Epoch 4/500, Loss: 0.6076, F1: 0.3625
Fold 1, Epoch 5/500, Loss: 0.5746, F1: 0.4493
Fold 1, Epoch 6/500, Loss: 0.6159, F1: 0.4254
Fold 1, Epoch 7/500, Loss: 0.5821, F1: 0.5103
Fold 1, Epoch 8/500, Loss: 0.5512, F1: 0.5000
Fold 1, Epoch 9/500, Loss: 0.5264, F1: 0.5664
Fold 1, Epoch 10/500, Loss: 0.5766, F1: 0.5283
Fold 1, Epoch 11/500, Loss: 0.5249, F1: 0.5650
Fold 1, Epoch 12/500, Loss: 0.5334, F1: 0.5738
Fold 1, Epoch 13/500, Loss: 0.5120, F1: 0.5613
Fold 1, Epoch 14/500, Loss: 0.5074, F1: 0.5714
Fold 1, Epoch 15/500, Loss: 0.5179, F1: 0.5576
Fold 1, Epoch 16/500, Loss: 0.5121, F1: 0.5751
Fold 1, Epoch 17/500, Loss: 0.5010, F1: 0.5870
Fold 1, Epoch 18/500, Loss: 0.5211, F1: 0.5849
Fold 1, Epoch 19/500, Loss: 0.5118, F1: 0.5796
Fold 1, Epoch 20/500, Loss: 0.4782, F1: 0.6165
Fold 1, Epoch 21/500, Loss: 0.5086, F1: 0.5869
Fold 1, Epoch

In [26]:
##epoch 300
from sklearn.model_selection import StratifiedKFold
import torch
import numpy as np

# 设定随机种子（保证可复现性）
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# 设定超参数
num_folds = 5  # 5折交叉验证

# 初始化存储评估指标的列表
accuracy_list = []
f1_list = []
roc_auc_list = []
mcc_list = []

# 创建 5-Fold 交叉验证对象
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)

# 进行五折交叉验证
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f'Fold {fold + 1}/{num_folds}')
    
    # 划分训练集和验证集
    train_subsampler = torch.utils.data.Subset(dataset, train_idx)
    val_subsampler = torch.utils.data.Subset(dataset, val_idx)

    train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

    # 重新初始化模型和优化器
    model = IMTCNModel(input_size, num_channels, output_size).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    # 训练循环
    num_epochs = 300  # 训练轮数（固定不变）
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_predictions = []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            # 前向传播
            outputs = model(data)
            loss = criterion(outputs, target.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            probs = torch.sigmoid(outputs.squeeze(1))
            _, predicted = torch.max(probs, 1)
            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)
        f1 = f1_score(all_labels, all_predictions)

        print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, F1: {f1:.4f}')
    
    # 不保存模型，直接使用最后一轮训练的模型进行评估
    model.eval()
    all_labels = []
    all_predictions = []
    all_probs = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            probabilities = torch.softmax(outputs, dim=1)[:, 1]  # 取正类概率
            preds = torch.argmax(outputs, dim=1)

            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probs.extend(probabilities.cpu().numpy())

    # 计算指标
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_probs)
    mcc = matthews_corrcoef(all_labels, all_predictions)

    print(f'Fold {fold + 1} Evaluation - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}')
    
    # 存储每折的结果
    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_auc_list.append(roc_auc)
    mcc_list.append(mcc)

# 计算五折交叉验证的平均指标
print("\nFinal 5-Fold Cross-Validation Results:")
print(f'Average Accuracy: {np.mean(accuracy_list):.4f} ± {np.std(accuracy_list):.4f}')
print(f'Average F1 Score: {np.mean(f1_list):.4f} ± {np.std(f1_list):.4f}')
print(f'Average ROC AUC: {np.mean(roc_auc_list):.4f} ± {np.std(roc_auc_list):.4f}')
print(f'Average MCC: {np.mean(mcc_list):.4f} ± {np.std(mcc_list):.4f}')


Fold 1/5
Fold 1, Epoch 1/300, Loss: 3.9631, F1: 0.3346
Fold 1, Epoch 2/300, Loss: 0.7274, F1: 0.2255
Fold 1, Epoch 3/300, Loss: 0.6522, F1: 0.2617
Fold 1, Epoch 4/300, Loss: 0.6076, F1: 0.3625
Fold 1, Epoch 5/300, Loss: 0.5746, F1: 0.4493
Fold 1, Epoch 6/300, Loss: 0.6159, F1: 0.4254
Fold 1, Epoch 7/300, Loss: 0.5821, F1: 0.5103
Fold 1, Epoch 8/300, Loss: 0.5512, F1: 0.5000
Fold 1, Epoch 9/300, Loss: 0.5264, F1: 0.5664
Fold 1, Epoch 10/300, Loss: 0.5766, F1: 0.5283
Fold 1, Epoch 11/300, Loss: 0.5249, F1: 0.5650
Fold 1, Epoch 12/300, Loss: 0.5334, F1: 0.5738
Fold 1, Epoch 13/300, Loss: 0.5120, F1: 0.5613
Fold 1, Epoch 14/300, Loss: 0.5074, F1: 0.5714
Fold 1, Epoch 15/300, Loss: 0.5179, F1: 0.5576
Fold 1, Epoch 16/300, Loss: 0.5121, F1: 0.5751
Fold 1, Epoch 17/300, Loss: 0.5010, F1: 0.5870
Fold 1, Epoch 18/300, Loss: 0.5211, F1: 0.5849
Fold 1, Epoch 19/300, Loss: 0.5118, F1: 0.5796
Fold 1, Epoch 20/300, Loss: 0.4782, F1: 0.6165
Fold 1, Epoch 21/300, Loss: 0.5086, F1: 0.5869
Fold 1, Epoch

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TimeAwareAttention(nn.Module):
    def __init__(self, input_dim, hidden_dim, attention_dim):
        super(TimeAwareAttention, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.attention_layer = nn.Linear(hidden_dim, attention_dim)
        self.context_vector = nn.Linear(attention_dim, 1, bias=False)

    def attention(self, lstm_output):
        attention_score = torch.tanh(self.attention_layer(lstm_output))
        attention_weights = F.softmax(self.context_vector(attention_score), dim=1)
        weighted_output = lstm_output * attention_weights
        return torch.sum(weighted_output, dim=1), attention_weights

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        attention_out, attention_weights = self.attention(lstm_out)
        return attention_out, attention_weights

class SDTANClassificationModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, attention_dim, num_classes):
        super(SDTANClassificationModel, self).__init__()
        self.time_attention = TimeAwareAttention(input_dim, hidden_dim, attention_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        attention_out, attention_weights = self.time_attention(x)
        output = self.fc(attention_out)
        output = F.softmax(output, dim=1)  # Applying softmax for multi-class classification
        return output, attention_weights



In [18]:
input_dim = 55  # Matching the last dimension of your data shape [32, 7, 55]
hidden_dim = 64
attention_dim = 32
num_classes = 2  # Example: 3-class classification, change according to your task

model = SDTANClassificationModel(input_dim, hidden_dim, attention_dim, num_classes)
sample_input = torch.rand(32, 7, 55)  # Matching the input data shape [32, 7, 55]
output, attention_weights = model(sample_input)
print("Model output shape:", output.shape)

Model output shape: torch.Size([32, 2])


In [22]:
## Model Checkpointing epochs=10000
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, matthews_corrcoef
from sklearn.model_selection import StratifiedKFold
import torch
import numpy as np

# 设定随机种子（以保证可复现性）
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# 设定超参数
num_folds = 5  # 5折交叉验证
best_model_path = 'save/IMTCN_best_model.pth'  # 最佳模型保存路径

# 初始化存储评估指标的列表
accuracy_list = []
f1_list = []
roc_auc_list = []
mcc_list = []

# 创建 5-Fold 交叉验证对象
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)



# 进行五折交叉验证
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f'Fold {fold + 1}/{num_folds}')
    
    # 划分训练集和验证集
    train_subsampler = torch.utils.data.Subset(dataset, train_idx)
    val_subsampler = torch.utils.data.Subset(dataset, val_idx)

    train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

    # 重新初始化模型和优化器
    model = SDTANClassificationModel(input_dim, hidden_dim, attention_dim, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    best_f1 = 0.0  # 记录当前 fold 内的最佳 F1

    # 训练循环
    num_epochs = 1000  # 训练轮数
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_predictions = []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            # 前向传播
            outputs,_ = model(data)
            loss = criterion(outputs, target.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            probs = torch.sigmoid(outputs.squeeze(1))
            _, predicted = torch.max(probs, 1)
            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)
        f1 = f1_score(all_labels, all_predictions)

        print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, F1: {f1:.4f}')
        
        # 记录最优模型
        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), best_model_path)
            print(f'Saved best model for fold {fold + 1} with F1: {best_f1:.4f}')

    # 加载最佳模型进行验证
    model.load_state_dict(torch.load(best_model_path))
    model.eval()
    all_labels = []
    all_predictions = []
    all_probs = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs,_ = model(data)
            probabilities = torch.softmax(outputs, dim=1)[:, 1]  # 取正类概率
            preds = torch.argmax(outputs, dim=1)

            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probs.extend(probabilities.cpu().numpy())

    # 计算指标
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_probs)
    mcc = matthews_corrcoef(all_labels, all_predictions)

    print(f'Fold {fold + 1} Evaluation - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}')
    
    # 存储每折的结果
    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_auc_list.append(roc_auc)
    mcc_list.append(mcc)

# 计算五折交叉验证的平均指标
print("\nFinal 5-Fold Cross-Validation Results:")
print(f'Average Accuracy: {np.mean(accuracy_list):.4f} ± {np.std(accuracy_list):.4f}')
print(f'Average F1 Score: {np.mean(f1_list):.4f} ± {np.std(f1_list):.4f}')
print(f'Average ROC AUC: {np.mean(roc_auc_list):.4f} ± {np.std(roc_auc_list):.4f}')
print(f'Average MCC: {np.mean(mcc_list):.4f} ± {np.std(mcc_list):.4f}')


Fold 1/5
Fold 1, Epoch 1/1000, Loss: 0.6432, F1: 0.0694
Saved best model for fold 1 with F1: 0.0694
Fold 1, Epoch 2/1000, Loss: 0.6263, F1: 0.0000
Fold 1, Epoch 3/1000, Loss: 0.6205, F1: 0.0000
Fold 1, Epoch 4/1000, Loss: 0.6046, F1: 0.0234
Fold 1, Epoch 5/1000, Loss: 0.5856, F1: 0.2924
Saved best model for fold 1 with F1: 0.2924
Fold 1, Epoch 6/1000, Loss: 0.5633, F1: 0.4685
Saved best model for fold 1 with F1: 0.4685
Fold 1, Epoch 7/1000, Loss: 0.5548, F1: 0.4846
Saved best model for fold 1 with F1: 0.4846
Fold 1, Epoch 8/1000, Loss: 0.5469, F1: 0.5066
Saved best model for fold 1 with F1: 0.5066
Fold 1, Epoch 9/1000, Loss: 0.5397, F1: 0.5445
Saved best model for fold 1 with F1: 0.5445
Fold 1, Epoch 10/1000, Loss: 0.5429, F1: 0.5246
Fold 1, Epoch 11/1000, Loss: 0.5807, F1: 0.3571
Fold 1, Epoch 12/1000, Loss: 0.5450, F1: 0.5382
Fold 1, Epoch 13/1000, Loss: 0.5405, F1: 0.5256
Fold 1, Epoch 14/1000, Loss: 0.5459, F1: 0.5124
Fold 1, Epoch 15/1000, Loss: 0.5339, F1: 0.5750
Saved best model

In [23]:
##epoch 500
from sklearn.model_selection import StratifiedKFold
import torch
import numpy as np

# 设定随机种子（保证可复现性）
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# 设定超参数
num_folds = 5  # 5折交叉验证

# 初始化存储评估指标的列表
accuracy_list = []
f1_list = []
roc_auc_list = []
mcc_list = []

# 创建 5-Fold 交叉验证对象
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)

# 进行五折交叉验证
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f'Fold {fold + 1}/{num_folds}')
    
    # 划分训练集和验证集
    train_subsampler = torch.utils.data.Subset(dataset, train_idx)
    val_subsampler = torch.utils.data.Subset(dataset, val_idx)

    train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

    # 重新初始化模型和优化器
    model = SDTANClassificationModel(input_dim, hidden_dim, attention_dim, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    # 训练循环
    num_epochs = 500  # 训练轮数（固定不变）
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_predictions = []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            # 前向传播
            outputs,_ = model(data)
            loss = criterion(outputs, target.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            probs = torch.sigmoid(outputs.squeeze(1))
            _, predicted = torch.max(probs, 1)
            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)
        f1 = f1_score(all_labels, all_predictions)

        print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, F1: {f1:.4f}')
    
    # 不保存模型，直接使用最后一轮训练的模型进行评估
    model.eval()
    all_labels = []
    all_predictions = []
    all_probs = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs,_ = model(data)
            probabilities = torch.softmax(outputs, dim=1)[:, 1]  # 取正类概率
            preds = torch.argmax(outputs, dim=1)

            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probs.extend(probabilities.cpu().numpy())

    # 计算指标
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_probs)
    mcc = matthews_corrcoef(all_labels, all_predictions)

    print(f'Fold {fold + 1} Evaluation - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}')
    
    # 存储每折的结果
    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_auc_list.append(roc_auc)
    mcc_list.append(mcc)

# 计算五折交叉验证的平均指标
print("\nFinal 5-Fold Cross-Validation Results:")
print(f'Average Accuracy: {np.mean(accuracy_list):.4f} ± {np.std(accuracy_list):.4f}')
print(f'Average F1 Score: {np.mean(f1_list):.4f} ± {np.std(f1_list):.4f}')
print(f'Average ROC AUC: {np.mean(roc_auc_list):.4f} ± {np.std(roc_auc_list):.4f}')
print(f'Average MCC: {np.mean(mcc_list):.4f} ± {np.std(mcc_list):.4f}')


Fold 1/5
Fold 1, Epoch 1/500, Loss: 0.6432, F1: 0.0694
Fold 1, Epoch 2/500, Loss: 0.6263, F1: 0.0000
Fold 1, Epoch 3/500, Loss: 0.6205, F1: 0.0000
Fold 1, Epoch 4/500, Loss: 0.6046, F1: 0.0234
Fold 1, Epoch 5/500, Loss: 0.5856, F1: 0.2924
Fold 1, Epoch 6/500, Loss: 0.5633, F1: 0.4685
Fold 1, Epoch 7/500, Loss: 0.5548, F1: 0.4846
Fold 1, Epoch 8/500, Loss: 0.5469, F1: 0.5066
Fold 1, Epoch 9/500, Loss: 0.5397, F1: 0.5445
Fold 1, Epoch 10/500, Loss: 0.5429, F1: 0.5246
Fold 1, Epoch 11/500, Loss: 0.5807, F1: 0.3571
Fold 1, Epoch 12/500, Loss: 0.5450, F1: 0.5382
Fold 1, Epoch 13/500, Loss: 0.5405, F1: 0.5256
Fold 1, Epoch 14/500, Loss: 0.5459, F1: 0.5124
Fold 1, Epoch 15/500, Loss: 0.5339, F1: 0.5750
Fold 1, Epoch 16/500, Loss: 0.5379, F1: 0.5273
Fold 1, Epoch 17/500, Loss: 0.5379, F1: 0.5506
Fold 1, Epoch 18/500, Loss: 0.5323, F1: 0.5623
Fold 1, Epoch 19/500, Loss: 0.5365, F1: 0.5448
Fold 1, Epoch 20/500, Loss: 0.5341, F1: 0.5556
Fold 1, Epoch 21/500, Loss: 0.5441, F1: 0.5245
Fold 1, Epoch

In [27]:
##epoch 800
from sklearn.model_selection import StratifiedKFold
import torch
import numpy as np

# 设定随机种子（保证可复现性）
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# 设定超参数
num_folds = 5  # 5折交叉验证

# 初始化存储评估指标的列表
accuracy_list = []
f1_list = []
roc_auc_list = []
mcc_list = []

# 创建 5-Fold 交叉验证对象
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)

# 进行五折交叉验证
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    print(f'Fold {fold + 1}/{num_folds}')
    
    # 划分训练集和验证集
    train_subsampler = torch.utils.data.Subset(dataset, train_idx)
    val_subsampler = torch.utils.data.Subset(dataset, val_idx)

    train_loader = DataLoader(train_subsampler, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subsampler, batch_size=32, shuffle=False)

    # 重新初始化模型和优化器
    model = SDTANClassificationModel(input_dim, hidden_dim, attention_dim, num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    # 训练循环
    num_epochs = 300  # 训练轮数（固定不变）
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_labels = []
        all_predictions = []

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            
            # 前向传播
            outputs,_ = model(data)
            loss = criterion(outputs, target.long())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            probs = torch.sigmoid(outputs.squeeze(1))
            _, predicted = torch.max(probs, 1)
            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)
        f1 = f1_score(all_labels, all_predictions)

        print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, F1: {f1:.4f}')
    
    # 不保存模型，直接使用最后一轮训练的模型进行评估
    model.eval()
    all_labels = []
    all_predictions = []
    all_probs = []

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs,_ = model(data)
            probabilities = torch.softmax(outputs, dim=1)[:, 1]  # 取正类概率
            preds = torch.argmax(outputs, dim=1)

            all_labels.extend(target.cpu().numpy())
            all_predictions.extend(preds.cpu().numpy())
            all_probs.extend(probabilities.cpu().numpy())

    # 计算指标
    accuracy = accuracy_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    roc_auc = roc_auc_score(all_labels, all_probs)
    mcc = matthews_corrcoef(all_labels, all_predictions)

    print(f'Fold {fold + 1} Evaluation - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}, MCC: {mcc:.4f}')
    
    # 存储每折的结果
    accuracy_list.append(accuracy)
    f1_list.append(f1)
    roc_auc_list.append(roc_auc)
    mcc_list.append(mcc)

# 计算五折交叉验证的平均指标
print("\nFinal 5-Fold Cross-Validation Results:")
print(f'Average Accuracy: {np.mean(accuracy_list):.4f} ± {np.std(accuracy_list):.4f}')
print(f'Average F1 Score: {np.mean(f1_list):.4f} ± {np.std(f1_list):.4f}')
print(f'Average ROC AUC: {np.mean(roc_auc_list):.4f} ± {np.std(roc_auc_list):.4f}')
print(f'Average MCC: {np.mean(mcc_list):.4f} ± {np.std(mcc_list):.4f}')


Fold 1/5
Fold 1, Epoch 1/300, Loss: 0.6432, F1: 0.0694
Fold 1, Epoch 2/300, Loss: 0.6263, F1: 0.0000
Fold 1, Epoch 3/300, Loss: 0.6205, F1: 0.0000
Fold 1, Epoch 4/300, Loss: 0.6046, F1: 0.0234
Fold 1, Epoch 5/300, Loss: 0.5856, F1: 0.2924
Fold 1, Epoch 6/300, Loss: 0.5633, F1: 0.4685
Fold 1, Epoch 7/300, Loss: 0.5548, F1: 0.4846
Fold 1, Epoch 8/300, Loss: 0.5469, F1: 0.5066
Fold 1, Epoch 9/300, Loss: 0.5397, F1: 0.5445
Fold 1, Epoch 10/300, Loss: 0.5429, F1: 0.5246
Fold 1, Epoch 11/300, Loss: 0.5807, F1: 0.3571
Fold 1, Epoch 12/300, Loss: 0.5450, F1: 0.5382
Fold 1, Epoch 13/300, Loss: 0.5405, F1: 0.5256
Fold 1, Epoch 14/300, Loss: 0.5459, F1: 0.5124
Fold 1, Epoch 15/300, Loss: 0.5339, F1: 0.5750
Fold 1, Epoch 16/300, Loss: 0.5379, F1: 0.5273
Fold 1, Epoch 17/300, Loss: 0.5379, F1: 0.5506
Fold 1, Epoch 18/300, Loss: 0.5323, F1: 0.5623
Fold 1, Epoch 19/300, Loss: 0.5365, F1: 0.5448
Fold 1, Epoch 20/300, Loss: 0.5341, F1: 0.5556
Fold 1, Epoch 21/300, Loss: 0.5441, F1: 0.5245
Fold 1, Epoch