In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import os
import random
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)  # 你可以换成其它数字
# 数据集类
class ProteinNPYDataset(Dataset):
    def __init__(self, pos_path, neg_path):
        self.pos = np.load(pos_path, mmap_mode='r')
        self.neg = np.load(neg_path, mmap_mode='r')
        self.lengths = [len(self.pos), len(self.neg)]
        self.total_len = self.lengths[0] + self.lengths[1]

    def __len__(self):
        return self.total_len

    def __getitem__(self, idx):
        if idx < self.lengths[0]:
            x = self.pos[idx]
            y = 1
        else:
            x = self.neg[idx - self.lengths[0]]
            y = 0
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

In [3]:
class MLPExperts(nn.Module):
    def __init__(self, d_model, d_ff, num_experts):
        super().__init__()
        self.num_experts = num_experts
        self.fc1 = nn.Linear(d_model, d_ff * num_experts, bias=True)
        self.fc2 = nn.Linear(d_ff, d_model, bias=True)
        self.d_ff = d_ff
    def forward(self, x, expert_idx):
        # x: [B*L, d_model], expert_idx: [B*L, k]
        all_hidden = self.fc1(x)  # [B*L, d_ff * num_experts]
        all_hidden = all_hidden.view(x.size(0), self.num_experts, self.d_ff)  # [B*L, num_experts, d_ff]
        out = []
        for i in range(expert_idx.size(1)):
            idx = expert_idx[:, i]  # [B*L]
            hidden = all_hidden[torch.arange(x.size(0)), idx]  # [B*L, d_ff]
            hidden = F.gelu(hidden)
            out_i = self.fc2(hidden)  # [B*L, d_model]
            out.append(out_i)
        out = torch.stack(out, dim=1)  # [B*L, k, d_model]
        return out
class NoisyTopKMoE(nn.Module):
    def __init__(self, d_model, d_ff, num_experts=30, k=2, noisy_std=1.0):
        super().__init__()
        self.num_experts = num_experts
        self.k = k
        self.noisy_std = noisy_std
        self.experts = MLPExperts(d_model, d_ff, num_experts)
        self.gate = nn.Linear(d_model, num_experts)
    def forward(self, x):
        # x: [B, L, d_model]
        B, L, D = x.shape
        x_flat = x.reshape(-1, D)  # [B*L, D]
        gate_logits = self.gate(x_flat)  # [B*L, num_experts]
        # Noisy gating
        if self.training and self.noisy_std > 0:
            noise = torch.randn_like(gate_logits) * self.noisy_std
            gate_logits = gate_logits + noise
        gate_scores = F.softmax(gate_logits, dim=-1)  # [B*L, num_experts]

          # 稀疏路由：只选top-k
        topk_val, topk_idx = torch.topk(gate_scores, self.k, dim=-1)  # [B*L, k]
        # 负载均衡损失（新版，防止爆炸）
        meangate = gate_scores.mean(dim=0)  # [num_experts]
        load_balance_loss = (meangate * meangate).sum() * (self.num_experts ** 2)
        # 专家并行输出
        expert_outs = self.experts(x_flat, topk_idx)  # [B*L, k, d_model]
        topk_val = topk_val / (topk_val.sum(dim=-1, keepdim=True) + 1e-9)
        moe_out = (expert_outs * topk_val.unsqueeze(-1)).sum(dim=1)  # [B*L, d_model]
        moe_out = moe_out.view(B, L, D)
        return moe_out, load_balance_loss

In [4]:
class TransformerMoEBlock(nn.Module):
    def __init__(self, d_model, nhead, d_ff, num_experts=30, k=2, dropout=0.1, noisy_std=1.0):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=True)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.moe = NoisyTopKMoE(d_model, d_ff, num_experts, k, noisy_std)
        self.dropout = nn.Dropout(dropout)
    def forward(self, x):
        attn_out, _ = self.self_attn(x, x, x)
        x = x + self.dropout(attn_out)
        x = self.norm1(x)
        moe_out, load_balance_loss = self.moe(x)
        x = x + self.dropout(moe_out)
        x = self.norm2(x)
        return x, load_balance_loss

class TransformerMoE(nn.Module):
    def __init__(self, d_model=1152, nhead=8, d_ff=2048, num_layers=4, num_experts=30, k=2, dropout=0.1, noisy_std=1.0, num_classes=2):
        super().__init__()
        self.layers = nn.ModuleList([
            TransformerMoEBlock(d_model, nhead, d_ff, num_experts, k, dropout, noisy_std)
            for _ in range(num_layers)
        ])
        self.classifier = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, num_classes)
        )
    def forward(self, x):
        total_load_balance_loss = 0
        for layer in self.layers:
            x, lb_loss = layer(x)
            total_load_balance_loss += lb_loss
        x = x.mean(dim=1)  # 池化
        logits = self.classifier(x)
        return logits, total_load_balance_loss

In [5]:
def eval_model(model, loader, device):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits, _ = model(x)
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
    acc = accuracy_score(all_labels, all_preds)
    pre = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    mcc = matthews_corrcoef(all_labels, all_preds)
    print(f"Test ACC: {acc:.4f}, PRE: {pre:.4f}, REC: {rec:.4f}, F1: {f1:.4f}, MCC: {mcc:.4f}")
    return acc, pre, rec, f1, mcc

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
from torch.cuda.amp import autocast, GradScaler
# 数据路径
train_pos = '/exp_data/sjx/star/first_data/ESM-embedding/positive_train_embedding.npy'
train_neg = '/exp_data/sjx/star/gan_data/negative_train_all_combined.npy'

train_dataset = ProteinNPYDataset(train_pos, train_neg)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerMoE(
    d_model=1152, nhead=8, d_ff=2048, num_layers=4, num_experts=30, k=3, dropout=0.1, noisy_std=1.0, num_classes=2
).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

scaler = GradScaler()  # 在训练前初始化

def train_one_epoch(model, loader, optimizer, criterion, device, moe_loss_weight=0.01, scaler=None):
    model.train()
    total_loss = 0
    for x, y in tqdm(loader, desc="Training", leave=False):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        with autocast():  # 开启混合精度
            logits, lb_loss = model(x)
            loss = criterion(logits, y) + moe_loss_weight * lb_loss
        scaler.scale(loss).backward()      # 用scaler缩放loss反向传播
        scaler.step(optimizer)             # 用scaler.step更新参数
        scaler.update()                    # 更新scaler状态
        total_loss += loss.item()
    return total_loss / len(loader)

# 初始化scaler
scaler = GradScaler()

# 训练主循环
epochs = 10
best_acc = 0
best_state = None
best_path = "/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_best.pth"
last_path = "/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth"

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device, scaler=scaler)
    print(f"Train Loss: {train_loss:.4f}")
    # 保存最后一次模型权重
    torch.save(model.state_dict(), last_path)
    print(f"Last model saved at epoch {epoch+1} ({last_path})")

  scaler = GradScaler()  # 在训练前初始化
  scaler = GradScaler()



Epoch 1/10


  with autocast():  # 开启混合精度
                                                           

Train Loss: 1.5538
Last model saved at epoch 1 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 2/10


                                                           

Train Loss: 1.3767
Last model saved at epoch 2 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 3/10


                                                           

Train Loss: 1.3747
Last model saved at epoch 3 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 4/10


                                                           

Train Loss: 1.3480
Last model saved at epoch 4 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 5/10


                                                           

Train Loss: 1.3430
Last model saved at epoch 5 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 6/10


                                                           

Train Loss: 1.3342
Last model saved at epoch 6 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 7/10


                                                           

Train Loss: 1.3241
Last model saved at epoch 7 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 8/10


                                                           

Train Loss: 1.3184
Last model saved at epoch 8 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 9/10


                                                           

Train Loss: 1.3021
Last model saved at epoch 9 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)

Epoch 10/10


                                                           

Train Loss: 1.3076
Last model saved at epoch 10 (/exp_data/sjx/star/main_transformer_moe_weight/transformer_moe_last.pth)


In [8]:
# 1. 加载模型
model = TransformerMoE(
    d_model=1152, nhead=8, d_ff=2048, num_layers=4, num_experts=30, k=3, dropout=0.1, noisy_std=1.0, num_classes=2
).to(device)
model.load_state_dict(torch.load('/exp_data/sjx/star/main_transformer_moe_weight/best_transformer_moe_last.pth', map_location=device))
model.eval()

# 2. 加载测试集
test_pos = '/exp_data/sjx/star/first_data/ESM-embedding/positive_test_embedding.npy'
test_neg = '/exp_data/sjx/star/first_data/ESM-embedding/negative_test_embedding.npy'
test_dataset = ProteinNPYDataset(test_pos, test_neg)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# 扩展评估指标函数
def eval_model_extended(model, loader, device):
    model.eval()
    all_preds, all_labels = [], []
    all_probs = []  # 存储预测概率用于AUC计算
    
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits, _ = model(x)
            probs = torch.softmax(logits, dim=1)
            preds = torch.argmax(logits, dim=1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
            all_probs.extend(probs[:, 1].cpu().numpy())  # 正类的概率
    
    from sklearn.metrics import (
        accuracy_score, precision_score, recall_score, f1_score, 
        matthews_corrcoef, confusion_matrix, roc_auc_score, average_precision_score
    )
    
    # 基础指标
    acc = accuracy_score(all_labels, all_preds)
    pre = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    mcc = matthews_corrcoef(all_labels, all_preds)
    
    # 混淆矩阵计算Sn和Sp
    tn, fp, fn, tp = confusion_matrix(all_labels, all_preds).ravel()
    sn = tp / (tp + fn) if (tp + fn) > 0 else 0  # 敏感性 (Sensitivity/Recall)
    sp = tn / (tn + fp) if (tn + fp) > 0 else 0  # 特异性 (Specificity)
    
    # AUC和AUPRC
    auc = roc_auc_score(all_labels, all_probs)
    auprc = average_precision_score(all_labels, all_probs)
    
    print(f"Test ACC: {acc:.4f}, PRE: {pre:.4f}, REC: {rec:.4f}, F1: {f1:.4f}, MCC: {mcc:.4f}")
    print(f"Sn: {sn:.4f}, Sp: {sp:.4f}, AUC: {auc:.4f}, AUPRC: {auprc:.4f}")
    
    return acc, pre, rec, f1, mcc, sn, sp, auc, auprc

# 示例：在测试集上评估
print("\n========== Extended Evaluation Metrics ==========")
eval_model_extended(model, test_loader, device)

  model.load_state_dict(torch.load('/exp_data/sjx/star/main_transformer_moe_weight/best_transformer_moe_last.pth', map_location=device))



Test ACC: 0.9225, PRE: 0.9483, REC: 0.9425, F1: 0.9454, MCC: 0.8120
Sn: 0.9425, Sp: 0.8731, AUC: 0.9685, AUPRC: 0.9869


(0.9225413402959095,
 0.948339483394834,
 0.9425427872860636,
 0.9454322501532803,
 0.8120485793877618,
 0.9425427872860636,
 0.8731117824773413,
 0.9685438657398856,
 0.9869455888266454)

In [7]:
# 假设模型结构和ProteinNPYDataset已定义，device已设置
import torch

# 1. 加载模型
model = TransformerMoE(
    d_model=1152, nhead=8, d_ff=2048, num_layers=4, num_experts=30, k=3, dropout=0.1, noisy_std=1.0, num_classes=2
).to(device)
model.load_state_dict(torch.load('/exp_data/sjx/star/main_transformer_moe_weight/best_transformer_moe_last.pth', map_location=device))
model.eval()

# 2. 加载测试集
test_pos = '/exp_data/sjx/star/first_data/ESM-embedding/positive_test_embedding.npy'
test_neg = '/exp_data/sjx/star/first_data/ESM-embedding/negative_test_embedding.npy'
test_dataset = ProteinNPYDataset(test_pos, test_neg)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# 3. 定义评估函数
def eval_model(model, loader, device):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits, _ = model(x)
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
    acc = accuracy_score(all_labels, all_preds)
    pre = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    mcc = matthews_corrcoef(all_labels, all_preds)
    print(f"Test ACC: {acc:.4f}, PRE: {pre:.4f}, REC: {rec:.4f}, F1: {f1:.4f}, MCC: {mcc:.4f}")
    return acc, pre, rec, f1, mcc

# 4. 测试
eval_model(model, test_loader, device)

  model.load_state_dict(torch.load('/exp_data/sjx/star/main_transformer_moe_weight/best_transformer_moe_last.pth', map_location=device))


Test ACC: 0.9225, PRE: 0.9483, REC: 0.9425, F1: 0.9454, MCC: 0.8120


(0.9225413402959095,
 0.948339483394834,
 0.9425427872860636,
 0.9454322501532803,
 0.8120485793877618)

### 十折交叉验证

In [16]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import StratifiedKFold
from torch.cuda.amp import autocast, GradScaler
import random
import os
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

In [17]:
def train_one_epoch(model, loader, optimizer, criterion, device, moe_loss_weight=0.01, scaler=None):
    model.train()
    total_loss = 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        with autocast():
            logits, lb_loss = model(x)
            loss = criterion(logits, y) + moe_loss_weight * lb_loss
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
    return total_loss / len(loader)

def eval_model(model, loader, device):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            logits, _ = model(x)
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
    acc = accuracy_score(all_labels, all_preds)
    pre = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    mcc = matthews_corrcoef(all_labels, all_preds)
    print(f"Val ACC: {acc:.4f}, PRE: {pre:.4f}, REC: {rec:.4f}, F1: {f1:.4f}, MCC: {mcc:.4f}")
    return acc, pre, rec, f1, mcc

In [18]:
train_pos = '/exp_data/sjx/star/first_data/ESM-embedding/positive_train_embedding.npy'
train_neg = '/exp_data/sjx/star/gan_data/negative_train_all_combined.npy'

# 构建全体索引和标签
pos_len = np.load(train_pos, mmap_mode='r').shape[0]
neg_len = np.load(train_neg, mmap_mode='r').shape[0]
all_indices = np.concatenate([np.arange(pos_len), np.arange(neg_len) + pos_len])
all_labels = np.concatenate([np.ones(pos_len, dtype=int), np.zeros(neg_len, dtype=int)])

# 数据集
full_dataset = ProteinNPYDataset(train_pos, train_neg)

# K折分层
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
all_metrics = []

for fold, (train_idx, val_idx) in enumerate(skf.split(all_indices, all_labels), 1):
    print(f"\n========== Fold {fold}/10 ==========")
    train_loader = DataLoader(Subset(full_dataset, train_idx), batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(Subset(full_dataset, val_idx), batch_size=64, shuffle=False, num_workers=2)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = TransformerMoE(
        d_model=1152, nhead=8, d_ff=2048, num_layers=4, num_experts=30, k=3, dropout=0.1, noisy_std=1.0, num_classes=2
    ).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()
    scaler = GradScaler()

    best_acc = 0
    best_state = None
    epochs = 10

    for epoch in range(epochs):
        print(f"\n[Fold {fold}] Epoch {epoch+1}/{epochs}")
        model.train()
        total_loss = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            with autocast():
                logits, lb_loss = model(x)
                loss = criterion(logits, y) + 0.01 * lb_loss
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item()
        print(f"Train Loss: {total_loss / len(train_loader):.4f}")

        # 验证
        model.eval()
        all_preds, all_labels_fold = [], []
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                logits, _ = model(x)
                preds = torch.argmax(logits, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels_fold.extend(y.cpu().numpy())
        from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef
        acc = accuracy_score(all_labels_fold, all_preds)
        pre = precision_score(all_labels_fold, all_preds)
        rec = recall_score(all_labels_fold, all_preds)
        f1 = f1_score(all_labels_fold, all_preds)
        mcc = matthews_corrcoef(all_labels_fold, all_preds)
        print(f"Val ACC: {acc:.4f}, PRE: {pre:.4f}, REC: {rec:.4f}, F1: {f1:.4f}, MCC: {mcc:.4f}")

        if acc > best_acc:
            best_acc = acc
            best_state = model.state_dict()
            torch.save(best_state, f"/exp_data/sjx/star/main_transformer_moe_weight/cv_point/best_fold{fold}.pth")
            print(f"Best model saved for fold {fold} at epoch {epoch+1}")

    all_metrics.append((best_acc, pre, rec, f1, mcc))
    print(f"[Fold {fold}] Best ACC: {best_acc:.4f}")

# 汇总结果
all_metrics = np.array(all_metrics)
print("\n========== 10-Fold CV Results ==========")
print(f"Mean ACC: {all_metrics[:,0].mean():.4f} ± {all_metrics[:,0].std():.4f}")
print(f"Mean PRE: {all_metrics[:,1].mean():.4f}")
print(f"Mean REC: {all_metrics[:,2].mean():.4f}")
print(f"Mean F1:  {all_metrics[:,3].mean():.4f}")
print(f"Mean MCC: {all_metrics[:,4].mean():.4f}")



[Fold 1] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.7336
Val ACC: 0.9176, PRE: 0.8870, REC: 0.9573, F1: 0.9208, MCC: 0.8377
Best model saved for fold 1 at epoch 1

[Fold 1] Epoch 2/10


  with autocast():


Train Loss: 1.3743
Val ACC: 0.9206, PRE: 0.9207, REC: 0.9207, F1: 0.9207, MCC: 0.8412
Best model saved for fold 1 at epoch 2

[Fold 1] Epoch 3/10


  with autocast():


Train Loss: 1.3609
Val ACC: 0.9130, PRE: 0.9472, REC: 0.8750, F1: 0.9097, MCC: 0.8284

[Fold 1] Epoch 4/10


  with autocast():


Train Loss: 1.3552
Val ACC: 0.9282, PRE: 0.8871, REC: 0.9817, F1: 0.9320, MCC: 0.8614
Best model saved for fold 1 at epoch 4

[Fold 1] Epoch 5/10


  with autocast():


Train Loss: 1.3404
Val ACC: 0.9206, PRE: 0.8943, REC: 0.9543, F1: 0.9233, MCC: 0.8431

[Fold 1] Epoch 6/10


  with autocast():


Train Loss: 1.3362
Val ACC: 0.9313, PRE: 0.9101, REC: 0.9573, F1: 0.9331, MCC: 0.8637
Best model saved for fold 1 at epoch 6

[Fold 1] Epoch 7/10


  with autocast():


Train Loss: 1.3225
Val ACC: 0.9328, PRE: 0.9034, REC: 0.9695, F1: 0.9353, MCC: 0.8680
Best model saved for fold 1 at epoch 7

[Fold 1] Epoch 8/10


  with autocast():


Train Loss: 1.3172
Val ACC: 0.9176, PRE: 0.9335, REC: 0.8994, F1: 0.9161, MCC: 0.8357

[Fold 1] Epoch 9/10


  with autocast():


Train Loss: 1.3054
Val ACC: 0.9252, PRE: 0.9043, REC: 0.9512, F1: 0.9272, MCC: 0.8515

[Fold 1] Epoch 10/10


  with autocast():


Train Loss: 1.2861
Val ACC: 0.9252, PRE: 0.9240, REC: 0.9268, F1: 0.9254, MCC: 0.8504
[Fold 1] Best ACC: 0.9328


[Fold 2] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.6946
Val ACC: 0.9420, PRE: 0.9530, REC: 0.9297, F1: 0.9412, MCC: 0.8842
Best model saved for fold 2 at epoch 1

[Fold 2] Epoch 2/10


  with autocast():


Train Loss: 1.3795
Val ACC: 0.9115, PRE: 0.8530, REC: 0.9939, F1: 0.9181, MCC: 0.8344

[Fold 2] Epoch 3/10


  with autocast():


Train Loss: 1.3732
Val ACC: 0.9420, PRE: 0.9140, REC: 0.9755, F1: 0.9438, MCC: 0.8860

[Fold 2] Epoch 4/10


  with autocast():


Train Loss: 1.3498
Val ACC: 0.9527, PRE: 0.9405, REC: 0.9664, F1: 0.9532, MCC: 0.9057
Best model saved for fold 2 at epoch 4

[Fold 2] Epoch 5/10


  with autocast():


Train Loss: 1.3453
Val ACC: 0.9496, PRE: 0.9298, REC: 0.9725, F1: 0.9507, MCC: 0.9002

[Fold 2] Epoch 6/10


  with autocast():


Train Loss: 1.3256
Val ACC: 0.8992, PRE: 0.9336, REC: 0.8593, F1: 0.8949, MCC: 0.8010

[Fold 2] Epoch 7/10


  with autocast():


Train Loss: 1.3231
Val ACC: 0.9191, PRE: 0.9597, REC: 0.8746, F1: 0.9152, MCC: 0.8415

[Fold 2] Epoch 8/10


  with autocast():


Train Loss: 1.3168
Val ACC: 0.9115, PRE: 0.9622, REC: 0.8563, F1: 0.9061, MCC: 0.8279

[Fold 2] Epoch 9/10


  with autocast():


Train Loss: 1.3100
Val ACC: 0.9344, PRE: 0.9610, REC: 0.9052, F1: 0.9323, MCC: 0.8702

[Fold 2] Epoch 10/10


  with autocast():


Train Loss: 1.2960
Val ACC: 0.9176, PRE: 0.9596, REC: 0.8716, F1: 0.9135, MCC: 0.8386
[Fold 2] Best ACC: 0.9527


[Fold 3] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.5004
Val ACC: 0.9205, PRE: 0.9568, REC: 0.8807, F1: 0.9172, MCC: 0.8436
Best model saved for fold 3 at epoch 1

[Fold 3] Epoch 2/10


  with autocast():


Train Loss: 1.3890
Val ACC: 0.9281, PRE: 0.9430, REC: 0.9113, F1: 0.9269, MCC: 0.8568
Best model saved for fold 3 at epoch 2

[Fold 3] Epoch 3/10


  with autocast():


Train Loss: 1.3592
Val ACC: 0.9266, PRE: 0.9515, REC: 0.8991, F1: 0.9245, MCC: 0.8545

[Fold 3] Epoch 4/10


  with autocast():


Train Loss: 1.3496
Val ACC: 0.9266, PRE: 0.9067, REC: 0.9511, F1: 0.9284, MCC: 0.8542

[Fold 3] Epoch 5/10


  with autocast():


Train Loss: 1.3535
Val ACC: 0.9373, PRE: 0.9441, REC: 0.9297, F1: 0.9368, MCC: 0.8747
Best model saved for fold 3 at epoch 5

[Fold 3] Epoch 6/10


  with autocast():


Train Loss: 1.3345
Val ACC: 0.9251, PRE: 0.9572, REC: 0.8899, F1: 0.9223, MCC: 0.8523

[Fold 3] Epoch 7/10


  with autocast():


Train Loss: 1.3381
Val ACC: 0.9159, PRE: 0.9595, REC: 0.8685, F1: 0.9117, MCC: 0.8356

[Fold 3] Epoch 8/10


  with autocast():


Train Loss: 1.3212
Val ACC: 0.9343, PRE: 0.9494, REC: 0.9174, F1: 0.9331, MCC: 0.8690

[Fold 3] Epoch 9/10


  with autocast():


Train Loss: 1.3075
Val ACC: 0.9297, PRE: 0.9323, REC: 0.9266, F1: 0.9294, MCC: 0.8593

[Fold 3] Epoch 10/10


  with autocast():


Train Loss: 1.3073
Val ACC: 0.9343, PRE: 0.9465, REC: 0.9205, F1: 0.9333, MCC: 0.8688
[Fold 3] Best ACC: 0.9373


[Fold 4] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.5812
Val ACC: 0.9419, PRE: 0.9164, REC: 0.9725, F1: 0.9436, MCC: 0.8854
Best model saved for fold 4 at epoch 1

[Fold 4] Epoch 2/10


  with autocast():


Train Loss: 1.3806
Val ACC: 0.9205, PRE: 0.8747, REC: 0.9817, F1: 0.9251, MCC: 0.8473

[Fold 4] Epoch 3/10


  with autocast():


Train Loss: 1.3636
Val ACC: 0.9404, PRE: 0.9500, REC: 0.9297, F1: 0.9397, MCC: 0.8809

[Fold 4] Epoch 4/10


  with autocast():


Train Loss: 1.3449
Val ACC: 0.9404, PRE: 0.9286, REC: 0.9541, F1: 0.9412, MCC: 0.8811

[Fold 4] Epoch 5/10


  with autocast():


Train Loss: 1.3446
Val ACC: 0.9434, PRE: 0.9265, REC: 0.9633, F1: 0.9445, MCC: 0.8876
Best model saved for fold 4 at epoch 5

[Fold 4] Epoch 6/10


  with autocast():


Train Loss: 1.3329
Val ACC: 0.9404, PRE: 0.9311, REC: 0.9511, F1: 0.9410, MCC: 0.8809

[Fold 4] Epoch 7/10


  with autocast():


Train Loss: 1.3304
Val ACC: 0.9266, PRE: 0.9401, REC: 0.9113, F1: 0.9255, MCC: 0.8536

[Fold 4] Epoch 8/10


  with autocast():


Train Loss: 1.3171
Val ACC: 0.9312, PRE: 0.9462, REC: 0.9144, F1: 0.9300, MCC: 0.8629

[Fold 4] Epoch 9/10


  with autocast():


Train Loss: 1.3050
Val ACC: 0.9266, PRE: 0.9401, REC: 0.9113, F1: 0.9255, MCC: 0.8536

[Fold 4] Epoch 10/10


  with autocast():


Train Loss: 1.3006
Val ACC: 0.9128, PRE: 0.8668, REC: 0.9755, F1: 0.9180, MCC: 0.8323
[Fold 4] Best ACC: 0.9434


[Fold 5] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.5659
Val ACC: 0.9251, PRE: 0.9162, REC: 0.9358, F1: 0.9259, MCC: 0.8503
Best model saved for fold 5 at epoch 1

[Fold 5] Epoch 2/10


  with autocast():


Train Loss: 1.3716
Val ACC: 0.9266, PRE: 0.8997, REC: 0.9602, F1: 0.9290, MCC: 0.8551
Best model saved for fold 5 at epoch 2

[Fold 5] Epoch 3/10


  with autocast():


Train Loss: 1.3626
Val ACC: 0.9205, PRE: 0.8873, REC: 0.9633, F1: 0.9238, MCC: 0.8441

[Fold 5] Epoch 4/10


  with autocast():


Train Loss: 1.3556
Val ACC: 0.9343, PRE: 0.9034, REC: 0.9725, F1: 0.9367, MCC: 0.8711
Best model saved for fold 5 at epoch 4

[Fold 5] Epoch 5/10


  with autocast():


Train Loss: 1.3380
Val ACC: 0.9220, PRE: 0.8898, REC: 0.9633, F1: 0.9251, MCC: 0.8469

[Fold 5] Epoch 6/10


  with autocast():


Train Loss: 1.3381
Val ACC: 0.9404, PRE: 0.9337, REC: 0.9480, F1: 0.9408, MCC: 0.8808
Best model saved for fold 5 at epoch 6

[Fold 5] Epoch 7/10


  with autocast():


Train Loss: 1.3227
Val ACC: 0.9144, PRE: 0.8774, REC: 0.9633, F1: 0.9184, MCC: 0.8327

[Fold 5] Epoch 8/10


  with autocast():


Train Loss: 1.3131
Val ACC: 0.9358, PRE: 0.9204, REC: 0.9541, F1: 0.9369, MCC: 0.8721

[Fold 5] Epoch 9/10


  with autocast():


Train Loss: 1.3076
Val ACC: 0.9358, PRE: 0.9204, REC: 0.9541, F1: 0.9369, MCC: 0.8721

[Fold 5] Epoch 10/10


  with autocast():


Train Loss: 1.2993
Val ACC: 0.9281, PRE: 0.9142, REC: 0.9450, F1: 0.9293, MCC: 0.8568
[Fold 5] Best ACC: 0.9404


[Fold 6] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.6475
Val ACC: 0.9404, PRE: 0.9586, REC: 0.9205, F1: 0.9392, MCC: 0.8814
Best model saved for fold 6 at epoch 1

[Fold 6] Epoch 2/10


  with autocast():


Train Loss: 1.3829
Val ACC: 0.9343, PRE: 0.9201, REC: 0.9511, F1: 0.9353, MCC: 0.8690

[Fold 6] Epoch 3/10


  with autocast():


Train Loss: 1.3668
Val ACC: 0.9373, PRE: 0.9256, REC: 0.9511, F1: 0.9382, MCC: 0.8749

[Fold 6] Epoch 4/10


  with autocast():


Train Loss: 1.3562
Val ACC: 0.9358, PRE: 0.9495, REC: 0.9205, F1: 0.9348, MCC: 0.8720

[Fold 6] Epoch 5/10


  with autocast():


Train Loss: 1.3476
Val ACC: 0.9450, PRE: 0.9533, REC: 0.9358, F1: 0.9444, MCC: 0.8901
Best model saved for fold 6 at epoch 5

[Fold 6] Epoch 6/10


  with autocast():


Train Loss: 1.3394
Val ACC: 0.9312, PRE: 0.9462, REC: 0.9144, F1: 0.9300, MCC: 0.8629

[Fold 6] Epoch 7/10


  with autocast():


Train Loss: 1.3310
Val ACC: 0.9312, PRE: 0.9548, REC: 0.9052, F1: 0.9294, MCC: 0.8636

[Fold 6] Epoch 8/10


  with autocast():


Train Loss: 1.3275
Val ACC: 0.9434, PRE: 0.9448, REC: 0.9419, F1: 0.9433, MCC: 0.8869

[Fold 6] Epoch 9/10


  with autocast():


Train Loss: 1.3171
Val ACC: 0.9358, PRE: 0.9582, REC: 0.9113, F1: 0.9342, MCC: 0.8726

[Fold 6] Epoch 10/10


  with autocast():


Train Loss: 1.3175
Val ACC: 0.9388, PRE: 0.9498, REC: 0.9266, F1: 0.9381, MCC: 0.8779
[Fold 6] Best ACC: 0.9450


[Fold 7] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.5406
Val ACC: 0.9312, PRE: 0.9029, REC: 0.9664, F1: 0.9335, MCC: 0.8645
Best model saved for fold 7 at epoch 1

[Fold 7] Epoch 2/10


  with autocast():


Train Loss: 1.3863
Val ACC: 0.9281, PRE: 0.9046, REC: 0.9572, F1: 0.9302, MCC: 0.8577

[Fold 7] Epoch 3/10


  with autocast():


Train Loss: 1.3604
Val ACC: 0.9220, PRE: 0.9233, REC: 0.9205, F1: 0.9219, MCC: 0.8440

[Fold 7] Epoch 4/10


  with autocast():


Train Loss: 1.3554
Val ACC: 0.9281, PRE: 0.9242, REC: 0.9327, F1: 0.9285, MCC: 0.8563

[Fold 7] Epoch 5/10


  with autocast():


Train Loss: 1.3493
Val ACC: 0.9388, PRE: 0.9335, REC: 0.9450, F1: 0.9392, MCC: 0.8777
Best model saved for fold 7 at epoch 5

[Fold 7] Epoch 6/10


  with autocast():


Train Loss: 1.3330
Val ACC: 0.9281, PRE: 0.9000, REC: 0.9633, F1: 0.9306, MCC: 0.8584

[Fold 7] Epoch 7/10


  with autocast():


Train Loss: 1.3320
Val ACC: 0.9358, PRE: 0.9412, REC: 0.9297, F1: 0.9354, MCC: 0.8716

[Fold 7] Epoch 8/10


  with autocast():


Train Loss: 1.3171
Val ACC: 0.9144, PRE: 0.9625, REC: 0.8624, F1: 0.9097, MCC: 0.8333

[Fold 7] Epoch 9/10


  with autocast():


Train Loss: 1.3141
Val ACC: 0.9358, PRE: 0.9130, REC: 0.9633, F1: 0.9375, MCC: 0.8729

[Fold 7] Epoch 10/10


  with autocast():


Train Loss: 1.2964
Val ACC: 0.9312, PRE: 0.9147, REC: 0.9511, F1: 0.9325, MCC: 0.8631
[Fold 7] Best ACC: 0.9388


[Fold 8] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.5265
Val ACC: 0.9281, PRE: 0.8977, REC: 0.9664, F1: 0.9308, MCC: 0.8588
Best model saved for fold 8 at epoch 1

[Fold 8] Epoch 2/10


  with autocast():


Train Loss: 1.3893
Val ACC: 0.9297, PRE: 0.9194, REC: 0.9419, F1: 0.9305, MCC: 0.8596
Best model saved for fold 8 at epoch 2

[Fold 8] Epoch 3/10


  with autocast():


Train Loss: 1.3647
Val ACC: 0.9358, PRE: 0.9467, REC: 0.9235, F1: 0.9350, MCC: 0.8718
Best model saved for fold 8 at epoch 3

[Fold 8] Epoch 4/10


  with autocast():


Train Loss: 1.3612
Val ACC: 0.9312, PRE: 0.8939, REC: 0.9786, F1: 0.9343, MCC: 0.8663

[Fold 8] Epoch 5/10


  with autocast():


Train Loss: 1.3441
Val ACC: 0.9388, PRE: 0.9159, REC: 0.9664, F1: 0.9405, MCC: 0.8790
Best model saved for fold 8 at epoch 5

[Fold 8] Epoch 6/10


  with autocast():


Train Loss: 1.3541
Val ACC: 0.9404, PRE: 0.9186, REC: 0.9664, F1: 0.9419, MCC: 0.8819
Best model saved for fold 8 at epoch 6

[Fold 8] Epoch 7/10


  with autocast():


Train Loss: 1.3265
Val ACC: 0.9358, PRE: 0.8969, REC: 0.9847, F1: 0.9388, MCC: 0.8758

[Fold 8] Epoch 8/10


  with autocast():


Train Loss: 1.3273
Val ACC: 0.8930, PRE: 0.9707, REC: 0.8104, F1: 0.8833, MCC: 0.7969

[Fold 8] Epoch 9/10


  with autocast():


Train Loss: 1.3130
Val ACC: 0.9235, PRE: 0.8946, REC: 0.9602, F1: 0.9263, MCC: 0.8494

[Fold 8] Epoch 10/10


  with autocast():


Train Loss: 1.3110
Val ACC: 0.9220, PRE: 0.8988, REC: 0.9511, F1: 0.9242, MCC: 0.8455
[Fold 8] Best ACC: 0.9404


[Fold 9] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.6587
Val ACC: 0.9159, PRE: 0.8820, REC: 0.9602, F1: 0.9195, MCC: 0.8351
Best model saved for fold 9 at epoch 1

[Fold 9] Epoch 2/10


  with autocast():


Train Loss: 1.3730
Val ACC: 0.9052, PRE: 0.8591, REC: 0.9694, F1: 0.9109, MCC: 0.8172

[Fold 9] Epoch 3/10


  with autocast():


Train Loss: 1.3776
Val ACC: 0.9327, PRE: 0.9224, REC: 0.9450, F1: 0.9335, MCC: 0.8657
Best model saved for fold 9 at epoch 3

[Fold 9] Epoch 4/10


  with autocast():


Train Loss: 1.3428
Val ACC: 0.9388, PRE: 0.9470, REC: 0.9297, F1: 0.9383, MCC: 0.8778
Best model saved for fold 9 at epoch 4

[Fold 9] Epoch 5/10


  with autocast():


Train Loss: 1.3419
Val ACC: 0.9251, PRE: 0.9399, REC: 0.9083, F1: 0.9238, MCC: 0.8506

[Fold 9] Epoch 6/10


  with autocast():


Train Loss: 1.3293
Val ACC: 0.9343, PRE: 0.9303, REC: 0.9388, F1: 0.9346, MCC: 0.8685

[Fold 9] Epoch 7/10


  with autocast():


Train Loss: 1.3313
Val ACC: 0.9343, PRE: 0.9522, REC: 0.9144, F1: 0.9329, MCC: 0.8692

[Fold 9] Epoch 8/10


  with autocast():


Train Loss: 1.3145
Val ACC: 0.9343, PRE: 0.9551, REC: 0.9113, F1: 0.9327, MCC: 0.8694

[Fold 9] Epoch 9/10


  with autocast():


Train Loss: 1.3149
Val ACC: 0.9174, PRE: 0.8845, REC: 0.9602, F1: 0.9208, MCC: 0.8379

[Fold 9] Epoch 10/10


  with autocast():


Train Loss: 1.3095
Val ACC: 0.9128, PRE: 0.9623, REC: 0.8593, F1: 0.9079, MCC: 0.8305
[Fold 9] Best ACC: 0.9388


[Fold 10] Epoch 1/10


  scaler = GradScaler()
  with autocast():


Train Loss: 1.5349
Val ACC: 0.9251, PRE: 0.9427, REC: 0.9052, F1: 0.9236, MCC: 0.8508
Best model saved for fold 10 at epoch 1

[Fold 10] Epoch 2/10


  with autocast():


Train Loss: 1.3829
Val ACC: 0.9297, PRE: 0.9297, REC: 0.9297, F1: 0.9297, MCC: 0.8593
Best model saved for fold 10 at epoch 2

[Fold 10] Epoch 3/10


  with autocast():


Train Loss: 1.3651
Val ACC: 0.9235, PRE: 0.9086, REC: 0.9419, F1: 0.9249, MCC: 0.8477

[Fold 10] Epoch 4/10


  with autocast():


Train Loss: 1.3534
Val ACC: 0.9205, PRE: 0.9393, REC: 0.8991, F1: 0.9187, MCC: 0.8418

[Fold 10] Epoch 5/10


  with autocast():


Train Loss: 1.3394
Val ACC: 0.9312, PRE: 0.9075, REC: 0.9602, F1: 0.9331, MCC: 0.8638
Best model saved for fold 10 at epoch 5

[Fold 10] Epoch 6/10


  with autocast():


Train Loss: 1.3361
Val ACC: 0.9190, PRE: 0.8870, REC: 0.9602, F1: 0.9222, MCC: 0.8408

[Fold 10] Epoch 7/10


  with autocast():


Train Loss: 1.3184
Val ACC: 0.9281, PRE: 0.9321, REC: 0.9235, F1: 0.9278, MCC: 0.8563

[Fold 10] Epoch 8/10


  with autocast():


Train Loss: 1.3160
Val ACC: 0.9174, PRE: 0.9003, REC: 0.9388, F1: 0.9192, MCC: 0.8356

[Fold 10] Epoch 9/10


  with autocast():


Train Loss: 1.3104
Val ACC: 0.9251, PRE: 0.9137, REC: 0.9388, F1: 0.9261, MCC: 0.8505

[Fold 10] Epoch 10/10


  with autocast():


Train Loss: 1.3063
Val ACC: 0.9297, PRE: 0.9271, REC: 0.9327, F1: 0.9299, MCC: 0.8593
[Fold 10] Best ACC: 0.9312

Mean ACC: 0.9401 ± 0.0058
Mean PRE: 0.9264
Mean REC: 0.9260
Mean F1:  0.9252
Mean MCC: 0.8523
