In [1]:
import json
import torch
from torch.utils.data import DataLoader, TensorDataset
import os
from config import base_config
from sklearn.metrics import classification_report
# Expose only the GPU with index 9 to this process.
# NOTE(review): this runs after `import torch`; presumably it still takes effect
# because CUDA has not been initialised at this point -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '9'  # set the environment variable
import random
# Fix the PyTorch random seed
torch.manual_seed(9)
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

def read_json(dataset_path,label_space,sample_num=-1):
    """
    Load a JSONL result file and build per-sample confidence lists.

    Each line of the file is one JSON record. The fields read are
    ``label_distribution`` and ``label_token_confidences`` (required) and
    ``prior_confidences``, ``cc_prior_confidences`` and
    ``LinC_pram['weight']`` (optional).

    Args:
        dataset_path: path of the JSONL file.
        label_space: ordered list of label names; the position of a label in
            this list is its class index.
        sample_num: number of lines to sample at random with
            ``random.sample``; ``-1`` keeps every line.

    Returns:
        A 5-tuple ``(true, predicted, prior, cc_prior, linc)``:

        * ``true``: for every record, one one-hot list per entry of
          ``label_distribution``.
        * ``predicted``: for every record, one list per entry of
          ``label_token_confidences``, rescaled so that it sums to 1.
        * ``prior`` / ``cc_prior`` / ``linc``: one entry per record, or three
          empty lists as soon as any record lacks one of the optional fields.
          The three lists are therefore either aligned with ``true`` or empty.

    Raises:
        KeyError: a required field or an unknown label is encountered.
        ZeroDivisionError: the confidences of one position sum to zero.
    """
    label_index = {label: idx for idx, label in enumerate(label_space)}
    num_labels = len(label_space)

    # Only hold the file open while reading; parsing happens afterwards.
    with open(dataset_path, "r", encoding="utf-8") as f:
        all_data = f.readlines()
    if sample_num != -1:
        all_data = random.sample(all_data, sample_num)

    all_ture_confidence_list = []
    all_predict_confidence_list = []
    all_prior_confidences = []
    all_cc_prior_confidences = []
    all_LinC = []
    # Turns False the first time a record lacks the optional fields. From then
    # on nothing is appended, so a partial list can never be returned.
    has_optional_fields = True

    for line in all_data:
        data = json.loads(line)

        # One-hot encoding of the gold label at every sequence position.
        true_confidence_list = []  # seq_len * num_labels
        for label in data['label_distribution']:
            true_confidence = [0] * num_labels
            true_confidence[label_index[label]] = 1
            true_confidence_list.append(true_confidence)

        # Predicted confidences, rescaled so that each position sums to 1.
        predict_confidence_list = []  # seq_len * num_labels
        for token_confidences in data['label_token_confidences'].values():
            raw_confidence = list(token_confidences.values())
            total = sum(raw_confidence)  # computed once per position
            predict_confidence_list.append([value / total for value in raw_confidence])

        all_ture_confidence_list.append(true_confidence_list)
        all_predict_confidence_list.append(predict_confidence_list)

        if not has_optional_fields:
            continue
        try:
            prior_confidences = list(data['prior_confidences'].values())
            cc_prior_confidences = list(data['cc_prior_confidences'].values())
            LinC_pram = [abs(weight) for weight in data['LinC_pram']['weight']]
        except (KeyError, TypeError, AttributeError):
            # Optional fields missing or malformed: report "not available"
            # for the whole file instead of a misaligned subset.
            has_optional_fields = False
            all_prior_confidences = []
            all_cc_prior_confidences = []
            all_LinC = []
        else:
            all_prior_confidences.append(prior_confidences)
            all_cc_prior_confidences.append(cc_prior_confidences)
            all_LinC.append(LinC_pram)

    return all_ture_confidence_list, all_predict_confidence_list, all_prior_confidences, all_cc_prior_confidences, all_LinC

def batch_calibrate(all_ture_confidence_list,all_predict_confidence_list,all_prior_confidence,label_space):
    """
    Compare raw, batch-calibrated and prior-calibrated accuracy on the query.

    Only the last entry of every sample (the query position) is used.

    Args:
        all_ture_confidence_list: per sample, a list of one-hot label vectors.
        all_predict_confidence_list: per sample, a list of confidence vectors.
        all_prior_confidence: per sample, one prior confidence vector.
        label_space: list of label names; only its length is used.

    Returns:
        ``(accuracy, accuracy_a, accuracy_b, prior_calibrate_confidence)``:
        accuracy of the raw argmax, accuracy after dividing every confidence
        by the per-class mean over all samples, accuracy after dividing by the
        per-sample prior, and the prior-divided confidence vectors.
    """
    num_labels = len(label_space)
    class_ids = range(num_labels)

    def _top_class(scores):
        # Index of the largest score; the first index wins on ties.
        return max(class_ids, key=lambda idx: scores[idx])

    def _accuracy(predicted_labels):
        hits = sum(1 for gold, guess in zip(gold_labels, predicted_labels) if gold == guess)
        return hits / len(gold_labels)

    # Gold class index and raw confidence vector of every query.
    gold_labels = [_top_class(sample[-1]) for sample in all_ture_confidence_list]
    query_scores = [sample[-1] for sample in all_predict_confidence_list]

    # Accuracy before any calibration.
    accuracy = _accuracy([_top_class(scores) for scores in query_scores])

    # Batch calibration: divide by the mean confidence of each class.
    sample_count = len(query_scores)
    class_means = [
        sum([scores[idx] for scores in query_scores]) / sample_count
        for idx in class_ids
    ]
    batch_scores = [
        [scores[idx] / class_means[idx] for idx in class_ids]
        for scores in query_scores
    ]
    accuracy_a = _accuracy([_top_class(scores) for scores in batch_scores])

    # Prior calibration: divide by the prior confidence of the same sample.
    prior_calibrate_confidence = [
        [scores[idx] / prior[idx] for idx in class_ids]
        for scores, prior in zip(query_scores, all_prior_confidence)
    ]
    accuracy_b = _accuracy([_top_class(scores) for scores in prior_calibrate_confidence])

    return accuracy, accuracy_a, accuracy_b, prior_calibrate_confidence

def multiclass_pointwise_surprise(prior, true_label_onehot, eps=0):
    """
    Score every class +1 where the one-hot label is 1 and -1 where it is 0.

    Args:
        prior: predicted distribution; accepted but not used by the current
            constant-score variant (the log-based variant is disabled).
        true_label_onehot: one-hot label values; the result has the same shape.
        eps: accepted but not used by the current variant.

    Returns:
        ``true_label_onehot * 1 + (1 - true_label_onehot) * (-1)``.
    """
    score_if_true = 1
    score_if_false = -score_if_true
    complement = 1 - true_label_onehot
    return true_label_onehot * score_if_true + complement * score_if_false

import torch
from torch.utils.data import TensorDataset, DataLoader

def data_reader(all_true_confidence_train, all_predict_confidence_train,
               all_true_confidence_test, all_predict_confidence_test,
               train_batch_size=15000, test_batch_size=10000):
    """
    Build the train / test data loaders from confidence lists.

    Every sample is a sequence of ``k + 1`` positions: the first ``k`` are the
    support examples and position ``k`` is the query. ``k`` is taken from the
    length of the first training sample.

    Args:
        all_true_confidence_train: one-hot labels of the training set,
            indexed as (sample, position, label).
        all_predict_confidence_train: predicted confidences of the training
            set, same layout.
        all_true_confidence_test: one-hot labels of the test set.
        all_predict_confidence_test: predicted confidences of the test set.
        train_batch_size: batch size of the shuffled training loader.
        test_batch_size: batch size of the (unshuffled) test loader.

    Returns:
        ``(train_loader, test_loader)``. Every batch is a 4-tuple
        ``(support_surprise, p_pred_query, query_surprise, p_true_query)``
        where ``support_surprise`` covers positions ``[:k]`` and the other
        three tensors are taken at position ``k``. All tensors are float32 and
        live on CUDA when it is available, otherwise on the CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # k is the length of the second dimension minus one.
    k = len(all_true_confidence_train[0]) - 1

    def _build_dataset(true_confidence, predict_confidence):
        # Convert one split to tensors and separate support from query.
        true_t = torch.tensor(true_confidence, dtype=torch.float32).to(device)
        pred_t = torch.tensor(predict_confidence, dtype=torch.float32).to(device)
        # Per-class surprise score, same shape as the inputs.
        surprise = multiclass_pointwise_surprise(pred_t, true_t)
        return TensorDataset(
            surprise[:, :k, :],   # support surprise
            pred_t[:, k, :],      # predicted confidence of the query
            surprise[:, k, :],    # surprise of the query
            true_t[:, k, :],      # one-hot label of the query
        )

    train_dataset = _build_dataset(all_true_confidence_train, all_predict_confidence_train)
    test_dataset = _build_dataset(all_true_confidence_test, all_predict_confidence_test)

    train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader

import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class Transformer_encoder(nn.Module):
    def __init__(self, num_labels, embed_dim=768, num_heads=16, num_layers=4, dropout=0):
        """
        Transformer-encoder calibration model.

        Args:
            num_labels (int): number of labels.
            embed_dim (int): embedding dimension.
            num_heads (int): number of attention heads.
            num_layers (int): number of encoder layers.
            dropout (float): dropout ratio.
        """
        super(Transformer_encoder, self).__init__()
        self.num_labels = num_labels
        self.embed_dim = embed_dim

        # Input embedding: maps a per-label vector into the embedding space.
        self.input_projection = nn.Linear(num_labels, embed_dim)

        # Transformer encoder (sequence-first layout, the layer's default).
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Pooling layer; kept as an attribute but not used by forward().
        self.pool = nn.AdaptiveAvgPool1d(1)

        # Output layer: maps the selected hidden state back to label space.
        self.output_projection = nn.Linear(embed_dim, num_labels)

    def positional_encoding(self, length, embed_dim, device):
        """
        Build a sinusoidal positional encoding.

        Args:
            length (int): sequence length.
            embed_dim (int): embedding dimension (even or odd).
            device (torch.device): device on which to create the tensor.

        Returns:
            torch.Tensor: positional encoding of shape (length, embed_dim).
        """
        position = torch.arange(length, dtype=torch.float, device=device).unsqueeze(1)  # (length, 1)
        div_term = torch.exp(torch.arange(0, embed_dim, 2, dtype=torch.float, device=device) *
                             -(math.log(10000.0) / embed_dim))  # (ceil(embed_dim / 2),)
        angles = position * div_term
        pe = torch.zeros(length, embed_dim, device=device)  # (length, embed_dim)
        pe[:, 0::2] = torch.sin(angles)
        # For an odd embed_dim there is one odd column fewer than even ones.
        pe[:, 1::2] = torch.cos(angles[:, : embed_dim // 2])
        return pe

    def forward(self, support_deltas):
        """
        Forward pass.

        Args:
            support_deltas (torch.Tensor): support-example features of shape
                (batch_size, k, num_labels).

        Returns:
            torch.Tensor: prediction for the query, shape (batch_size, num_labels).
        """
        # Embed, then switch to the (k, batch_size, embed_dim) layout expected
        # by the encoder.
        embedded = self.input_projection(support_deltas)  # (batch_size, k, embed_dim)
        embedded = embedded.permute(1, 0, 2)  # (k, batch_size, embed_dim)

        # Build the positional encoding on the same device as the activations
        # rather than relying on a module-level global.
        k = embedded.size(0)
        pos_encoding = self.positional_encoding(k, self.embed_dim, embedded.device)  # (k, embed_dim)
        embedded = embedded + pos_encoding.unsqueeze(1)  # broadcast over the batch

        transformer_output = self.transformer_encoder(embedded)  # (k, batch_size, embed_dim)

        # Use the representation of the last support position.
        last_hidden_state = transformer_output[-1, :, :]  # (batch_size, embed_dim)

        delta_pred = self.output_projection(last_hidden_state)  # (batch_size, num_labels)

        return delta_pred
    
import torch
import torch.nn as nn
import torch.nn.init as init
import math

class LSTMModel(nn.Module):
    def __init__(self, num_labels, embed_dim=768, hidden_dim=1024, num_layers=1, dropout=0):
        """
        LSTM-based calibration model.

        Args:
            num_labels (int): number of labels.
            embed_dim (int): embedding dimension.
            hidden_dim (int): size of the LSTM hidden state.
            num_layers (int): number of LSTM layers.
            dropout (float): dropout ratio.
        """
        super(LSTMModel, self).__init__()
        self.num_labels = num_labels
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        # Input embedding: maps a per-label vector into the embedding space.
        self.input_projection = nn.Linear(num_labels, embed_dim)
        # Layer normalisation applied to the embedded input.
        self.layer_norm = nn.LayerNorm(embed_dim)
        # Recurrent encoder.
        self.rnn = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, num_layers=num_layers,
                           dropout=dropout, batch_first=True)
        # Output layer: maps the last time step back to label space.
        self.output_projection = nn.Linear(hidden_dim, num_labels)
        # Initialise the recurrent weights. The initialisation loop previously
        # sat inline at this point, so calling the method here keeps the
        # constructed model identical.
        self.init_lstm_weights()

    def init_lstm_weights(self):
        """Orthogonally initialise the LSTM weight matrices and zero its biases."""
        for name, param in self.rnn.named_parameters():
            if 'weight_ih' in name:  # input-to-hidden weights
                init.orthogonal_(param.data)
            elif 'weight_hh' in name:  # hidden-to-hidden weights
                init.orthogonal_(param.data)
            elif 'bias' in name:  # biases start at zero
                init.zeros_(param.data)

    def forward(self, support_deltas):
        """
        Forward pass.

        Args:
            support_deltas (torch.Tensor): support-example features of shape
                (batch_size, k, num_labels).

        Returns:
            torch.Tensor: prediction for the query, shape (batch_size, num_labels).
        """
        embedded = self.input_projection(support_deltas)  # (batch_size, k, embed_dim)

        normalized_embedded = self.layer_norm(embedded)  # (batch_size, k, embed_dim)

        rnn_output, _ = self.rnn(normalized_embedded)  # (batch_size, k, hidden_dim)

        # Output of the last time step.
        last_hidden_state = rnn_output[:, -1, :]  # (batch_size, hidden_dim)

        delta_pred = self.output_projection(last_hidden_state)  # (batch_size, num_labels)

        return delta_pred

import torch
import torch.nn as nn
from torch.nn import init

class GRUModel(nn.Module):
    """GRU-based calibrator that subtracts a predicted prior from the query logits."""

    def __init__(self, num_labels, embed_dim=768, hidden_dim=1024, num_layers=1, dropout=0):
        """
        Build the embedding, normalisation, GRU and output layers.

        Args:
            num_labels (int): number of labels.
            embed_dim (int): embedding dimension.
            hidden_dim (int): size of the GRU hidden state.
            num_layers (int): number of GRU layers.
            dropout (float): dropout ratio.
        """
        super().__init__()
        self.num_labels = num_labels
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim

        # Per-label vector -> embedding space, followed by layer normalisation.
        self.input_projection = nn.Linear(num_labels, embed_dim)
        self.layer_norm = nn.LayerNorm(embed_dim)

        # Recurrent encoder over the support sequence.
        self.rnn = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

        # Last hidden state -> label space.
        self.output_projection = nn.Linear(hidden_dim, num_labels, bias=True)

        self.init_gru_weights()

    def init_gru_weights(self):
        """Orthogonally initialise the GRU weight matrices and zero its biases."""
        for param_name, tensor in self.rnn.named_parameters():
            if 'weight_ih' in param_name or 'weight_hh' in param_name:
                init.orthogonal_(tensor.data)
            elif 'bias' in param_name:
                init.zeros_(tensor.data)

    def forward(self, support_deltas, p_pred_logits):
        """
        Forward pass.

        Args:
            support_deltas (torch.Tensor): support-example features of shape
                (batch_size, k, num_labels).
            p_pred_logits (torch.Tensor): logits of the query prediction,
                shape (batch_size, num_labels).

        Returns:
            torch.Tensor: calibrated logits ``p_pred_logits - prior_pred``,
            shape (batch_size, num_labels).
        """
        features = self.layer_norm(self.input_projection(support_deltas))  # (batch_size, k, embed_dim)
        hidden_seq, _ = self.rnn(features)                                 # (batch_size, k, hidden_dim)
        final_step = hidden_seq[:, -1, :]                                  # (batch_size, hidden_dim)
        prior_pred = self.output_projection(final_step)                    # (batch_size, num_labels)
        return p_pred_logits - prior_pred

    
class RNNModel(nn.Module):
    """Calibration model built on a plain (Elman) RNN."""

    def __init__(self, num_labels, embed_dim=768, hidden_dim=1024, num_layers=1, dropout=0):
        """
        Build the embedding, normalisation, RNN and output layers.

        Args:
            num_labels (int): number of labels.
            embed_dim (int): embedding dimension.
            hidden_dim (int): size of the RNN hidden state.
            num_layers (int): number of RNN layers.
            dropout (float): dropout ratio.
        """
        super().__init__()
        self.num_labels = num_labels
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim

        # Per-label vector -> embedding space, followed by layer normalisation.
        self.input_projection = nn.Linear(num_labels, embed_dim)
        self.layer_norm = nn.LayerNorm(embed_dim)

        # Recurrent encoder over the support sequence.
        self.rnn = nn.RNN(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

        # Last hidden state -> label space.
        self.output_projection = nn.Linear(hidden_dim, num_labels)

    def forward(self, support_deltas):
        """
        Forward pass.

        Args:
            support_deltas (torch.Tensor): support-example features of shape
                (batch_size, k, num_labels).

        Returns:
            torch.Tensor: prediction for the query, shape (batch_size, num_labels).
        """
        features = self.layer_norm(self.input_projection(support_deltas))  # (batch_size, k, embed_dim)
        hidden_seq, _ = self.rnn(features)                                 # (batch_size, k, hidden_dim)
        final_step = hidden_seq[:, -1, :]                                  # (batch_size, hidden_dim)
        return self.output_projection(final_step)                          # (batch_size, num_labels)



import torch.optim as optim
from tqdm import tqdm

def train_model(model, train_loader, num_epochs, learning_rate, device, p_pred_train):
    """
    Train a calibration model with Adam and cross-entropy.

    Every batch of ``train_loader`` must be a 4-tuple
    ``(support, p_pred_query, query_features, p_true_query)``. The model is
    called as ``model(support, log(p_pred_query))`` and its output is treated
    as logits; the target class is the argmax of ``p_true_query``.

    Args:
        model (nn.Module): model to train; moved to ``device``.
        train_loader (DataLoader): training batches; may be empty.
        num_epochs (int): number of passes over ``train_loader``.
        learning_rate (float): Adam learning rate (weight decay is 0.01).
        device (torch.device): device used for the model and the batches.
        p_pred_train: accepted for backward compatibility; not used.

    Returns:
        nn.Module: the trained model (same object as ``model``).
    """
    import logging

    logger = logging.getLogger(__name__)

    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for batch in train_loader:
            support_deltas, p_pred_query, _query_deltas, p_true_query = batch
            support_deltas = support_deltas.to(device)          # (batch_size, k, num_labels)
            p_pred_query = p_pred_query.to(device)              # (batch_size, num_labels)
            p_true_query = p_true_query.to(device)              # (batch_size, num_labels)

            optimizer.zero_grad()

            # Probabilities -> log space, used as the uncalibrated logits.
            p_pred_logits = torch.log(p_pred_query)             # (batch_size, num_labels)

            # The model output is used directly as logits; CrossEntropyLoss
            # applies the softmax itself.
            hat_p = model(support_deltas, p_pred_logits)        # (batch_size, num_labels)

            # p_true_query is one-hot: turn it into class indices.
            _, targets = torch.max(p_true_query, dim=1)         # (batch_size)

            loss = criterion(hat_p, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        if epoch % 10 == 0:
            num_batches = len(train_loader)
            # An empty loader has no average loss; skip instead of dividing by 0.
            if num_batches:
                logger.debug(
                    "Epoch [%d/%d], Loss: %.4f",
                    epoch + 1, num_epochs, running_loss / num_batches,
                )

    return model

def test_model(model, test_loader, device):
    """
    Evaluate a calibration model on the test loader.

    Every batch must be a 4-tuple
    ``(support, p_pred_query, query_features, p_true_query)``. The model is
    called as ``model(support, log(p_pred_query))`` and its output is passed
    through a softmax over the label dimension.

    Args:
        model (nn.Module): trained model.
        test_loader (DataLoader): test batches.
        device (torch.device): device the batches are moved to.

    Returns:
        tuple: ``(accuracy, all_pred)`` where ``accuracy`` is the fraction of
        queries whose argmax matches the argmax of ``p_true_query`` and
        ``all_pred`` is a list with one NumPy array of calibrated
        probabilities per batch.
    """
    model.eval()
    hits = 0
    seen = 0
    all_pred = []
    with torch.no_grad():
        for support_deltas, p_pred_query, query_deltas, p_true_query in tqdm(test_loader, desc="Testing"):
            support_deltas, p_pred_query, query_deltas, p_true_query = (
                tensor.to(device)
                for tensor in (support_deltas, p_pred_query, query_deltas, p_true_query)
            )

            # Log-probabilities of the query serve as uncalibrated logits.
            calibrated_logits = model(support_deltas, torch.log(p_pred_query))  # (batch_size, num_labels)
            hat_p = F.softmax(calibrated_logits, dim=1)

            predicted = hat_p.max(dim=1).indices         # (batch_size)
            targets = p_true_query.max(dim=1).indices    # (batch_size)

            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

            all_pred.append(hat_p.cpu().numpy())

    return hits / seen, all_pred

def train_LinC_model(model, train_loader, num_epochs, learning_rate, device, p_pred_train):
    """
    Train a model that maps query confidences directly to logits.

    Every batch of ``train_loader`` must be a 4-tuple
    ``(support, p_pred_query, query_features, p_true_query)``; only
    ``p_pred_query`` (model input) and ``p_true_query`` (one-hot target) are
    used. Optimisation uses Adam with weight decay 0.01 and cross-entropy.

    Args:
        model (nn.Module): model to train; moved to ``device``.
        train_loader (DataLoader): training batches; may be empty.
        num_epochs (int): number of passes over ``train_loader``.
        learning_rate (float): Adam learning rate.
        device (torch.device): device used for the model and the batches.
        p_pred_train: accepted for backward compatibility; not used.

    Returns:
        nn.Module: the trained model (same object as ``model``).
    """
    import logging

    logger = logging.getLogger(__name__)

    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            _support_deltas, p_pred_query, _query_deltas, p_true_query = batch
            p_pred_query = p_pred_query.to(device)              # (batch_size, num_labels)
            p_true_query = p_true_query.to(device)              # (batch_size, num_labels)

            optimizer.zero_grad()

            hat_p = model(p_pred_query)                         # (batch_size, num_labels)

            # p_true_query is one-hot: turn it into class indices.
            _, targets = torch.max(p_true_query, dim=1)         # (batch_size)

            loss = criterion(hat_p, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        if epoch % 10 == 0:
            num_batches = len(train_loader)
            # An empty loader has no average loss; skip instead of dividing by 0.
            if num_batches:
                logger.debug(
                    "Epoch [%d/%d], Loss: %.4f",
                    epoch + 1, num_epochs, running_loss / num_batches,
                )

    return model

def test_LinC_model(model, test_loader, device):
    """
    Evaluate a model that maps query confidences directly to logits.

    Every batch must be a 4-tuple
    ``(support, p_pred_query, query_features, p_true_query)``. The accuracy is
    printed as a percentage and returned as a fraction.

    Args:
        model (nn.Module): trained model.
        test_loader (DataLoader): test batches.
        device (torch.device): device the batches are moved to.

    Returns:
        float: fraction of queries whose argmax prediction matches the argmax
        of ``p_true_query``.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for support_deltas, p_pred_query, query_deltas, p_true_query in tqdm(test_loader, desc="Testing"):
            support_deltas, p_pred_query, query_deltas, p_true_query = (
                tensor.to(device)
                for tensor in (support_deltas, p_pred_query, query_deltas, p_true_query)
            )

            hat_p = model(p_pred_query)                  # (batch_size, num_labels)

            predicted = hat_p.max(dim=1).indices         # (batch_size)
            targets = p_true_query.max(dim=1).indices    # (batch_size)

            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

    accuracy = hits / seen
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return accuracy


class LinC(nn.Module):
    """Single affine layer mapping a ``num_labels`` vector to ``num_labels`` logits."""

    def __init__(self, num_labels):
        super().__init__()
        self.classifier = nn.Linear(num_labels, num_labels, bias=True)

    def forward(self, x):
        """Return the classifier logits for ``x``."""
        return self.classifier(x)

Using device: cuda


In [None]:
import random
import numpy as np
import torch
import time
import json
from pathlib import Path
from datetime import datetime

# ──────────────── Dataset label spaces ────────────────
# Maps each dataset name to its ordered list of label strings; the position of
# a label in the list is used as its class index.
dataset_label = {
    "MNLI":     ['no','maybe','yes'],
    "SST-2":    ['negative', 'positive'],
    "MRPC":     ['no','yes'],
    "QNLI":     ['no','yes'],
    "RTE":      ['no','yes'],
    "WiC":      ['false', 'true'],
    "YouTube":  ['truthful','deceptive'],
    "AI-GA_1-1":['0', '1'],
}

# ──────────────── Experiment configuration ────────────────
seeds         = [42, 2025, 1234]   # every dataset is run once per seed
datasets      = ["MNLI","SST-2","MRPC","QNLI","RTE","WiC","YouTube","AI-GA_1-1"]
# shot, param and model_name are only used to build file names and paths.
shot          = "3"
param         = "7"
model_name    = "qwen"
device        = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs    = 200                # training epochs per run
learning_rate = 1e-4               # Adam learning rate


# ──────────────── Utility functions ────────────────
def set_seed(seed: int):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    # Deterministic cuDNN kernels, no auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def reset_gpu_mem_stats():
    """Reset the CUDA peak-memory counters of ``device``; no-op without CUDA."""
    if not torch.cuda.is_available():
        return
    torch.cuda.reset_peak_memory_stats(device)


def get_gpu_peak():
    """Return the peak CUDA memory allocated on ``device`` in bytes, or 0 without CUDA."""
    return torch.cuda.max_memory_allocated(device) if torch.cuda.is_available() else 0


# ──────────────── Single experiment ────────────────
def run_one(seed: int, dataset: str):
    """
    Train and evaluate one GRU calibrator for a (seed, dataset) pair.

    The train / test JSONL paths are built from the module-level
    ``model_name``, ``param`` and ``shot`` settings. Training and inference
    are timed separately and the CUDA peak memory of each phase is recorded.

    Args:
        seed: random seed applied through ``set_seed``.
        dataset: key of ``dataset_label`` selecting the label space and the
            data directory.

    Returns:
        tuple: ``(accuracy, metrics)`` where ``metrics`` holds
        ``train_time_s``, ``train_gpu_peak_bytes``, ``infer_time_total_s``,
        ``infer_time_per_sample_s`` and ``infer_gpu_peak_bytes``.
    """
    def _wait_for_gpu():
        # CUDA kernels are launched asynchronously; wait for them so that a
        # timestamp taken afterwards covers work that has actually finished.
        if torch.cuda.is_available():
            torch.cuda.synchronize(device)

    # 1. Fix the random seed
    set_seed(seed)

    # 2. Build the data paths
    train_data_path = (
        f"/workspace/ICL_calibration_v2/cache_0216/"
        f"{dataset}/{model_name}_{param}b-top_bm25-{shot}increase-probe.jsonl"
    )
    test_data_path = (
        f"/workspace/ICL_calibration_v2/output_0216/"
        f"{dataset}/{model_name}_{param}b-top_bm25-{shot}increase.jsonl"
    )

    # 3. Read the data
    label_space = dataset_label[dataset]
    all_true_conf_train, all_pred_conf_train, _, _, _ = read_json(
        train_data_path, label_space, sample_num=-1
    )
    all_true_conf_test,  all_pred_conf_test,  _, _, _ = read_json(
        test_data_path, label_space
    )

    # 4. Build the data loaders
    train_loader, test_loader = data_reader(
        all_true_conf_train,
        all_pred_conf_train,
        all_true_conf_test,
        all_pred_conf_test
    )

    # 5. Create the model
    model = GRUModel(num_labels=len(label_space)).to(device)

    # Training phase: time and peak memory
    reset_gpu_mem_stats()
    _wait_for_gpu()
    train_start = time.perf_counter()

    model = train_model(
        model,
        train_loader,
        num_epochs,
        learning_rate,
        device,
        all_pred_conf_train
    )

    _wait_for_gpu()
    train_elapsed   = time.perf_counter() - train_start
    train_gpu_peak  = get_gpu_peak()

    # Inference phase: time and peak memory
    reset_gpu_mem_stats()
    _wait_for_gpu()
    infer_start = time.perf_counter()

    accuracy, _ = test_model(model, test_loader, device)

    _wait_for_gpu()
    infer_elapsed   = time.perf_counter() - infer_start
    infer_gpu_peak  = get_gpu_peak()
    infer_avg_time  = infer_elapsed / len(test_loader.dataset)

    metrics = {
        # training
        "train_time_s":          train_elapsed,
        "train_gpu_peak_bytes":  train_gpu_peak,
        # inference
        "infer_time_total_s":    infer_elapsed,
        "infer_time_per_sample_s": infer_avg_time,
        "infer_gpu_peak_bytes":  infer_gpu_peak,
    }
    return accuracy, metrics


# ──────────────── Main workflow ────────────────
def main():
    """Run ``run_one`` for every dataset/seed pair and save a JSON report.

    For each dataset in ``datasets`` the function runs one experiment per
    seed in ``seeds``, prints a one-line summary per run, and aggregates the
    accuracies (list, mean, sample standard deviation) together with the
    per-run metrics dicts. The aggregated results are written to a
    timestamped file under ``monitoring_logs/``.
    """
    results = {}
    for ds in datasets:
        accs = []
        metrics_all = []
        for sd in seeds:
            print(f"Running dataset={ds}, seed={sd} ...")
            acc, m = run_one(sd, ds)
            print(
                f"  -> acc={acc:.4f} | "
                f"train {m['train_time_s']:.1f}s /"
                f"{m['train_gpu_peak_bytes']/1e9:.3f}GB | "
                f"infer {m['infer_time_per_sample_s']*1e3:.2f}ms/样本 /"
                f"{m['infer_gpu_peak_bytes']/1e9:.3f}GB"
            )
            # Store a plain Python float so the value is JSON-serialisable
            # even if the accuracy comes back as a NumPy/torch scalar.
            accs.append(float(acc))
            metrics_all.append(m)

        # The sample standard deviation (ddof=1) is undefined for fewer than
        # two runs: NumPy would return NaN with a RuntimeWarning and json.dump
        # would emit a non-standard "NaN" token. Report 0.0 in that case.
        std_accuracy = float(np.std(accs, ddof=1)) if len(accs) > 1 else 0.0

        results[ds] = {
            "accuracies":      accs,
            "mean_accuracy":   float(np.mean(accs)),
            "std_accuracy":    std_accuracy,
            "metrics":         metrics_all
        }

    # Build a timestamped name for the monitoring file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_dir = Path("monitoring_logs")
    out_dir.mkdir(parents=True, exist_ok=True)
    outfile = out_dir / f"monitor_{model_name}_{param}b_{shot}shot_{timestamp}.json"

    with outfile.open("w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"All experiments done. Results saved to: {outfile}")


# Entry point: start the experiment sweep only when this module is executed
# directly (``__name__`` is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


Running dataset=MNLI, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.50it/s]


  -> acc=0.8157 | train 93.4s /3.300GB | infer 0.02ms/样本 /1.098GB
Running dataset=MNLI, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.21it/s]


  -> acc=0.8154 | train 87.9s /3.300GB | infer 0.02ms/样本 /1.098GB
Running dataset=MNLI, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.33it/s]


  -> acc=0.8153 | train 99.6s /3.300GB | infer 0.02ms/样本 /1.098GB
Running dataset=SST-2, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 23.73it/s]


  -> acc=0.9511 | train 35.3s /1.206GB | infer 0.03ms/样本 /0.316GB
Running dataset=SST-2, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 24.04it/s]


  -> acc=0.9511 | train 36.4s /1.206GB | infer 0.02ms/样本 /0.316GB
Running dataset=SST-2, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 21.78it/s]


  -> acc=0.9511 | train 31.0s /1.206GB | infer 0.03ms/样本 /0.316GB
Running dataset=MRPC, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 24.88it/s]


  -> acc=0.7391 | train 25.9s /0.927GB | infer 0.03ms/样本 /0.307GB
Running dataset=MRPC, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 24.43it/s]


  -> acc=0.7391 | train 24.8s /0.927GB | infer 0.03ms/样本 /0.307GB
Running dataset=MRPC, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 24.35it/s]


  -> acc=0.7391 | train 23.0s /0.927GB | infer 0.03ms/样本 /0.307GB
Running dataset=QNLI, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  6.82it/s]


  -> acc=0.8041 | train 35.7s /1.206GB | infer 0.03ms/样本 /0.672GB
Running dataset=QNLI, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.19it/s]


  -> acc=0.8040 | train 29.3s /1.206GB | infer 0.04ms/样本 /0.672GB
Running dataset=QNLI, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  2.78it/s]


  -> acc=0.8041 | train 33.1s /1.206GB | infer 0.07ms/样本 /0.672GB
Running dataset=RTE, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 148.72it/s]


  -> acc=0.8375 | train 17.0s /0.683GB | infer 0.06ms/样本 /0.166GB
Running dataset=RTE, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 127.29it/s]


  -> acc=0.8375 | train 15.9s /0.683GB | infer 0.04ms/样本 /0.166GB
Running dataset=RTE, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 132.72it/s]


  -> acc=0.8375 | train 16.8s /0.683GB | infer 0.04ms/样本 /0.166GB
Running dataset=WiC, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 27.43it/s]


  -> acc=0.6271 | train 30.2s /1.295GB | infer 0.03ms/样本 /0.275GB
Running dataset=WiC, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 30.19it/s]


  -> acc=0.6286 | train 37.3s /1.295GB | infer 0.03ms/样本 /0.275GB
Running dataset=WiC, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 28.52it/s]


  -> acc=0.6293 | train 38.9s /1.295GB | infer 0.03ms/样本 /0.275GB
Running dataset=YouTube, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 90.39it/s]


  -> acc=0.9082 | train 10.0s /0.490GB | infer 0.04ms/样本 /0.177GB
Running dataset=YouTube, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 99.35it/s]


  -> acc=0.9107 | train 10.0s /0.490GB | infer 0.03ms/样本 /0.177GB
Running dataset=YouTube, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00, 102.40it/s]


  -> acc=0.9107 | train 10.9s /0.490GB | infer 0.03ms/样本 /0.177GB
Running dataset=AI-GA_1-1, seed=42 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]


  -> acc=0.7971 | train 28.0s /1.064GB | infer 0.06ms/样本 /0.698GB
Running dataset=AI-GA_1-1, seed=2025 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  6.70it/s]


  -> acc=0.7976 | train 33.2s /1.064GB | infer 0.03ms/样本 /0.698GB
Running dataset=AI-GA_1-1, seed=1234 ...


Testing: 100%|██████████| 1/1 [00:00<00:00,  6.67it/s]

  -> acc=0.7982 | train 30.7s /1.064GB | infer 0.03ms/样本 /0.698GB
All experiments done. Results saved to: monitoring_logs/monitor_qwen_7b_3shot_20250728_145132.json



