In [1]:
!pip install transformers
!pip install datasets
!pip install textattack

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0mLooking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0mLooking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0m

In [2]:
!pip install wandb

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
[0m

In [3]:
import pandas as pd
import torch
import numpy as np
from transformers import GPT2LMHeadModel, GPT2Tokenizer, BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
import nltk
from nltk.corpus import wordnet
import os
os.environ["WANDB_DISABLED"] = "true"

In [4]:
import wandb
# 下载WordNet
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
# 1. 加载 IMDB 数据集
dataset = load_dataset("imdb")
# Load model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Tokenization function with max_length and padding
def tokenize(batch):
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=256)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
# Tokenize the dataset
dataset = dataset.map(tokenize, batched=True)

In [7]:
# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,               # 增加训练轮数
    per_device_train_batch_size=32,   # 增加批量大小
    per_device_eval_batch_size=64,    # 增加评估批量大小
    warmup_steps=500,                  # 增加预热步数
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=20,                  # 增加日志记录频率
    evaluation_strategy="epoch",       # 每个epoch进行评估
    save_steps=1000,                   # 增加保存步数
    eval_steps=500,                    # 增加评估步数
    learning_rate=5e-5,                # 添加学习率参数                  # Increased eval steps
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [8]:
# Define compute metrics function
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, average='weighted')
    recall = recall_score(labels, predictions, average='weighted')
    f1 = f1_score(labels, predictions, average='weighted')
    
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

In [9]:
# Create a Trainer instance
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],  # Use small subset here
    eval_dataset=dataset['test'],
    compute_metrics=compute_metrics,
)

# Start model training
trainer.train()

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[2024-11-21 17:59:40,312] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


  attn_output = torch.nn.functional.scaled_dot_product_attention(
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.2314,0.263557,0.89124,0.897617,0.89124,0.890802
2,0.1742,0.203964,0.9222,0.922679,0.9222,0.922178
3,0.054,0.291936,0.92336,0.923435,0.92336,0.923357


TrainOutput(global_step=2346, training_loss=0.18716185125510407, metrics={'train_runtime': 1229.945, 'train_samples_per_second': 60.978, 'train_steps_per_second': 1.907, 'total_flos': 9866664576000000.0, 'train_loss': 0.18716185125510407, 'epoch': 3.0})

In [10]:
eval_results = trainer.evaluate()

# Print evaluation results
print("\nEvaluation results:")
for key, value in eval_results.items():
    print(f"{key}: {value:.4f}")


Evaluation results:
eval_loss: 0.2919
eval_accuracy: 0.9234
eval_precision: 0.9234
eval_recall: 0.9234
eval_f1: 0.9234
eval_runtime: 99.3973
eval_samples_per_second: 251.5160
eval_steps_per_second: 3.9340
epoch: 3.0000


In [11]:
from transformers import Trainer, TrainingArguments
import torch
from torch.nn import CrossEntropyLoss

class AdversarialTrainer(Trainer):
    def __init__(self, *args, epsilon=0.1, alpha=0.5, **kwargs):
        """
        初始化对抗训练器
        :param epsilon: 对抗扰动的强度
        :param alpha: 正常损失与对抗损失的权重
        """
        super().__init__(*args, **kwargs)
        self.epsilon = epsilon  # 对抗扰动的强度
        self.alpha = alpha      # 正常损失与对抗损失的加权系数
        self.training = True    # 标记当前是否在训练模式

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        计算损失
        :param model: 训练的模型
        :param inputs: 输入数据，包括 input_ids、labels 和 attention_mask
        :param return_outputs: 是否返回模型输出
        :param num_items_in_batch: 每个批次的样本数量
        :return: 计算得到的总损失
        """
        input_ids = inputs.get("input_ids")  # 获取输入的 ID
        labels = inputs.get("labels")          # 获取标签
        attention_mask = inputs.get("attention_mask")  # 获取注意力掩码

        # 正常前向传播，计算正常损失
        outputs = model(**inputs)
        normal_loss = outputs.loss

        if self.training:
            # 获取输入嵌入并设置为可求导
            embeddings = model.get_input_embeddings()(input_ids).detach()
            embeddings.requires_grad = True

            # 计算对抗输出
            adv_outputs = model(inputs_embeds=embeddings, attention_mask=attention_mask)
            adv_loss = CrossEntropyLoss()(adv_outputs.logits, labels)  # 计算对抗损失

            # 计算总损失，结合正常损失和对抗损失
            total_loss = (1 - self.alpha) * normal_loss + self.alpha * adv_loss
        else:
            total_loss = normal_loss  # 如果不是训练模式，仅返回正常损失

        return (total_loss, outputs) if return_outputs else total_loss  # 根据需要返回损失和输出

# 使用新的训练参数
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,               # 增加训练轮数
    per_device_train_batch_size=32,   # 增加批量大小
    per_device_eval_batch_size=64,    # 增加评估批量大小
    warmup_steps=500,                  # 增加预热步数
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=20,                  # 增加日志记录频率
    evaluation_strategy="epoch",       # 每个epoch进行评估
    save_steps=1000,                   # 增加保存步数
    eval_steps=500,                    # 增加评估步数
    learning_rate=5e-5,                # 添加学习率参数                  # Increased eval steps
)

# 创建训练器
trainer = AdversarialTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    compute_metrics=compute_metrics,

)

# 开始训练
trainer.train()


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.0546,0.30312,0.9146,0.914857,0.9146,0.914587
2,0.0462,0.357199,0.9184,0.918584,0.9184,0.918391
3,0.0267,0.425629,0.9224,0.922496,0.9224,0.922396


TrainOutput(global_step=2346, training_loss=0.046725945998989965, metrics={'train_runtime': 2288.3091, 'train_samples_per_second': 32.775, 'train_steps_per_second': 1.025, 'total_flos': 9866664576000000.0, 'train_loss': 0.046725945998989965, 'epoch': 3.0})

In [12]:


# 3. 定义替换同义词的函数
def get_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())  # 添加同义词
    return list(synonyms)

def replace_with_synonyms(text):
    words = text.split()
    modified_words = []
    
    for word in words:
        if np.random.rand() < 0.2:  # 20% 概率替换单词
            synonyms = get_synonyms(word)
            if synonyms:
                word = np.random.choice(synonyms)  # 随机选择一个同义词
        modified_words.append(word)
        
    return ' '.join(modified_words)

# 4. 定义生成对抗样本的函数（使用替换同义词）
def generate_adversarial_sample_with_replacement(text):
    modified_text = replace_with_synonyms(text)  # 使用同义词替换
    return modified_text

# 5. 生成对抗样本并与 IMDB 数据结合
adversarial_samples = []

for review in dataset['train']['text']:  
    adversarial_sample = generate_adversarial_sample_with_replacement(review)
    adversarial_samples.append(adversarial_sample)

In [13]:
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# 6. 用原始标签创建新的数据集
original_labels = dataset['train']['label']  # 对应的原始标签
combined_texts = dataset['train']['text']+ adversarial_samples
combined_labels = original_labels + original_labels

# 7. 划分数据集为训练集和验证集
train_texts, val_texts, train_labels, val_labels = train_test_split(combined_texts, combined_labels, test_size=0.1)

# 8. 编码数据
train_encodings = bert_tokenizer(train_texts, truncation=True, padding=True, max_length=512)
val_encodings = bert_tokenizer(val_texts, truncation=True, padding=True, max_length=512)

# 9. 创建数据集类
class IMDbDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# 10. 创建训练和验证数据集
train_dataset = IMDbDataset(train_encodings, train_labels)
val_dataset = IMDbDataset(val_encodings, val_labels)

# 11. 定义训练参数
training_args = TrainingArguments(
    output_dir='./results/adversarial_model',
    num_train_epochs=3,               # 增加训练轮数
    per_device_train_batch_size=32,   # 增加批量大小
    per_device_eval_batch_size=64,    # 增加评估批量大小
    warmup_steps=500,                  # 增加预热步数
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=20,                  # 增加日志记录频率
    evaluation_strategy="epoch",       # 每个epoch进行评估
    save_steps=1000,                   # 增加保存步数
    eval_steps=500,                    # 增加评估步数
    learning_rate=5e-5,                # 添加学习率参数     
)

# 12. 初始化 BERT 模型
bert_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# 13. 定义训练器
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# 14. 开始训练
trainer.train()

# 15. 评估模型
adversarial_metrics = trainer.evaluate()
print("生成对抗样本后的模型评估结果：", adversarial_metrics)

# 16. 可视化性能差异 (如果需要)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.0363,0.042059,0.9878,0.987802,0.9878,0.9878
2,0.0134,0.041465,0.9914,0.991449,0.9914,0.9914
3,0.0012,0.028877,0.995,0.995002,0.995,0.995


生成对抗样本后的模型评估结果： {'eval_loss': 0.02887747250497341, 'eval_accuracy': 0.995, 'eval_precision': 0.9950019960003194, 'eval_recall': 0.995, 'eval_f1': 0.9950000030000012, 'eval_runtime': 40.3613, 'eval_samples_per_second': 123.881, 'eval_steps_per_second': 1.957, 'epoch': 3.0}
