In [1]:
from datasets import load_dataset

# Fetch the Rotten Tomatoes dataset and keep its train and test splits
tomatoes = load_dataset("rotten_tomatoes")
train_data = tomatoes["train"]
test_data = tomatoes["test"]

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from transformers import set_seed

# Seed the RNGs before the model is instantiated: the classification head
# (classifier.weight / classifier.bias) is newly initialised rather than loaded
# from the checkpoint, so without a seed its starting weights differ on every
# run. 42 is also the default seed of TrainingArguments.
set_seed(42)

# Load the pretrained checkpoint with a 2-label classification head, and its tokenizer
model_id = "bert-base-cased"
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
from transformers import DataCollatorWithPadding

# Collator that pads the sequences of a batch to the length of the longest one
data_collator = DataCollatorWithPadding(tokenizer)


def preprocess_function(examples):
    """Tokenize the ``text`` entry of a batch with truncation enabled.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw text(s).

    Returns:
        Whatever the notebook-level ``tokenizer`` returns for those texts
        when called with ``truncation=True``.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded


# Tokenize both splits in batched mode (train first, then test)
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_data, test_data)
)

In [4]:
import numpy as np
import evaluate


# Cache for the F1 metric so it is loaded once instead of on every evaluation call
_F1_METRIC = None


def _get_f1_metric():
    """Return the shared F1 metric object, loading it on first use."""
    global _F1_METRIC
    if _F1_METRIC is None:
        _F1_METRIC = evaluate.load("f1")
    return _F1_METRIC


def compute_metrics(eval_pred):
    """Compute the F1 score for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``. The predicted class of each
            example is the argmax of ``logits`` over the last axis.

    Returns:
        dict: ``{"f1": <score>}``.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    scores = _get_f1_metric().compute(predictions=predictions, references=labels)
    return {"f1": scores["f1"]}

In [5]:
from transformers import TrainingArguments, Trainer

# Hyperparameters for fine-tuning; this object is passed to every Trainer below
training_args = TrainingArguments(
    output_dir="model",
    num_train_epochs=1,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    save_strategy="epoch",  # write a checkpoint at the end of each epoch
    report_to="none",  # no external logging integrations
)
# Trainer that runs the fine-tuning loop and the evaluation
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    # `tokenizer=` is deprecated in Trainer and triggers a FutureWarning;
    # `processing_class=` is its replacement (available since transformers 4.46).
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer.train()
# Last expression of the cell, so the evaluation metrics on the test split are displayed
trainer.evaluate()

  trainer = Trainer(


Step,Training Loss
500,0.4164




{'eval_loss': 0.3681354820728302,
 'eval_f1': 0.842497670083877,
 'eval_runtime': 8.3084,
 'eval_samples_per_second': 128.304,
 'eval_steps_per_second': 8.064,
 'epoch': 1.0}

In [6]:
from transformers import set_seed

# Seed the RNGs before re-instantiating the model: the classification head is
# newly initialised rather than loaded from the checkpoint, so without a seed
# this experiment would start from a different random head on every run.
# 42 is also the default seed of TrainingArguments.
set_seed(42)

# Reload a fresh copy of the pretrained model and its tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Print the qualified name of every parameter tensor, one per line
for parameter_name, _ in model.named_parameters():
    print(parameter_name)

bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.attention.self.query.weight
bert.enc

In [8]:
# Keep only the classification head trainable; every other parameter is frozen
for name, param in model.named_parameters():
    param.requires_grad = name.startswith("classifier")

In [9]:
from transformers import TrainingArguments, Trainer

# Trainer for the current `model`; only parameters with requires_grad=True are updated
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    # `tokenizer=` is deprecated in Trainer and triggers a FutureWarning;
    # `processing_class=` is its replacement (available since transformers 4.46).
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer.train()
# Last expression of the cell, so the evaluation metrics on the test split are displayed
trainer.evaluate()

  trainer = Trainer(


Step,Training Loss
500,0.695




{'eval_loss': 0.6818975806236267,
 'eval_f1': 0.6308724832214765,
 'eval_runtime': 7.7131,
 'eval_samples_per_second': 138.207,
 'eval_steps_per_second': 8.687,
 'epoch': 1.0}

In [10]:
from transformers import set_seed

# Seed the RNGs before re-instantiating the model: the classification head is
# newly initialised rather than loaded from the checkpoint, so without a seed
# this experiment would start from a different random head on every run.
# 42 is also the default seed of TrainingArguments.
set_seed(42)

# Reload a fresh copy of the pretrained model and its tokenizer
model_id = "bert-base-cased"
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Freeze every parameter that comes before `bert.encoder.layer.10` (the 11th
# encoder block); everything from that block onward stays trainable.
# The cut-off is located by parameter name rather than by a hard-coded position.
# For bert-base-cased it is index 165: 5 embedding tensors + 10 layers x 16 tensors.
first_trainable_prefix = "bert.encoder.layer.10."
frozen = True
for name, param in model.named_parameters():
    if name.startswith(first_trainable_prefix):
        # Reached the first trainable block: stop freezing from here on
        frozen = False
    if frozen:
        param.requires_grad = False
if frozen:
    # The prefix never matched, so the whole model was frozen: fail loudly
    raise ValueError(f"No parameter name starts with {first_trainable_prefix!r}")
# Trainer for the current `model`; only parameters with requires_grad=True are updated
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    # `tokenizer=` is deprecated in Trainer and triggers a FutureWarning;
    # `processing_class=` is its replacement (available since transformers 4.46).
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer.train()
# Last expression of the cell, so the evaluation metrics on the test split are displayed
trainer.evaluate()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
500,0.4768




{'eval_loss': 0.41169703006744385,
 'eval_f1': 0.8164435946462715,
 'eval_runtime': 7.4414,
 'eval_samples_per_second': 143.252,
 'eval_steps_per_second': 9.004,
 'epoch': 1.0}