In [1]:
import torch
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import Dataset
from transformers import EarlyStoppingCallback
from torch.nn import CrossEntropyLoss

In [2]:
# 检查GPU可用性
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# 数据加载
df = pd.read_csv(r"D:\GitHubRepos\is6941-ml-social-media\taptap\data\integrated\lm_cleaned_taptap_reviews.csv")
df = df[['review_content', 'sentiment']].dropna()
df['sentiment'] = df['sentiment'].astype(int)

In [4]:
# 划分数据集
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['review_content'].tolist(),
    df['sentiment'].tolist(),
    test_size=0.2,
    random_state=42
)

In [5]:
# 修正后的数据集类
class SentimentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, 
                                 padding='max_length',  # 统一填充长度
                                 truncation=True, 
                                 max_length=256)
        self.labels = labels

    def __getitem__(self, idx):
        return {
            'input_ids': torch.tensor(self.encodings['input_ids'][idx]),
            'attention_mask': torch.tensor(self.encodings['attention_mask'][idx]),
            'labels': torch.tensor(self.labels[idx])
        }

    def __len__(self):
        return len(self.labels)

In [6]:
# 计算类别权重
class_weights = torch.tensor(
    [len(train_labels)/sum(train_labels),  # 正样本权重
     len(train_labels)/(len(train_labels)-sum(train_labels))],  # 负样本权重
    device=device
)

In [7]:
# 初始化模型和分词器
tokenizer = BertTokenizer.from_pretrained("hfl/chinese-roberta-wwm-ext")
# 修改模型初始化
model = BertForSequenceClassification.from_pretrained(
    "hfl/chinese-roberta-wwm-ext",
    num_labels=2,
)

# 将类别权重移动到GPU
class_weights = class_weights.to(device)



vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/19.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/689 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/412M [00:00<?, ?B/s]

  return torch.load(checkpoint_file, map_location="cpu")
Some weights of the model checkpoint at hfl/chinese-roberta-wwm-ext were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSeque

In [8]:
# 创建数据集（保持CPU张量）
train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
test_dataset = SentimentDataset(test_texts, test_labels, tokenizer)

In [9]:
# 训练配置（自动处理数据到GPU）
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    evaluation_strategy="epoch",
    fp16=True,  # 自动启用pin_memory
    dataloader_pin_memory=True,  # 显式启用内存固定
    # learning_rate=3e-5,  # 初始学习率从默认的5e-5调低
    # warmup_ratio=0.1,    # 添加学习率预热
    # weight_decay=0.01,   # L2正则化
    # gradient_accumulation_steps=2,   # 梯度累积
)

In [10]:
# 自定义评估函数
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    print("\nClassification Report:")
    print(classification_report(labels, preds))
    print("Confusion Matrix:")
    print(confusion_matrix(labels, preds))
    return {'accuracy': (preds == labels).mean()}

In [11]:
# 创建训练器
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [12]:
# 开始训练（自动处理数据迁移）
trainer.train()



  0%|          | 0/3000 [00:00<?, ?it/s]

{'loss': 0.4236, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.5}
{'loss': 0.3798, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.0}


  0%|          | 0/125 [00:00<?, ?it/s]


Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.70      0.75      2740
           1       0.85      0.91      0.88      5257

    accuracy                           0.84      7997
   macro avg       0.83      0.80      0.81      7997
weighted avg       0.83      0.84      0.83      7997

Confusion Matrix:
[[1920  820]
 [ 482 4775]]
{'eval_loss': 0.3587197959423065, 'eval_accuracy': 0.8371889458546955, 'eval_runtime': 16.0102, 'eval_samples_per_second': 499.493, 'eval_steps_per_second': 7.808, 'epoch': 1.0}
{'loss': 0.2815, 'learning_rate': 2.5e-05, 'epoch': 1.5}
{'loss': 0.2718, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


  0%|          | 0/125 [00:00<?, ?it/s]


Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.74      0.76      2740
           1       0.87      0.89      0.88      5257

    accuracy                           0.84      7997
   macro avg       0.83      0.82      0.82      7997
weighted avg       0.84      0.84      0.84      7997

Confusion Matrix:
[[2038  702]
 [ 572 4685]]
{'eval_loss': 0.3609601557254791, 'eval_accuracy': 0.8406902588470676, 'eval_runtime': 15.9971, 'eval_samples_per_second': 499.904, 'eval_steps_per_second': 7.814, 'epoch': 2.0}
{'loss': 0.1583, 'learning_rate': 8.366666666666667e-06, 'epoch': 2.5}
{'loss': 0.1541, 'learning_rate': 3.3333333333333334e-08, 'epoch': 3.0}


  0%|          | 0/125 [00:00<?, ?it/s]


Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.75      0.76      2740
           1       0.87      0.89      0.88      5257

    accuracy                           0.84      7997
   macro avg       0.82      0.82      0.82      7997
weighted avg       0.84      0.84      0.84      7997

Confusion Matrix:
[[2057  683]
 [ 593 4664]]
{'eval_loss': 0.47668203711509705, 'eval_accuracy': 0.8404401650618982, 'eval_runtime': 15.8009, 'eval_samples_per_second': 506.11, 'eval_steps_per_second': 7.911, 'epoch': 3.0}
{'train_runtime': 673.5443, 'train_samples_per_second': 142.476, 'train_steps_per_second': 4.454, 'train_loss': 0.2781887003580729, 'epoch': 3.0}


TrainOutput(global_step=3000, training_loss=0.2781887003580729, metrics={'train_runtime': 673.5443, 'train_samples_per_second': 142.476, 'train_steps_per_second': 4.454, 'train_loss': 0.2781887003580729, 'epoch': 3.0})

In [13]:
# 最终测试集评估
test_results = trainer.predict(test_dataset)
print("\nFinal Test Set Evaluation:")
compute_metrics(test_results)

  0%|          | 0/125 [00:00<?, ?it/s]


Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.75      0.76      2740
           1       0.87      0.89      0.88      5257

    accuracy                           0.84      7997
   macro avg       0.82      0.82      0.82      7997
weighted avg       0.84      0.84      0.84      7997

Confusion Matrix:
[[2057  683]
 [ 593 4664]]

Final Test Set Evaluation:

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.75      0.76      2740
           1       0.87      0.89      0.88      5257

    accuracy                           0.84      7997
   macro avg       0.82      0.82      0.82      7997
weighted avg       0.84      0.84      0.84      7997

Confusion Matrix:
[[2057  683]
 [ 593 4664]]


{'accuracy': 0.8404401650618982}