In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import accuracy_score
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Custom dataset class
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one text per item and pairs it with its label.

    Args:
        texts: Indexable, sized collection of input strings.
        labels: Indexable, sized collection of labels, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, **kwargs)`` that returns a
            mapping containing ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length every sequence is padded or truncated to.

    Raises:
        ValueError: if ``texts`` and ``labels`` do not have the same length.
    """

    def __init__(self, texts, labels, tokenizer, max_length):
        # Fail fast: a length mismatch would otherwise surface later as an
        # IndexError mid-epoch, or silently ignore trailing labels.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of (text, label) pairs."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return it together with its label.

        Returns:
            Dict with ``'input_ids'`` and ``'attention_mask'`` (both flattened to
            one dimension) and ``'labels'`` as a ``torch.long`` tensor.
        """
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            # The tokenizer is asked for 'pt' tensors; flatten drops the leading
            # batch dimension so the DataLoader can stack items itself.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            # as_tensor also accepts a label that is already a tensor without
            # triggering the copy-construct warning torch.tensor() would emit.
            'labels': torch.as_tensor(label, dtype=torch.long)
        }

In [3]:
# Configuration
SEED = 42  # fixed so the train/val split, shuffling and weight init repeat across runs
MODEL_PATH = './bert-base-uncased'  # local directory holding the pretrained checkpoint
torch.manual_seed(SEED)  # seeds torch's global RNG before anything random happens

# Load the dataset
texts = ["Example sentence 1", "Example sentence 2", "Example sentence 3"]  # sample texts
labels = [0, 1, 0]  # sample labels
tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)
dataset = TextDataset(texts, labels, tokenizer, max_length=32)

# Split into training and validation sets (80% / remainder)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# A dedicated generator keeps the split identical no matter how much of the
# global RNG stream earlier code has consumed.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)

# Data loaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Model initialisation
model = BertForSequenceClassification.from_pretrained(MODEL_PATH, num_labels=2)
model = model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

# Training function
def train_epoch(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and report accuracy and loss.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)`` whose
            output exposes a ``logits`` attribute.
        data_loader: Iterable of batches; each batch is a mapping with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per batch.
        criterion: Callable ``criterion(logits, labels)`` returning the loss that
            is backpropagated.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the fraction of examples iterated in this
        pass that were classified correctly, and the mean of the per-batch losses.

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    model.train()
    losses = []
    correct_predictions = 0
    # Count the examples actually iterated instead of using len(dataset): the
    # two differ with drop_last=True or a subset sampler, and some datasets
    # have no __len__ at all.
    examples_seen = 0
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)
        preds = outputs.logits.argmax(dim=1)
        correct_predictions += torch.sum(preds == labels)
        examples_seen += labels.numel()
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return correct_predictions * 1.0 / examples_seen, np.mean(losses)

Loading weights: 100%|██████████| 199/199 [00:00<00:00, 445.95it/s, Materializing param=bert.pooler.dense.weight]                               
BertForSequenceClassification LOAD REPORT from: ./bert-base-uncased
Key                                        | Status     | 
-------------------------------------------+------------+-
cls.predictions.transform.dense.weight     | UNEXPECTED | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED | 
cls.seq_relationship.bias                  | UNEXPECTED | 
cls.predictions.bias                       | UNEXPECTED | 
cls.predictions.transform.dense.bias       | UNEXPECTED | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED | 
cls.seq_relationship.weight                | UNEXPECTED | 
classifier.weight                          | MISSING    | 
classifier.bias                            | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those 

In [4]:
# Validation function
def eval_model(model, data_loader, criterion, device):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)`` whose
            output exposes a ``logits`` attribute.
        data_loader: Iterable of batches; each batch is a mapping with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        criterion: Callable ``criterion(logits, labels)`` returning the loss.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the fraction of examples iterated in this
        pass that were classified correctly, and the mean of the per-batch losses.

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    model.eval()
    losses = []
    correct_predictions = 0
    # Count the examples actually iterated instead of using len(dataset): the
    # two differ with drop_last=True or a subset sampler, and some datasets
    # have no __len__ at all.
    examples_seen = 0
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            preds = outputs.logits.argmax(dim=1)
            correct_predictions += torch.sum(preds == labels)
            examples_seen += labels.numel()
            losses.append(loss.item())
    return correct_predictions * 1.0 / examples_seen, np.mean(losses)

In [10]:
# Training and evaluation
epochs = 3
for epoch in range(epochs):
    # One training pass followed by one validation pass per epoch.
    train_acc, train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_acc, val_loss = eval_model(model, val_loader, criterion, device)

    # Emit the three report lines in a single call, one per line.
    print(
        f'Epoch {epoch + 1}/{epochs}',
        f'Train loss: {train_loss:.4f}, accuracy: {train_acc:.4f}',
        f'Val   loss: {val_loss:.4f}, accuracy: {val_acc:.4f}',
        sep='\n',
    )

Epoch 1/3
Train loss: 0.1006, accuracy: 1.0000
Val   loss: 2.7111, accuracy: 0.0000
Epoch 2/3
Train loss: 0.1348, accuracy: 1.0000
Val   loss: 2.8078, accuracy: 0.0000
Epoch 3/3
Train loss: 0.0905, accuracy: 1.0000
Val   loss: 2.7750, accuracy: 0.0000
