In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class LoRALayer(nn.Module):
    """Wrap a linear layer with a trainable low-rank (LoRA) update.

    The wrapped layer's own parameters are frozen. The output is

        original_layer(x) + (x @ A.T @ B.T) * (alpha / rank)

    where ``A`` has shape ``(rank, in_features)`` and ``B`` has shape
    ``(out_features, rank)``. ``B`` starts at zero, so immediately after
    construction the wrapper produces exactly the same output as the
    wrapped layer.

    Args:
        original_layer: Layer to adapt (e.g. ``nn.Linear``). It must expose a
            2-D ``weight`` of shape ``(out_features, in_features)``.
        rank: Rank of the low-rank update; must be a positive integer.
        alpha: Scaling numerator; the update is multiplied by ``alpha / rank``.

    Raises:
        ValueError: If ``rank`` is not positive.
    """

    def __init__(self, original_layer, rank=8, alpha=16):
        super().__init__()
        if rank <= 0:
            raise ValueError(f"rank must be a positive integer, got {rank}")

        self.original_layer = original_layer  # wrapped layer (e.g. nn.Linear)
        self.rank = rank

        # Freeze the original weights; only A and B are trained.
        for param in self.original_layer.parameters():
            param.requires_grad = False

        # nn.Linear stores its weight as (out_features, in_features).
        weight = original_layer.weight
        out_features, in_features = weight.shape

        # Create the adapter on the same device/dtype as the wrapped weight so
        # the wrapper also works for layers that were already moved or cast.
        factory = {"device": weight.device, "dtype": weight.dtype}
        # A projects the input down to `rank` dimensions (random init).
        self.A = nn.Parameter(torch.randn(rank, in_features, **factory))
        # B projects back up to the output size (zero init => no initial change).
        self.B = nn.Parameter(torch.zeros(out_features, rank, **factory))
        self.scale = alpha / rank  # scaling factor applied to the update

    def forward(self, x):
        """Return the wrapped layer's output plus the scaled low-rank update."""
        orig_output = self.original_layer(x)
        # (..., in) @ (in, rank) @ (rank, out) -> (..., out)
        lora_output = (x @ self.A.T @ self.B.T) * self.scale
        return orig_output + lora_output

In [None]:
from transformers import BertModel

model = BertModel.from_pretrained("bert-base-uncased")

# Wrap the query and value projections of encoder layer 0 with LoRA adapters.
# Each projection is itself an nn.Linear (a leaf module with no children), so
# it has to be replaced directly on its parent rather than by iterating over
# `.children()`, which would yield nothing and leave the model unchanged.
self_attention = model.encoder.layer[0].attention.self
for proj_name in ("query", "value"):
    proj = getattr(self_attention, proj_name)
    # Only wrap plain linear layers, so an already-wrapped projection is
    # never wrapped a second time.
    if isinstance(proj, nn.Linear):
        setattr(self_attention, proj_name, LoRALayer(proj, rank=8))


In [None]:
# transformers.AdamW was deprecated and later removed; torch.optim.AdamW is the
# supported replacement and keeps the name `AdamW` available in the notebook.
from torch.optim import AdamW
from transformers import BertTokenizer

# Data preparation: a two-sentence toy batch for binary classification.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
texts = ["I love NLP!", "LoRA is efficient."]
inputs = tokenizer(texts, padding=True, return_tensors="pt")
labels = torch.tensor([1, 0])

# Freeze the whole backbone, then re-enable only the LoRA matrices so that
# the adapter (and the new head below) are the only things being trained.
for param in model.parameters():
    param.requires_grad = False
for module in model.modules():
    if isinstance(module, LoRALayer):
        module.A.requires_grad = True
        module.B.requires_grad = True

# Classification head. BertModel never calls this attribute in its own
# forward pass, so it is applied explicitly in the training loop.
torch.manual_seed(42)  # reproducible head initialisation
model.classifier = nn.Linear(model.config.hidden_size, 2)

# Hand the optimizer only the parameters that are actually trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(trainable_params, lr=1e-4)

# Training loop
model.train()  # from_pretrained returns the model in eval mode
for epoch in range(5):
    # BertModel.forward takes no `labels` and returns no loss, so the logits
    # and the loss are computed here from the pooled [CLS] representation.
    outputs = model(**inputs)
    logits = model.classifier(outputs.pooler_output)
    loss = F.cross_entropy(logits, labels)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(f"Epoch {epoch}, Loss: {loss.item():.4f}")