### LoRA
![title](lora.png)

实现1源自 https://dwexzknzsh8.feishu.cn/docx/VkYud3H0zoDTrrxNX5lce0S4nDh

In [5]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import math

class LoRALinear(nn.Module):
    """Linear layer with an optional low-rank (LoRA) update to its weight.

    The wrapped ``nn.Linear`` holds the base weight ``W`` and bias. When
    ``rank > 0`` two extra parameters are created, ``lora_b`` of shape
    ``(out_features, rank)`` and ``lora_a`` of shape ``(rank, in_features)``,
    and the base weight is frozen (the bias is left trainable). With
    ``merge`` enabled the forward pass uses
    ``W + (lora_b @ lora_a) * (lora_alpha / rank)`` as the effective weight.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        merge: If true (and ``rank > 0``), add the low-rank update to the
            base weight in ``forward``; otherwise only the base layer is used.
        rank: Rank of the low-rank update. ``rank <= 0`` disables LoRA, in
            which case no LoRA parameters are created and the base weight
            stays trainable.
        lora_alpha: Numerator of the scaling factor ``lora_alpha / rank``.
        dropout: Dropout probability applied to the layer output; ``0``
            (or less) replaces the dropout with ``nn.Identity``.
    """

    def __init__(self, in_features, out_features, merge, rank=16, lora_alpha=16, dropout=0.5):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.merge = merge
        self.rank = rank
        self.dropout_rate = dropout
        self.lora_alpha = lora_alpha

        self.linear = nn.Linear(in_features, out_features)
        if rank > 0:
            self.lora_b = nn.Parameter(torch.zeros(out_features, rank))
            self.lora_a = nn.Parameter(torch.zeros(rank, in_features))
            self.scale = self.lora_alpha / self.rank
            # Only the low-rank factors (and the bias) are trained.
            self.linear.weight.requires_grad = False

        if self.dropout_rate > 0:
            self.dropout = nn.Dropout(self.dropout_rate)
        else:
            self.dropout = nn.Identity()

        self.initial_weights()

    def initial_weights(self):
        """Initialise the LoRA factors; a no-op when LoRA is disabled.

        ``lora_a`` gets Kaiming-uniform values and ``lora_b`` is zeroed, so
        the product ``lora_b @ lora_a`` is zero right after construction.
        """
        if self.rank <= 0:
            # lora_a / lora_b are never created for rank <= 0; touching them
            # here would raise AttributeError during construction.
            return
        nn.init.kaiming_uniform_(self.lora_a, a=math.sqrt(5))
        nn.init.zeros_(self.lora_b)

    def forward(self, x):
        """Apply the (optionally LoRA-adjusted) linear map, then dropout.

        NOTE(review): dropout is applied to the layer *output* in both
        branches, not to the input of the low-rank path -- confirm this is
        the intended placement.
        """
        if self.rank > 0 and self.merge:
            delta = (self.lora_b @ self.lora_a) * self.scale
            output = F.linear(x, self.linear.weight + delta, self.linear.bias)
            return self.dropout(output)
        return self.dropout(self.linear(x))

实现2参考 https://github.com/huggingface/peft/blob/main/src/peft/tuners/lora/layer.py

In [11]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import math
from peft.utils.other import transpose

class LoRALinear(nn.Linear):
    """``nn.Linear`` subclass with an additive low-rank (LoRA) branch.

    The inherited ``weight``/``bias`` form the base layer. For ``rank > 0``
    two bias-free linear submodules are added, ``lora_A``
    (``in_features -> rank``) and ``lora_B`` (``rank -> out_features``), and
    the inherited ``weight`` is frozen. When ``merge`` is true the forward
    pass adds ``lora_B(lora_A(dropout(x))) * (lora_alpha / rank)`` to the
    base output.
    """

    def __init__(self, in_features, out_features, merge, rank=16, lora_alpha=16, dropout=0.5):
        super().__init__(in_features, out_features)
        # Plain configuration attributes.
        self.merge = merge
        self.rank = rank
        self.lora_alpha = lora_alpha
        self.dropout_rate = dropout
        self.in_features = in_features
        self.out_features = out_features

        if rank > 0:
            # Low-rank factors; creation order (A, then B) is kept so that
            # parameter registration and RNG use stay the same.
            self.lora_A = nn.Linear(self.in_features, rank, bias=False)
            self.lora_B = nn.Linear(rank, self.out_features, bias=False)
            self.scale = self.lora_alpha / self.rank
            self.weight.requires_grad = False

        # Dropout is applied only to the input of the low-rank branch.
        self.dropout = nn.Dropout(self.dropout_rate) if self.dropout_rate > 0 else nn.Identity()

        self.initial_weights()

    def initial_weights(self):
        """Re-initialise the base layer and, if present, the LoRA factors.

        ``lora_A`` gets Kaiming-uniform weights and ``lora_B`` is zeroed, so
        the low-rank branch contributes nothing right after construction.
        """
        super().reset_parameters()
        if self.rank <= 0:
            return
        nn.init.kaiming_uniform_(self.lora_A.weight, a=math.sqrt(5))
        nn.init.zeros_(self.lora_B.weight)

    def forward(self, x):
        """Return the base linear output plus the scaled LoRA branch if enabled."""
        # Check whether self.weight needs to be transposed.
        base = F.linear(x, self.weight, self.bias)
        if not (self.rank > 0 and self.merge):
            return base
        low_rank = self.lora_B(self.lora_A(self.dropout(x)))
        base += low_rank * self.scale
        return base

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Demo sizes: a batch of 64 samples mapped from 128 to 256 features.
batch_size, in_features, out_features = 64, 128, 256

inputs = torch.randn(batch_size, in_features)
weight = torch.randn(out_features, in_features)
bias = torch.randn(out_features)

# F.linear applies the affine map y = x @ weight.T + bias.
output = F.linear(inputs, weight, bias=bias)
output.shape

torch.Size([64, 256])