In [8]:
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# 加载基础模型
model = AutoModelForCausalLM.from_pretrained("/home/hfd24/model/qwen/Qwen2.5-3B-Instruct")

# 定义 LoRA 配置
lora_config = LoraConfig(
    r=8,  # LoRA 的秩
    lora_alpha=32,  # 缩放因子
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # 目标模块
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# 应用 LoRA 到模型
peft_model = get_peft_model(model, lora_config)

# 打印模型参数
print("Trainable parameters:")
peft_model.print_trainable_parameters()

# 打印 LoRA 参数名称
print("\nLoRA parameters:")
for name, param in peft_model.named_parameters():
    if param.requires_grad:
        print(name)

# 如果你想查看所有参数（包括基础模型和LoRA）
print("\nAll parameters:")
print(peft_model.state_dict().keys())
print("\n")

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.07it/s]


Trainable parameters:
trainable params: 3,686,400 || all params: 3,089,625,088 || trainable%: 0.1193

LoRA parameters:
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight
base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight
base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight
base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight
base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight
base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight
base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight
base_model.model.model.layers

In [9]:
print(model.state_dict().keys())  # 查看实际的参数名称
print("\n")

odict_keys(['model.embed_tokens.weight', 'model.layers.0.self_attn.q_proj.base_layer.weight', 'model.layers.0.self_attn.q_proj.base_layer.bias', 'model.layers.0.self_attn.q_proj.lora_A.default.weight', 'model.layers.0.self_attn.q_proj.lora_B.default.weight', 'model.layers.0.self_attn.k_proj.base_layer.weight', 'model.layers.0.self_attn.k_proj.base_layer.bias', 'model.layers.0.self_attn.k_proj.lora_A.default.weight', 'model.layers.0.self_attn.k_proj.lora_B.default.weight', 'model.layers.0.self_attn.v_proj.base_layer.weight', 'model.layers.0.self_attn.v_proj.base_layer.bias', 'model.layers.0.self_attn.v_proj.lora_A.default.weight', 'model.layers.0.self_attn.v_proj.lora_B.default.weight', 'model.layers.0.self_attn.o_proj.base_layer.weight', 'model.layers.0.self_attn.o_proj.lora_A.default.weight', 'model.layers.0.self_attn.o_proj.lora_B.default.weight', 'model.layers.0.mlp.gate_proj.weight', 'model.layers.0.mlp.up_proj.weight', 'model.layers.0.mlp.down_proj.weight', 'model.layers.0.input_l

In [11]:
for name, param in peft_model.named_parameters():
    print(f"{name}: {param.shape} (requires_grad={param.requires_grad})")

base_model.model.model.embed_tokens.weight: torch.Size([151936, 2048]) (requires_grad=False)
base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight: torch.Size([2048, 2048]) (requires_grad=False)
base_model.model.model.layers.0.self_attn.q_proj.base_layer.bias: torch.Size([2048]) (requires_grad=False)
base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight: torch.Size([8, 2048]) (requires_grad=True)
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight: torch.Size([2048, 8]) (requires_grad=True)
base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight: torch.Size([256, 2048]) (requires_grad=False)
base_model.model.model.layers.0.self_attn.k_proj.base_layer.bias: torch.Size([256]) (requires_grad=False)
base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight: torch.Size([8, 2048]) (requires_grad=True)
base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight: torch.Size([256, 8]) (requires_grad=True)
base_m

In [12]:
for name, param in model.named_parameters():
    print(f"{name}: {param.shape} (requires_grad={param.requires_grad})")

model.embed_tokens.weight: torch.Size([151936, 2048]) (requires_grad=False)
model.layers.0.self_attn.q_proj.base_layer.weight: torch.Size([2048, 2048]) (requires_grad=False)
model.layers.0.self_attn.q_proj.base_layer.bias: torch.Size([2048]) (requires_grad=False)
model.layers.0.self_attn.q_proj.lora_A.default.weight: torch.Size([8, 2048]) (requires_grad=True)
model.layers.0.self_attn.q_proj.lora_B.default.weight: torch.Size([2048, 8]) (requires_grad=True)
model.layers.0.self_attn.k_proj.base_layer.weight: torch.Size([256, 2048]) (requires_grad=False)
model.layers.0.self_attn.k_proj.base_layer.bias: torch.Size([256]) (requires_grad=False)
model.layers.0.self_attn.k_proj.lora_A.default.weight: torch.Size([8, 2048]) (requires_grad=True)
model.layers.0.self_attn.k_proj.lora_B.default.weight: torch.Size([256, 8]) (requires_grad=True)
model.layers.0.self_attn.v_proj.base_layer.weight: torch.Size([256, 2048]) (requires_grad=False)
model.layers.0.self_attn.v_proj.base_layer.bias: torch.Size([2

In [13]:
def remove_prefix_from_model(model, prefix):
    """
    递归地从模型的所有参数名中删除指定前缀
    """
    from collections import OrderedDict
    
    # 处理当前模块的参数
    new_parameters = OrderedDict()
    for name, param in model.named_parameters():
        if name.startswith(prefix):
            new_name = name[len(prefix):]
        else:
            new_name = name
        new_parameters[new_name] = param
    
    # 更新当前模块的参数
    model._parameters = OrderedDict()
    for name, param in new_parameters.items():
        model.register_parameter(name, param)
    
    # 递归处理子模块
    for name, module in model.named_children():
        remove_prefix_from_model(module, prefix)

# 使用示例
remove_prefix_from_model(peft_model, "base_model.model.")

# 验证
for name, param in peft_model.named_parameters():
    print(f"{name}: {param.shape} (requires_grad={param.requires_grad})")

KeyError: 'parameter name can\'t contain "."'