In [1]:
import copy
import os
from peft import get_peft_model, LoraConfig, TaskType, PeftModel
import peft
import torch
from torch import nn
import torch.nn.functional as F
from MLP_function import *


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
set_all_seeds(45)
# 3D CNN
X = torch.rand((1000, 1, 28, 28, 28))
y = (X.sum(dim=[1,2,3,4]) > 11000).long()  # Binary classification based on voxel sum

print(y[0 :20])
print('y.sum',y.sum())

tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1])
y.sum tensor(284)


In [3]:
n_train = 800
batch_size = 64
train_dataloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X[:n_train], y[:n_train]),
    batch_size=batch_size,
    shuffle=True,
)
eval_dataloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X[n_train:], y[n_train:]),
    batch_size=batch_size,
)
lr = 0.002
max_epochs = 30

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



In [4]:
# Lora
config = peft.LoraConfig(
    r=8,
    target_modules=["conv.0", "conv.3", 'fc.0'],
    # modules_to_save=["fc"] ,
)

original_model = CNN3D().to(device)
original_model_copy = copy.deepcopy(original_model)  # we keep a copy of the original model for later

In [5]:

# 3D CNN
## 
print('orginal_model type',type(original_model))
# print('orginal_model_copy type',type(original_model_copy))

print('before build peft model==========')
print('trainalbe parameter in orginal_model:')
for name, param in original_model.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")

# print("可训练参数列表:")
# for name, param in original_model.named_parameters():
#     if param.requires_grad:
#         print(f"{name}: {param.shape}")

print('after build peft model==========')
peft_model = peft.get_peft_model(original_model_copy, config)
optimizer = optim.Adam(peft_model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
print('trainalbe parameter in peft_model:')
for name, param in peft_model.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")

# print("可训练参数列表:")
# for name, param in peft_model.named_parameters():
#     if param.requires_grad:
#         print(f"{name}: {param.shape}")

peft_model.print_trainable_parameters()

    

orginal_model type <class 'MLP_function.CNN3D'>
trainalbe parameter in orginal_model:
Layer: conv.0.weight | Trainable: True
Layer: conv.0.bias | Trainable: True
Layer: conv.3.weight | Trainable: True
Layer: conv.3.bias | Trainable: True
Layer: fc.0.weight | Trainable: True
Layer: fc.0.bias | Trainable: True
Layer: fc.2.weight | Trainable: True
Layer: fc.2.bias | Trainable: True
trainalbe parameter in peft_model:
Layer: base_model.model.conv.0.base_layer.weight | Trainable: False
Layer: base_model.model.conv.0.base_layer.bias | Trainable: False
Layer: base_model.model.conv.0.lora_A.default.weight | Trainable: True
Layer: base_model.model.conv.0.lora_B.default.weight | Trainable: True
Layer: base_model.model.conv.3.base_layer.weight | Trainable: False
Layer: base_model.model.conv.3.base_layer.bias | Trainable: False
Layer: base_model.model.conv.3.lora_A.default.weight | Trainable: True
Layer: base_model.model.conv.3.lora_B.default.weight | Trainable: True
Layer: base_model.model.fc.0.ba

In [6]:
train(peft_model, optimizer, criterion, train_dataloader, eval_dataloader, device,epochs=max_epochs)

Batch 0 get train loss: 0.6353276371955872 val loss: 0.5333637595176697
Batch 1 get train loss: 0.6056520342826843 val loss: 0.5178259611129761
Batch 2 get train loss: 0.6068936586380005 val loss: 0.5324974656105042
Batch 3 get train loss: 0.6043715476989746 val loss: 0.5260958671569824
Batch 4 get train loss: 0.6008870601654053 val loss: 0.5142931342124939
Batch 5 get train loss: 0.6099104285240173 val loss: 0.5510181188583374
Batch 6 get train loss: 0.5951511263847351 val loss: 0.5079817175865173
Batch 7 get train loss: 0.6089515686035156 val loss: 0.5311503410339355
Batch 8 get train loss: 0.6029284596443176 val loss: 0.5400116443634033
Batch 9 get train loss: 0.6032438278198242 val loss: 0.5250625014305115
Batch 10 get train loss: 0.6046921610832214 val loss: 0.5064818263053894
Batch 11 get train loss: 0.5955328345298767 val loss: 0.5295922756195068
Batch 12 get train loss: 0.5906210541725159 val loss: 0.5331742167472839
Batch 13 get train loss: 0.5826533436775208 val loss: 0.50256

In [6]:
## First train of peft model
train(peft_model, optimizer, criterion, train_dataloader, eval_dataloader, device,epochs=max_epochs)

Batch 0 get train loss: 0.6353276371955872 val loss: 0.5333637595176697
Batch 1 get train loss: 0.6056520342826843 val loss: 0.5178259611129761
Batch 2 get train loss: 0.6068936586380005 val loss: 0.5324974656105042
Batch 3 get train loss: 0.6043715476989746 val loss: 0.5260958671569824
Batch 4 get train loss: 0.6008870601654053 val loss: 0.5142931342124939
Batch 5 get train loss: 0.6099104285240173 val loss: 0.5510181188583374
Batch 6 get train loss: 0.5951511263847351 val loss: 0.5079817175865173
Batch 7 get train loss: 0.6089515686035156 val loss: 0.5311503410339355
Batch 8 get train loss: 0.6029284596443176 val loss: 0.5400116443634033
Batch 9 get train loss: 0.6032438278198242 val loss: 0.5250625014305115
Batch 10 get train loss: 0.6046921610832214 val loss: 0.5064818263053894
Batch 11 get train loss: 0.5955328345298767 val loss: 0.5295922756195068
Batch 12 get train loss: 0.5906210541725159 val loss: 0.5331742167472839
Batch 13 get train loss: 0.5826533436775208 val loss: 0.50256

In [None]:
# Save lora adapter
adapter_save_path = 'lora_adapter/adapter_1'
peft_model.save_pretrained(adapter_save_path)

In [None]:
# load adapter
model_name = 'lora_adapter/adapter_1'
peft_model_from_adapter = peft.PeftModel.from_pretrained(original_model_copy, model_name)
print('peft_model_from_adapter type',type(peft_model_from_adapter))
print('original_model_copy type',type(original_model_copy))
with torch.no_grad():
    y_peft = peft_model(X.to(device))
    y_loaded_adapter = peft_model_from_adapter(X.to(device))
print('if peft_model and peft_model_from_adapter have same results:',torch.allclose(y_peft, y_loaded_adapter))
del y_peft, y_loaded_adapter
torch.cuda.empty_cache()

peft_model_from_adapter type <class 'peft.peft_model.PeftModel'>
original_model_copy type <class 'MLP_function.CNN3D'>
if peft_model and peft_model_from_adapter have same results True


In [14]:
### Merging
model_name = 'lora_adapter/adapter_1'
unmerged_peft_model = peft.PeftModel.from_pretrained(original_model_copy, model_name)
print('before merging==========')
print('unmerged_peft_model type',type(unmerged_peft_model))
print('original_model_copy type',type(original_model_copy))
print('trainalbe parameter in unmerged_peft_model:')
for name, param in unmerged_peft_model.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")

print('after merging==========')

merged_peft_model = unmerged_peft_model.merge_and_unload()  
print('merged_peft_model type',type(merged_peft_model))
print('unmerged_peft_model type',type(unmerged_peft_model))

print('trainalbe parameter in merged_peft_model:')
for name, param in merged_peft_model.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")


ValueError: Can't find 'adapter_config.json' at 'lora_adapter/adapter_1'

In [13]:
# Compare results of merged model and unmerged model
y_unmerged = unmerged_peft_model(X.to(device))
y_merged = merged_peft_model(X.to(device))
print('loRA model merged sucessfull:',torch.allclose(y_unmerged, y_merged))

loRA model merged sucessfull: True


In [16]:
y_module = module_copy(X.to(device))
print('check if merged model same as original model:',torch.allclose(y_module, y_merged))

check if merged model same as original model: True


In [14]:
print('module copy:',type(module_copy))

for name, param in module_copy.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")

print('\nmodule:',type(module))

for name, param in module.named_parameters():
    print(f"Layer: {name} | Trainable: {param.requires_grad}")

module copy: <class 'MLP_function.CNN3D'>
Layer: conv.0.weight | Trainable: False
Layer: conv.0.bias | Trainable: False
Layer: conv.3.weight | Trainable: False
Layer: conv.3.bias | Trainable: False
Layer: fc.0.weight | Trainable: False
Layer: fc.0.bias | Trainable: False
Layer: fc.2.weight | Trainable: False
Layer: fc.2.bias | Trainable: False

module: <class 'MLP_function.CNN3D'>
Layer: conv.0.weight | Trainable: True
Layer: conv.0.bias | Trainable: True
Layer: conv.3.weight | Trainable: True
Layer: conv.3.bias | Trainable: True
Layer: fc.0.weight | Trainable: True
Layer: fc.0.bias | Trainable: True
Layer: fc.2.weight | Trainable: True
Layer: fc.2.bias | Trainable: True


In [15]:
# 1. 比较 state_dict 的 key 集合
orig_sd   = module_copy.state_dict()
merged_sd = merged_peft_model.state_dict()

orig_keys   = set(orig_sd.keys())
merged_keys = set(merged_sd.keys())

print("Key 集合完全相同：", orig_keys == merged_keys)
if orig_keys != merged_keys:
    print("仅在原 model 中有的 keys:", orig_keys - merged_keys)
    print("仅在 merged_model 中有的 keys:", merged_keys - orig_keys)

#    通常 merge 后参数会变化；如果你只是想确认结构，shape 检查到这里就够了。
for k in orig_keys & merged_keys:
    a = orig_sd[k]
    b = merged_sd[k]
    if not torch.equal(a, b):
        print(f"参数值不同: {k} （或使用 torch.allclose 验证近似相等）")
        # 如果想看具体差异，可以打印范数／最大差值
        diff = (a - b).abs()
        print(f"   max|Δ| = {diff.max():.3e}, mean|Δ| = {diff.mean():.3e}")
        # 只示例第一个不同的 key，然后跳出
        break
else:
    print("所有参数值完全相同！")

Key 集合完全相同： True
所有参数值完全相同！
