In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import os

In [3]:
# Base model that the LoRA adapter was fine-tuned from.
# The original note calls this a Hugging Face ID; the value used here is an
# absolute filesystem path to a local copy of the model.
# NOTE(review): presumably this must be the same base checkpoint used during
# fine-tuning — confirm against the adapter's training config.
base_model_id = "/train/Llama-3-8B-Instruct"

# Directory that contains the LoRA adapter checkpoint to merge.
adapter_path = "/llama3-8b-sympo-5e-5_0.1/checkpoint-3753"

# NEW directory where the merged, standalone model will be written.
# It is deliberately distinct from adapter_path.
merged_model_path = "/llama3-8b-sympo-5e-5_0.1_merged/checkpoint-3753"

In [4]:
# --- 2. Merge procedure ---

# Create the output directory (and any missing parents) up front;
# exist_ok=True makes re-running this cell a no-op if it is already there.
os.makedirs(name=merged_model_path, exist_ok=True)

In [5]:
print(f"正在加载基础模型: {base_model_id}")
# Load the base model in bfloat16. Make sure the runtime has enough host RAM
# and GPU memory for the full set of weights.
# `dtype=` replaces the deprecated `torch_dtype=` keyword; the installed
# transformers release emits "`torch_dtype` is deprecated! Use `dtype` instead!"
# when the old spelling is used.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    dtype=torch.bfloat16,
    device_map="auto",
)

print(f"正在加载分词器: {base_model_id}")
# The tokenizer is read from the base model directory and is later saved next
# to the merged weights.
# NOTE(review): if fine-tuning added tokens or changed the chat template, the
# tokenizer stored with the adapter checkpoint may be the right one — confirm.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

`torch_dtype` is deprecated! Use `dtype` instead!


正在加载基础模型: /train/Llama-3-8B-Instruct


Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.26it/s]


正在加载分词器: /train/Llama-3-8B-Instruct


In [6]:
print(f"正在加载LoRA适配器: {adapter_path}")
# Wrap the already-loaded base model with the LoRA adapter weights.
peft_model = PeftModel.from_pretrained(base_model, adapter_path)

print("正在合并权重...")
# Fold the adapter into the base weights and drop the PEFT wrapper, leaving a
# plain, self-contained model object.
merged_model = peft_model.merge_and_unload()

print(f"正在将合并后的完整模型保存到: {merged_model_path}")
# Write the merged model first, then the tokenizer, into the same directory.
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(merged_model_path)

print("\n模型合并并保存完毕！")

正在加载LoRA适配器: /llama3-8b-sympo-5e-5_0.1/checkpoint-3753
正在合并权重...
正在将合并后的完整模型保存到: /llama3-8b-sympo-5e-5_0.1_merged/checkpoint-3753

模型合并并保存完毕！
