In [1]:
# Model download: fetch the Qwen-7B-chat snapshot through ModelScope.
# The bare `model_dir` on the last line makes the notebook display the
# local directory returned by snapshot_download.
from modelscope import snapshot_download

MODEL_ID = 'qwen/Qwen-7B-chat'
model_dir = snapshot_download(MODEL_ID)
model_dir

Downloading: 100%|██████████| 8.21k/8.21k [00:00<00:00, 23.9kB/s]
Downloading: 100%|██████████| 50.8k/50.8k [00:00<00:00, 151kB/s]
Downloading: 100%|██████████| 244k/244k [00:00<00:00, 577kB/s]
Downloading: 100%|██████████| 135k/135k [00:00<00:00, 339kB/s]
Downloading: 100%|██████████| 910/910 [00:00<00:00, 2.28kB/s]
Downloading: 100%|██████████| 77.0/77.0 [00:00<00:00, 240B/s]
Downloading: 100%|██████████| 2.29k/2.29k [00:00<00:00, 7.76kB/s]
Downloading: 100%|██████████| 1.88k/1.88k [00:00<00:00, 6.46kB/s]
Downloading: 100%|██████████| 249/249 [00:00<00:00, 486B/s]
Downloading: 100%|██████████| 1.63M/1.63M [00:00<00:00, 3.07MB/s]
Downloading: 100%|██████████| 1.84M/1.84M [00:00<00:00, 3.24MB/s]
Downloading: 100%|██████████| 2.64M/2.64M [00:00<00:00, 4.84MB/s]
Downloading: 100%|██████████| 6.73k/6.73k [00:00<00:00, 20.9kB/s]
Downloading: 100%|██████████| 80.8k/80.8k [00:00<00:00, 192kB/s]
Downloading: 100%|██████████| 80.8k/80.8k [00:00<00:00, 173kB/s]
Downloading: 100%|██████████| 1.8

'/home/weibs/.cache/modelscope/hub/qwen/Qwen-7B-chat'

In [None]:
from datasets import load_dataset
import torch,einops
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
from peft import LoraConfig
from trl import SFTTrainer
# NOTE(review): `device` is not referenced anywhere else in the visible
# notebook; model placement below is driven by device_map instead.
device = "cuda:1"

# (2) Load the training data. Per the original author's note this file holds
# the 1000 records assembled by running `python split_json.py`.
dataset = load_dataset(
    "json",
    data_files="dataset.json",
    split="train",
)
 
# (3) Model configuration
# Local directory of the base model; adjust to wherever the model was downloaded.
base_model_name = '/home/weibs/.cache/modelscope/hub/qwen/Qwen-7B-chat'

# 4-bit bitsandbytes quantization settings.
# NOTE(review): the model-loading call later in this cell has its
# `quantization_config=bnb_config` argument commented out, so this object is
# built but not applied.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize the weights to 4 bits
    bnb_4bit_quant_type="nf4",             # NF4 (normalized float) vs. plain FP4; the author's source recommends NF4
    bnb_4bit_use_double_quant=True,        # nested quantization; author's note: saves roughly 0.4 extra bits per parameter
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
)
 
# (4) LoRA adapter hyper-parameters used for the (Q)LoRA fine-tune.
# No target_modules are given, so PEFT's built-in default for the model type
# is relied upon.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",  # adapter is attached to a causal language model
    r=64,                   # rank of the low-rank update matrices
    lora_alpha=16,          # LoRA scaling numerator
    lora_dropout=0.1,       # dropout applied inside the LoRA layers
    bias="none",            # bias terms are not trained
)
 
# (5) Load the local base model (the Qwen-7B-chat directory in base_model_name).
#
# NOTE: bnb_config is not passed here (the `quantization_config=bnb_config`
# argument was left disabled by the author), so the weights load unquantized
# and the run below is plain LoRA rather than 4-bit QLoRA. Add
# `quantization_config=bnb_config` to this call to enable 4-bit loading.
#
# The deprecated `use_auth_token=True` argument was dropped: the checkpoint is
# read from a local directory, so no Hugging Face Hub token is needed.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,         # local model directory
    device_map="auto",       # automatic placement across the available devices (not a GPU index)
    trust_remote_code=True,  # allow the model code shipped with the checkpoint to run
)
# Disable the generation KV cache while training.
base_model.config.use_cache = False
# NOTE(review): pretraining_tp looks like a Llama-specific config field carried
# over from the Llama recipe; presumably a no-op for Qwen -- confirm.
base_model.config.pretraining_tp = 1
 
# (6) Tokenizer: splits text into the smallest units (tokens).
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)

# A padding token is required to batch sequences of different lengths.
# The Qwen tokenizer does not define eos_token, so `pad_token = eos_token`
# would assign None and padding would fail during collation. Qwen exposes its
# end-of-document id as `eod_id`; use it when present, otherwise fall back to
# the usual eos-as-pad convention for tokenizers that do define eos_token.
_eod_id = getattr(tokenizer, "eod_id", None)
if _eod_id is not None:
    tokenizer.pad_token_id = _eod_id
else:
    tokenizer.pad_token = tokenizer.eos_token
 
 
# (7) Training hyper-parameters
max_seq_length = 512      # sequence-length limit passed to SFTTrainer below
output_dir = "./results"  # directory that receives the training outputs

training_args = TrainingArguments(
    output_dir=output_dir,
    max_steps=100,                  # total number of training (update) steps
    learning_rate=2e-4,             # initial learning rate: too high is unstable, too low fails to converge
    per_device_train_batch_size=4,  # batch size per GPU
    gradient_accumulation_steps=4,  # steps whose gradients are accumulated before each backward/update pass
    logging_steps=10,               # update steps between two log records
    report_to="wandb",              # send logs to Weights & Biases
)
# Detailed walkthrough of the TrainingArguments parameters:
# https://blog.csdn.net/qq_33293040/article/details/117376382
 
# Supervised fine-tuning trainer: wraps the base model with the LoRA adapter
# described by peft_config and trains on the "text" field of each record.
trainer = SFTTrainer(
    model=base_model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=peft_config,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)
 
# (8) Run the fine-tuning loop.
trainer.train()

# (9) Save the trained model.
# output_dir is rebound here to "<output_dir>/final_checkpoint"; the merge cell
# further down needs its adapter path to point at this directory.
import os
output_dir = os.path.join(output_dir, "final_checkpoint")
trainer.model.save_pretrained(output_dir)

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
 
# Base model directory. This must be the same model the adapter was trained
# on; the fine-tuning cell above uses the local Qwen-7B-chat snapshot, so the
# previous Llama2-13B path (left over from another setup) could not accept
# this adapter.
model_name_or_path = '/home/weibs/.cache/modelscope/hub/qwen/Qwen-7B-chat'
# Adapter directory: where the fine-tuning cell saved its result
# (output_dir "./results" joined with "final_checkpoint").
adapter_name_or_path = './results/final_checkpoint'
# Export directory for the merged model. Kept relative to the working
# directory, like the training output, instead of another machine's
# /root/autodl-tmp tree.
save_path = './new_model'
 
# Load the base model's tokenizer so it can be exported next to the merged weights.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)

# Load the base model in half precision.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map='auto',
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
print("load model success")

# Attach the fine-tuned adapter to the base model.
model = PeftModel.from_pretrained(model, adapter_name_or_path)
print("load adapter success")

# Merge the adapter into the base weights and unload the PEFT wrapper.
model = model.merge_and_unload()
print("merge success")

# Write tokenizer and merged model to the export directory.
tokenizer.save_pretrained(save_path)
model.save_pretrained(save_path)
print("save done.")