## 模型微调

### 1. 环境准备

In [None]:
%%capture
import os

# Install the fine-tuning stack only when some environment variable name contains
# "COLAB_" (presumably meaning the notebook runs on Google Colab); otherwise
# nothing is installed and the packages are expected to be available already.
if "COLAB_" in "".join(os.environ.keys()):
    # --no-deps installs exactly the listed packages without resolving their dependencies;
    # xformers and trl are pinned to specific versions.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    # Supporting packages, installed with normal dependency resolution.
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

### 2. 下载模型

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options shared by every from_pretrained call in this notebook
# (the optional adapter-reload cell reuses the same three names).
max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

# Reference list of alternative checkpoints; it is not read by the code in the
# cells shown here — the model actually loaded is named explicitly below.
qwen_models = [
    "unsloth/Qwen2.5-Coder-32B-Instruct",  # Qwen 2.5 Coder 2x faster
    "unsloth/Qwen2.5-Coder-7B",
    "unsloth/Qwen2.5-14B-Instruct",  # 14B fits in a 16GB card
    "unsloth/Qwen2.5-7B",
    "unsloth/Qwen2.5-7B-Instruct",
    "unsloth/Qwen2.5-7B-Instruct-unsloth-bnb-4bit",
    "unsloth/Qwen2.5-72B-Instruct",  # 72B fits in a 48GB card
]

# Load the base model together with its tokenizer using the options above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-7B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.3.19: Fast Qwen2 patching. Transformers: 4.51.2.
   \\   /|    NVIDIA GeForce RTX 4060 Ti. Num GPUs = 1. Max memory: 7.996 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.86s/it]


### 3. 配置 LoRA 微调参数

设置 PEFT 高效微调参数，使用 Unsloth 默认参数。

In [None]:
# Wrap the loaded model with LoRA adapters and rebind `model` to the wrapped model.
# The targeted modules are the attention projections (q/k/v/o) and the MLP
# projections (gate/up/down).
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", ],
    lora_alpha=16,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,  # fixed seed; the same value is used as the trainer seed later
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None,  # And LoftQ
)

Unsloth 2025.3.19 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


### 4. 处理微调数据集

获取聊天模板

In [None]:
from unsloth.chat_templates import get_chat_template

# Rebind `tokenizer` to the result of get_chat_template for the "qwen-2.5"
# template; later cells call tokenizer.apply_chat_template on this object.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="qwen-2.5",
)


def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-template string.

    Args:
        examples: Batch mapping whose ``"conversations"`` entry is iterated
            one conversation at a time.

    Returns:
        dict: A single ``"text"`` key holding the rendered strings, in the
        same order as the input conversations.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # tokenize=False keeps the result as text; no generation prompt is
        # appended because the conversation is rendered as-is for training.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

加载微调数据集

In [None]:
from datasets import load_dataset

# Load the "train" split of the naming dataset (configuration "default").
dataset = load_dataset("MoChenYa/code-nomist-llm-dataset", name="default", split="train")

查看原始数据集的行列数

In [None]:
# (rows, columns) of the raw dataset, shown as the cell output.
dataset.shape

(1286, 2)

将数据集修改为多轮聊天的格式

In [None]:
# System instruction placed at the start of every conversation (training and inference).
system_prompt_content = "请充当一个代码命名助手，请根据用户给出的项目信息和具体需求生成多个命名建议，名称之间使用 | 分隔，注意不要生成其他任何内容。"


def formatting_dateset_table2conv_func(examples):
    """Convert one question/answer record into a three-message conversation.

    Args:
        examples: Mapping with a ``"question"`` and an ``"answer"`` entry.

    Returns:
        dict: A single ``"conversations"`` key holding a list of
        ``{"role", "content"}`` messages in system, user, assistant order.
    """
    turns = (
        ("system", system_prompt_content),
        ("user", examples["question"]),
        ("assistant", examples["answer"]),
    )
    return {
        "conversations": [
            {"role": speaker, "content": text} for speaker, text in turns
        ]
    }


# Apply the conversion through dataset.map (called without batched=True) and
# rebind `dataset` to the result.
dataset = dataset.map(formatting_dateset_table2conv_func)

Map: 100%|██████████| 1286/1286 [00:00<00:00, 23625.22 examples/s]


将聊天模板应用到数据集中

In [None]:
from unsloth.chat_templates import standardize_sharegpt

# Pass the conversations through standardize_sharegpt (presumably normalising
# them to the role/content form the chat template expects — confirm), then
# render them in batches with formatting_prompts_func.
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched=True, )

Unsloth: Standardizing formats (num_proc=12): 100%|██████████| 1286/1286 [00:09<00:00, 133.53 examples/s]
Map: 100%|██████████| 1286/1286 [00:00<00:00, 12777.42 examples/s]


查看首条数据，检查是否成功格式化

In [None]:
# Display the first record for inspection.
dataset[0]

{'question': '项目类型：销售管理系统；项目介绍：跟踪销售活动，提高销售业绩和预测准确性。；当前模块：销售预测模块；目标名称类型：函数名；格式化类型：驼峰命名（首字母小写）；目标描述：获取销售预测数据；生成数量：5；',
 'answer': 'getSalesForecast|fetchSalesPrediction|retrieveForecastData|obtainSalesForecast|acquirePredictionData',
 'conversations': [{'content': '请充当一个代码命名助手，请根据用户给出的项目信息和具体需求生成多个命名建议，名称之间使用 | 分隔，注意不要生成其他任何内容。',
   'role': 'system'},
  {'content': '项目类型：销售管理系统；项目介绍：跟踪销售活动，提高销售业绩和预测准确性。；当前模块：销售预测模块；目标名称类型：函数名；格式化类型：驼峰命名（首字母小写）；目标描述：获取销售预测数据；生成数量：5；',
   'role': 'user'},
  {'content': 'getSalesForecast|fetchSalesPrediction|retrieveForecastData|obtainSalesForecast|acquirePredictionData',
   'role': 'assistant'}],
 'text': '<|im_start|>system\n请充当一个代码命名助手，请根据用户给出的项目信息和具体需求生成多个命名建议，名称之间使用 | 分隔，注意不要生成其他任何内容。<|im_end|>\n<|im_start|>user\n项目类型：销售管理系统；项目介绍：跟踪销售活动，提高销售业绩和预测准确性。；当前模块：销售预测模块；目标名称类型：函数名；格式化类型：驼峰命名（首字母小写）；目标描述：获取销售预测数据；生成数量：5；<|im_end|>\n<|im_start|>assistant\ngetSalesForecast|fetchSalesPrediction|retrieveForecastData|obtainSalesForecast|acquirePredictionData<|im_end

### 5. 训练模型

创建 Hugging Face 的 SFT 训练器

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the rendered "text" column.
# With per-device batch size 1 and 4 gradient-accumulation steps, each optimizer
# step covers 4 examples; max_steps=30 makes this a short run rather than a full epoch.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=1,
    packing=False,  # Can make training 5x faster for short sequences.
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,  # Fixed major bug in latest Unsloth
        warmup_steps=5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps=30,
        learning_rate=2e-4,
        # Exactly one of fp16/bf16 is enabled, depending on bfloat16 support.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="paged_adamw_8bit",  # Save more memory
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",  # "none" disables logging integrations; set e.g. "wandb" to enable one
    ),
)

Unsloth: Tokenizing ["text"]: 100%|██████████| 1286/1286 [00:00<00:00, 4474.65 examples/s]


设置只在响应内容上进行损失计算

In [None]:
from unsloth.chat_templates import train_on_responses_only

# Restrict the loss to assistant responses. The two markers are the chat-template
# prefixes that open a user turn and an assistant turn respectively.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|im_start|>user\n",
    response_part="<|im_start|>assistant\n",
)

Map (num_proc=12): 100%|██████████| 1286/1286 [00:09<00:00, 130.87 examples/s]


查看设置效果

In [None]:
# Decode the token ids of the first training example back to text.
tokenizer.decode(trainer.train_dataset[0]["input_ids"])

'<|im_start|>system\n请充当一个代码命名助手，请根据用户给出的项目信息和具体需求生成多个命名建议，名称之间使用 | 分隔，注意不要生成其他任何内容。<|im_end|>\n<|im_start|>user\n项目类型：销售管理系统；项目介绍：跟踪销售活动，提高销售业绩和预测准确性。；当前模块：销售预测模块；目标名称类型：函数名；格式化类型：驼峰命名（首字母小写）；目标描述：获取销售预测数据；生成数量：5；<|im_end|>\n<|im_start|>assistant\ngetSalesForecast|fetchSalesPrediction|retrieveForecastData|obtainSalesForecast|acquirePredictionData<|im_end|>\n'

In [None]:
# Token id of a single space, used as a visible stand-in for masked label positions.
space = tokenizer(" ", add_special_tokens=False).input_ids[0]
# Labels equal to -100 are swapped for the space token before decoding, so the
# decoded text shows blanks for those positions and real text for the rest.
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[0]["labels"]])

'                                                                                                              getSalesForecast|fetchSalesPrediction|retrieveForecastData|obtainSalesForecast|acquirePredictionData<|im_end|>\n'

开始训练

In [None]:
# Run training and keep the returned statistics object in `trainer_stats`.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,286 | Num Epochs = 1 | Total steps = 30
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 40,370,176/7,000,000,000 (0.58% trained)


Step,Training Loss
1,0.9595
2,1.7884
3,1.5049
4,1.3036
5,1.0496
6,1.2781
7,1.3579
8,1.1666
9,1.033
10,0.8201


### 6. 测试模型

#### 6.1. 读取已保存的适配器（可选）

In [None]:
from unsloth import FastLanguageModel
# Optional: replace `model` and `tokenizer` with ones loaded from previously
# saved adapters, reusing the loading options defined in the model-loading cell.
# NOTE(review): no cell in this notebook writes a "lora_model" directory — the
# adapter-saving cell writes to `new_model_name` — so model_name must be pointed
# at the location that actually holds the adapters before running this cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

#### 6.2. 定义消息生成函数

In [None]:
def create_user_content(name, introduce, module, target_type, format_type, target_desc, num):
    """Build the user message for one naming request.

    Each argument is interpolated into its own labelled field. Every field
    ends with a full-width semicolon, and the fields are concatenated in
    parameter order with no extra separator.

    Returns:
        str: The assembled request text.
    """
    fields = (
        f"项目类型：{name}；",
        f"项目介绍：{introduce}；",
        f"当前模块：{module}；",
        f"目标名称类型：{target_type}；",
        f"格式化类型：{format_type}；",
        f"目标描述：{target_desc}；",
        f"生成数量：{num}；",
    )
    return "".join(fields)

In [None]:
from unsloth.chat_templates import get_chat_template

# Re-apply the "qwen-2.5" chat template to whatever tokenizer is current (it may
# have been replaced by the optional adapter-reload cell), then switch the model
# to Unsloth's inference mode.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="qwen-2.5",
)
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

def test_message(*args, **kwargs):
    """Generate naming suggestions for one request and return the decoded text.

    All positional and keyword arguments are forwarded unchanged to
    ``create_user_content``, so the request fields can be passed either way.
    The prompt is the notebook's system prompt followed by the generated user
    message. It is rendered with the chat template, including the generation
    prompt, and moved to the ``"cuda"`` device before generation.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences,
        i.e. the decoded text of each sequence.
    """
    messages = [
        {"role": "system", "content": system_prompt_content},
        {"role": "user", "content": create_user_content(*args, **kwargs)},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,  # Must add for generation
        return_tensors="pt",
    ).to("cuda")

    # NOTE(review): temperature/min_p are sampling parameters — confirm that
    # sampling is actually enabled for this model's generation config.
    outputs = model.generate(input_ids=inputs, max_new_tokens=64, use_cache=True, temperature=1.7, min_p=0.1)
    # Return the decoded text so the calling cell displays it; previously the
    # decoded result was computed and then discarded.
    return tokenizer.batch_decode(outputs)

#### 6.3. 测试用例

In [None]:
# Sample request for an online-education project. The fields are passed by
# keyword, using the parameter names of create_user_content.
test_message(
    name="在线教育平台",
    introduce="在线教育平台是一个提供在线学习和教学服务的网站或应用程序，用户可以通过它访问各种课程、学习资源和教师支持。",
    module="课程管理",
    target_type="课程名称",
    format_type="简化",
    target_desc="课程的名称或标题",
    num=5,
)

### 7. 保存模型

#### 7.1. 配置参数

In [None]:
# Name used for everything this notebook saves locally (adapters, merged weights, GGUF).
new_model_name = "CodeNomist-Qwen2.5-7B-Instruct-unsloth"
# Hugging Face Hub repository that the push_to_hub* calls upload to.
hf_repo = "MoChenYa/CodeNomist-Qwen2.5-7B-Instruct-unsloth"
# Read the access token from the HF_TOKEN environment variable so a real secret
# never has to be written into the notebook. Falls back to the original
# placeholder when the variable is not set.
hf_token = os.environ.get("HF_TOKEN", "hf_...")

#### 7.2. 保存适配器

In [None]:
# Save the adapter and tokenizer. When an environment variable name contains
# "COLAB_", push both to the Hub repository; otherwise write both to the local
# path named by `new_model_name`.
if "COLAB_" in "".join(os.environ.keys()):
    model.push_to_hub(hf_repo, token = hf_token) # Online saving
    tokenizer.push_to_hub(hf_repo, token = hf_token) # Online saving
else:
    model.save_pretrained(new_model_name)
    tokenizer.save_pretrained(new_model_name)

#### 7.3. 合并适配器并保存到本地（Colab 环境下同时推送到 Hub）

In [None]:
# Merge the adapters into the model and write the merged result locally.
# NOTE(review): this writes to the same `new_model_name` path that the
# adapter-saving cell uses — confirm that sharing the directory is intended.
model.save_pretrained_merged(new_model_name, tokenizer)

# When an environment variable name contains "COLAB_", also upload the merged
# 16-bit model to the Hub. The configured `hf_token` is passed, as in the other
# push calls; the original passed an empty string here.
if "COLAB_" in "".join(os.environ.keys()):
    model.push_to_hub_merged(hf_repo, tokenizer, save_method = "merged_16bit", token = hf_token)

#### 7.4. 保存或推送 GGUF 格式

In [None]:
# Export the model in GGUF format. When an environment variable name contains
# "COLAB_", push f16 and q8_0 quantizations to the Hub; otherwise save locally.
# The check joins os.environ.keys() like the other Colab checks in this
# notebook; the original called "".join() with no argument, which raises TypeError.
if "COLAB_" in "".join(os.environ.keys()):
    model.push_to_hub_gguf(
        hf_repo,
        tokenizer,
        quantization_method = ["f16", "q8_0"],
        token = hf_token,
    )
else:
    model.save_pretrained_gguf(new_model_name, tokenizer)