In [None]:
# %pip install unsloth  # uncomment on a fresh environment; %pip installs into the running kernel's environment


In [None]:
from unsloth import FastLanguageModel
import torch

# Base checkpoint. Repositories under the "unsloth/" prefix are pre-optimized builds.
model_name = "unsloth/Qwen3-1.7B"

# Maximum sequence length (in tokens) the model is configured for.
max_seq_length = 2048

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    # Fixed: this keyword was misspelled `max_seq_lenght`, so the intended
    # sequence length was never passed under the name the loader expects.
    max_seq_length=max_seq_length,
    # Compute dtype used for inference/training. None leaves the choice to
    # Unsloth (its docs describe None as auto-detection).
    dtype=None,
    # True would keep the weights in 4-bit form to save memory; it is
    # disabled here, so the weights are not 4-bit quantized.
    load_in_4bit=False,
    # Full fine-tuning is off: LoRA adapters are attached to the model in a
    # later cell instead of updating every weight.
    full_finetuning=False,
)


## Define LoRA

* q_proj：将输入特征投影到查询（Query） 空间
* k_proj：将输入特征投影到键（Key） 空间
* v_proj：将输入特征投影到值（Value） 空间
* o_proj：将注意力计算的输出投影到最终的特征空间（Output Projection）
* up_proj：FFN 将特征维度升高（如从隐藏层维度升到 4 倍）
* down_proj：FFN 将升高后的维度降回原始隐藏层维度
* gate_proj：在采用门控 FFN 的模型（如 LLaMA、Mistral、Qwen）中，用于计算门控信号的投影，其输出与 up_proj 的输出逐元素相乘

In [None]:
# LoRA hyper-parameters, gathered in one place so they are easy to review.
lora_settings = dict(
    # Only the four attention projections receive adapters.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,  # LoRA rank: controls the dimension of the low-rank matrices
    lora_alpha=4,  # LoRA scaling factor: weights the output of the low-rank matrices
    lora_dropout=0,
    bias="none",  # "none": no bias parameters are trained (most common, saves compute)
    use_gradient_checkpointing="unsloth",
)

# Wrap the base model with LoRA adapters built from the settings above.
# Note: `model` is rebound, so re-running this cell wraps an already-wrapped model.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
from datasets import Dataset, load_dataset
from unsloth.chat_templates import standardize_sharegpt
# Reasoning (chain-of-thought) source.
data_name1 = "unsloth/OpenMathReasoning-mini"
reasoning_data = load_dataset(data_name1, split="cot")

# General chat (non-reasoning) source.
data_name2 = "mlabonne/FineTome-100k"
non_reasoning_data = load_dataset(data_name2, split="train")


def get_conversations(data):
    """Turn problem/solution columns into two-turn chat conversations.

    Args:
        data: Mapping-like object (for example a dataset or a batch dict)
            exposing a "problem" column and a "generated_solution" column.

    Returns:
        A list with one conversation per row. Each conversation is a list of
        two messages: the problem as the "user" turn and the solution as the
        "assistant" turn. Rows are paired with ``zip``, so the result is as
        long as the shorter of the two columns.
    """
    # Fixed: the solution column was read as 'generated_soluation' (typo),
    # which raised KeyError as soon as the function was called.
    return [
        [
            {"role": "user", "content": problem},
            {"role": "assistant", "content": solution},
        ]
        for problem, solution in zip(data["problem"], data["generated_solution"])
    ]


# Render the reasoning conversations with the model's chat template.
# tokenize=False makes apply_chat_template return strings instead of token ids.
reasoning_conv = tokenizer.apply_chat_template(
    get_conversations(reasoning_data),
    tokenize=False,
)

# Normalize the ShareGPT-style records, then render them the same way.
# list(...) materializes the column into a plain Python list before templating.
non_reasoning_std = standardize_sharegpt(non_reasoning_data)
non_reasoning_conv = tokenizer.apply_chat_template(
    list(non_reasoning_std["conversations"]),
    tokenize=False,
)

# Final training set: one "text" column holding every rendered conversation.
# NOTE(review): both sources are concatenated in full; subsample one of them
# here if a specific reasoning / non-reasoning ratio is wanted.
dataset = Dataset.from_dict({"text": reasoning_conv + non_reasoning_conv})

In [None]:
from trl import SFTTrainer, SFTConfig
# Pick the mixed-precision mode once: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

# Supervised fine-tuning trainer for the LoRA-wrapped model.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    # Fixed: this was `datasets`, a name that is never defined in the notebook
    # (only `load_dataset` is imported from the datasets package), so the cell
    # raised NameError. `dataset` is the object built in the data cell.
    # NOTE(review): SFTConfig reads the "text" field by default
    # (dataset_text_field) - confirm `dataset` provides it.
    train_dataset=dataset,
    args=SFTConfig(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # 2 x 4 = 8 samples per optimizer step per device
        learning_rate=2e-4,
        weight_decay=0.01,
        num_train_epochs=3,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        output_dir="outputs",
        optim="adamw_8bit",  # 8-bit optimizer, saves memory
        report_to="none",
    ),
)

In [None]:
# Run the fine-tuning loop and keep the value returned by train() in `trainer_status`.
trainer_status =trainer.train()