In [1]:
from unsloth import FastModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 08-21 12:46:49 [importing.py:53] Triton module has been replaced with a placeholder.
INFO 08-21 12:46:49 [__init__.py:239] Automatically detected platform cuda.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
from datasets import load_dataset
from datasets import Dataset
import pandas as pd
import numpy as np

from unsloth.chat_templates import standardize_data_formats
from transformers import TextStreamer

支持的模型列表
https://docs.unsloth.ai/get-started/all-our-models

In [3]:


# Checkpoints published under the "unsloth/" prefix are pre-optimized builds.
model_name = "unsloth/Qwen3-4B-Instruct-2507"
max_seq_length = 2048

model, tokenizer = FastModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    # During computation (inference or training) the 4-bit weights are
    # temporarily converted to the requested dtype to preserve precision.
    # NOTE(review): None presumably lets the library choose the dtype - confirm.
    dtype=None,
    # Keep the weights in memory at 4-bit precision to save space.
    load_in_4bit=True,
    full_finetuning=False,
)



==((====))==  Unsloth 2025.8.8: Fast Qwen3 patching. Transformers: 4.54.1. vLLM: 0.8.5.post1.
   \\   /|    NVIDIA GeForce RTX 3070 Ti Laptop GPU. Num GPUs = 1. Max memory: 8.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post2. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
# Projection layers that receive LoRA adapters.
# NOTE(review): "gate_proj" is not in this list although "up_proj" and
# "down_proj" are - confirm that omission is intentional.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model with LoRA adapters. Re-running this cell re-wraps
# whatever `model` currently is, so run it once per freshly loaded model.
model = FastModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    use_gradient_checkpointing="unsloth",
    # LoRA rank: controls the dimension of the low-rank matrices.
    r=16,
    lora_alpha=4,
    lora_dropout=0,
    # "none": do not train any bias parameters (most common, saves compute).
    bias="none",
    random_state=42,
    use_rslora=False,
    loftq_config=None,
)

Unsloth: Making `model.base_model.model.model` require gradients


In [5]:
# Tags that delimit the model's working-out and its final answer.
reason_start = "<start_working_out>"
reason_end = "<end_working_out>"
solution_start = "<SOLUTION>"
solution_end = "</SOLUTION>"

# System prompt instructing the model to use the tags above.
system_prompt = (
    "You are given a problem.\n"
    "Think about the problem and provide your working out.\n"
    f"Place it between {reason_start} and {reason_end}.\n"
    f"Then, provide your solution between {solution_start}{solution_end}\n"
)
system_prompt

'You are given a problem.\nThink about the problem and provide your working out.\nPlace it between <start_working_out> and <end_working_out>.\nThen, provide your solution between <SOLUTION></SOLUTION>\n'

In [6]:
# Jinja chat template assembled from adjacent string literals.
# - A leading system message is emitted as-is; otherwise a default system
#   prompt placeholder is emitted instead.
# - Every user/assistant message is emitted followed by the EOS token.
# - When a generation prompt is requested, the opening reasoning tag
#   placeholder is appended so the model starts inside its working-out.
chat_template = (
    "{% if messages[0]['role'] == 'system' %}"
        "{{ messages[0]['content'] + eos_token }}"
        "{% set loop_messages = messages[1:] %}"
    "{% else %}"
        "{{ '{system_prompt}' + eos_token }}"
        "{% set loop_messages = messages %}"
    "{% endif %}"
    "{% for message in loop_messages %}"
        "{% if message['role'] == 'user' %}"
            "{{ message['content'] + eos_token}}"
        "{% elif message['role'] == 'assistant' %}"
            "{{ message['content'] + eos_token }}"
        "{% endif %}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '{reason_start}' }}"
    "{% endif %}"
)

# Replace the quoted placeholders with our specific values. The values are
# spliced into single-quoted Jinja literals, so they must not contain a
# single quote themselves.
chat_template = chat_template.replace("'{system_prompt}'", f"'{system_prompt}'")
chat_template = chat_template.replace("'{reason_start}'", f"'{reason_start}'")
tokenizer.chat_template = chat_template

In [7]:

def get_text_by_template(prompt1,reason1,solution1,prompt2):
    """Render a one-shot prompt through the tokenizer's chat template.

    The conversation consists of a worked example (``prompt1`` answered with
    ``reason1`` and ``solution1`` wrapped in the reasoning/solution tags)
    followed by the new question ``prompt2``.

    Args:
        prompt1: Question of the worked example.
        reason1: Working-out shown for the worked example.
        solution1: Final answer shown for the worked example.
        prompt2: Question the model should answer next.

    Returns:
        The rendered prompt text (not tokenized), with the generation prompt
        appended.
    """
    worked_answer = f"{reason_start}{reason1}{reason_end}{solution_start}{solution1}{solution_end}"
    conversation = []
    conversation.append({"role" : "user", "content" : f"{prompt1}"})
    conversation.append({"role" : "assistant", "content" : worked_answer})
    conversation.append({"role" : "user", "content" : f"{prompt2}"})
    # The generation prompt is required so the model continues as the assistant.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize = False,
        add_generation_prompt = True,
    )

In [8]:



# Load the "cot" split and keep only the columns used below.
wanted_columns = ["expected_answer", "problem", "generated_solution"]
dataset = load_dataset("unsloth/OpenMathReasoning-mini", split="cot")
dataset = dataset.to_pandas()[wanted_columns]

# Answers that cannot be parsed as a number are coerced to NaN ...
is_number = pd.to_numeric(dataset["expected_answer"], errors="coerce").notna()
# ... and only the rows with a numeric answer are kept.
dataset = dataset[is_number]

def format_dataset(x):
    """Turn one dataset row into a system/user/assistant conversation.

    Args:
        x: Row providing ``expected_answer``, ``problem`` and
            ``generated_solution``.

    Returns:
        A list of three chat messages: the system prompt, the problem as the
        user turn, and the tagged working-out plus answer as the assistant
        turn.
    """
    # Strip the dataset's own <think> markers; our reasoning tags replace them.
    working_out = x["generated_solution"].replace("<think>","").replace("</think>","").strip()

    assistant_reply = "".join((
        reason_start,
        working_out,
        reason_end,
        solution_start,
        x['expected_answer'],
        solution_end,
    ))

    conversation = [
        {"role": "system","content":system_prompt},
        {"role": "user","content":x["problem"]},
        {"role":"assistant","content":assistant_reply},
    ]
    return conversation
# Build one conversation per row, then preview the first rows.
dataset["Messages"] = dataset.apply(format_dataset, axis="columns")
dataset.head(10)

Unnamed: 0,expected_answer,problem,generated_solution,Messages
0,14,Given $\sqrt{x^2+165}-\sqrt{x^2-52}=7$ and $x$...,"<think>\nOkay, let's see. I need to solve the ...","[{'role': 'system', 'content': 'You are given ..."
6,-2,Find the value of the parameter $a$ for which ...,"<think>\nOkay, so I need to find the value of ...","[{'role': 'system', 'content': 'You are given ..."
9,18,What is the sum of all real numbers $x$ for wh...,"<think>\nOkay, so I need to solve the equation...","[{'role': 'system', 'content': 'You are given ..."
13,2,Evaluate the sum \(\sum_{n=1}^\infty \frac{\ph...,"<think>\nOkay, so I need to evaluate the infin...","[{'role': 'system', 'content': 'You are given ..."
17,30,What is the largest positive integer that divi...,"<think>\nAlright, so I need to find the larges...","[{'role': 'system', 'content': 'You are given ..."
18,2,Evaluate the limit:\n\[ \lim_{n \to \infty} \l...,"<think>\nOkay, so I need to find the limit as ...","[{'role': 'system', 'content': 'You are given ..."
21,0,Let $(u_n)$ be a sequence of positive real num...,"<think>\nOkay, let's try to figure out this pr...","[{'role': 'system', 'content': 'You are given ..."
23,12,Suppose that the equations $y = x^3 - 3x + 5$ ...,"<think>\nOkay, so I have this problem where I ...","[{'role': 'system', 'content': 'You are given ..."
26,4034036,What integer equals \( \sqrt{2000 \times 2008 ...,"<think>\nOkay, so I need to find the integer t...","[{'role': 'system', 'content': 'You are given ..."
27,28,A rectangle is inscribed in a circle with an a...,"<think>\nOkay, let's see. I have a problem whe...","[{'role': 'system', 'content': 'You are given ..."


In [9]:
# Render the first remaining conversation as text to sanity-check the template.
# Positional .iloc[0] is used because the numeric filter leaves gaps in the
# index labels, so a label lookup only works while row label 0 survives.
tokenizer.apply_chat_template(dataset["Messages"].iloc[0], tokenize=False)

"You are given a problem.\nThink about the problem and provide your working out.\nPlace it between <start_working_out> and <end_working_out>.\nThen, provide your solution between <SOLUTION></SOLUTION>\n<|im_end|>Given $\\sqrt{x^2+165}-\\sqrt{x^2-52}=7$ and $x$ is positive, find all possible values of $x$.<|im_end|><start_working_out>Okay, let's see. I need to solve the equation √(x² + 165) - √(x² - 52) = 7, and find all positive values of x. Hmm, radicals can be tricky, but maybe if I can eliminate the square roots by squaring both sides. Let me try that.\n\nFirst, let me write down the equation again to make sure I have it right:\n\n√(x² + 165) - √(x² - 52) = 7.\n\nOkay, so the idea is to isolate one of the radicals and then square both sides. Let me try moving the second radical to the other side:\n\n√(x² + 165) = 7 + √(x² - 52).\n\nNow, if I square both sides, maybe I can get rid of the square roots. Let's do that:\n\n(√(x² + 165))² = (7 + √(x² - 52))².\n\nSimplifying the left side:

In [10]:
def _templated_length(messages):
    """Return the length of the chat-templated form of one conversation."""
    # NOTE(review): tokenization is not disabled here, so this is presumably
    # a token count rather than a character count - confirm.
    return len(tokenizer.apply_chat_template(messages))


dataset["N"] = dataset["Messages"].apply(_templated_length)

# Keep only conversations no longer than half of the configured maximum sequence length.
fits_budget = dataset["N"] <= max_seq_length / 2
dataset = dataset[fits_budget].copy()
dataset.shape



(58, 5)

In [11]:


# Render every conversation to plain text in a single call, then convert the
# DataFrame into a `Dataset` for the trainer.
conversations = dataset["Messages"].values.tolist()
dataset["text"] = tokenizer.apply_chat_template(conversations, tokenize=False)
dataset = Dataset.from_pandas(dataset)
dataset

Dataset({
    features: ['expected_answer', 'problem', 'generated_solution', 'Messages', 'N', 'text', '__index_level_0__'],
    num_rows: 58
})

In [24]:

def gen_result(text, max_new_tokens = 1024):
    """Generate a completion for ``text`` and stream it as it is produced.

    The prompt is tokenized, moved to the device the model lives on, and fed
    to ``model.generate``. Output is emitted through a ``TextStreamer`` that
    skips the prompt; nothing is returned.

    Args:
        text: Fully rendered prompt (e.g. the output of
            ``tokenizer.apply_chat_template(..., tokenize=False)``).
        max_new_tokens: Upper bound on generated tokens. Increase for longer
            outputs.

    Returns:
        None.
    """
    # Follow the model's device instead of assuming "cuda".
    inputs = tokenizer(text, return_tensors = "pt").to(model.device)
    # NOTE(review): the sampling settings below (for non-thinking use) only
    # matter when sampling is enabled - presumably by the model's generation
    # config; confirm.
    model.generate(
        **inputs,
        max_new_tokens = max_new_tokens,
        temperature = 0.7, top_p = 0.8, top_k = 20,
        streamer = TextStreamer(tokenizer, skip_prompt = True),
    )



In [13]:
from trl import SFTTrainer, SFTConfig
import trl

print(trl.__version__)

# Use bf16 when the GPU supports it, otherwise fall back to fp16.
bf16_available = torch.cuda.is_bf16_supported()

sft_config = SFTConfig(
    dataset_text_field="text",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    weight_decay=0.01,
    num_train_epochs=2,
    fp16=not bf16_available,
    bf16=bf16_available,
    logging_steps=10,
    output_dir="outputs",
    # 8-bit optimizer to save memory.
    optim="adamw_8bit",
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=sft_config,
)

0.21.0


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/58 [00:00<?, ? examples/s]

In [14]:
# @title Show current memory stats
gib = 1024 ** 3  # bytes per GiB

gpu_stats = torch.cuda.get_device_properties(0)
# Reserved memory before training; used later as the baseline for the
# training-only memory figure.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / gib, 3)
max_memory = round(gpu_stats.total_memory / gib, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 3070 Ti Laptop GPU. Max memory = 8.0 GB.
3.426 GB of memory reserved.


In [15]:
# Run fine-tuning; the returned object carries the metrics reported afterwards.
trainer_status = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 58 | Num Epochs = 2 | Total steps = 116
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 1 x 1) = 1
 "-____-"     Trainable parameters = 25,952,256 of 4,048,420,352 (0.64% trained)


Step,Training Loss
10,0.9719
20,0.6833
30,0.5494
40,0.4909
50,0.4423
60,0.3893
70,0.3925
80,0.4055
90,0.3454
100,0.3461


Unsloth: Will smartly offload gradients to save VRAM!


In [16]:
# @title Show final memory and time stats
train_seconds = trainer_status.metrics['train_runtime']

# Peak reserved memory overall, and the part added on top of the pre-training baseline.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# The same two figures as a share of the GPU's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

report_lines = [
    f"{train_seconds} seconds used for training.",
    f"{round(train_seconds/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
]
print("\n".join(report_lines))

142.8647 seconds used for training.
2.38 minutes used for training.
Peak reserved memory = 4.238 GB.
Peak reserved memory for training = 0.812 GB.
Peak reserved memory % of max memory = 52.975 %.
Peak reserved memory for training % of max memory = 10.15 %.


In [25]:
# Take the first training example and keep only its first two messages
# (system and user), dropping the reference assistant answer.
question_messages = dataset[0]["Messages"][:2]

# The generation prompt must be added so the model produces the assistant turn.
prompt = tokenizer.apply_chat_template(
    question_messages,
    tokenize=False,
    add_generation_prompt=True,
)
gen_result(prompt)

Okay, let's see. So Jenifer has 82 cents in pennies and nickels. Her brother thought all her nickels were dimes and counted it as $1.47. We need to find out how many pennies she actually has.

First, let's set up the problem with variables. Let's say the number of pennies is P and the number of nickels is N. 

The total amount of money Jenifer has is 82 cents, so in terms of pennies and nickels, that's:
P + 5N = 82 cents.

But her brother counted the nickels as dimes. So he thought the total was $1.47, which is 147 cents. So he counted each nickel as a dime. So the equation he used would be:
P + 10N = 147 cents.

Now, we have two equations:
1) P + 5N = 82
2) P + 10N = 147

We can subtract the first equation from the second to eliminate P. Let's do that:
(P + 10N) - (P + 5N) = 147 - 82
10N - 5N = 65
5N = 65
N = 13

Now that we know the number of nickels is 13, we can plug back into the first equation to find P:
P + 5(13) = 82
P + 65 = 82
P = 82 - 65
P = 17

So Jenifer has 17 pennies. Le