### News

We use Unsloth to fine-tune the glm-4-9b-chat-hf model on the OpenR1-Math-220k dataset.

### Installation

In [None]:
# Install the Unsloth fine-tuning library.
!pip install unsloth

### Unsloth

In [None]:
from datasets import load_dataset

# Load the open-r1/OpenR1-Math-220k dataset and print its summary.
dataset = load_dataset("open-r1/OpenR1-Math-220k")
print(dataset)

# Inspect the first training sample, printing each field under its own header.
sample = dataset["train"][0]
for header, field in (
    ("\n--- Problem ---", "problem"),
    ("\n--- Solution (The Chain-of-Thought) ---", "solution"),
    ("\n--- Answer ---", "answer"),
):
    print(header)
    print(sample[field])

def format_function(sample):
    """
    Convert one raw dataset record into a prompt/completion pair for fine-tuning.

    The prompt is laid out with the GLM-4 chat role markers (``<|system|>``,
    ``<|user|>``, ``<|assistant|>``), each followed by a newline and that
    turn's content, so the text matches the model being fine-tuned instead of
    the Llama-3 header tokens, which are not part of the GLM-4 chat format.

    Args:
        sample: Mapping with ``problem``, ``solution`` and ``answer`` entries.
            ``solution`` and ``answer`` may be ``None`` or empty.

    Returns:
        A dict with two string entries: ``prompt`` (system + user turns and
        the opening of the assistant turn) and ``completion`` (the solution,
        guaranteed to contain ``\\boxed{<answer>}`` when an answer exists).
    """
    system_text = (
        "You are an expert mathematician. Solve the following problem. "
        "Think step by step. End your response with a final answer in the "
        "format \\boxed{answer}."
    )

    # NOTE(review): the ``[gMASK]<sop>`` prefix is deliberately not written
    # here, on the assumption that the tokenizer adds it when the text is
    # tokenized -- confirm against the tokenizer actually used for training.
    prompt_text = (
        f"<|system|>\n{system_text}"
        f"<|user|>\n{sample['problem']}"
        "<|assistant|>\n"
    )

    # Treat missing (None) fields as empty instead of crashing on .strip().
    solution_text = (sample['solution'] or "").strip()
    final_answer = (sample['answer'] or "").strip()
    boxed_answer = f"\\boxed{{{final_answer}}}"

    if final_answer and boxed_answer not in solution_text:
        # Append the boxed answer only when there is one and the solution
        # does not already end up stating it in boxed form.
        completion_text = "\n".join(
            part for part in (solution_text, boxed_answer) if part
        )
    else:
        completion_text = solution_text

    return {"prompt": prompt_text, "completion": completion_text}

# Run format_function over every record, adding 'prompt' and 'completion'.
formatted_dataset = dataset.map(format_function)

# Take the training split and write it out as JSON Lines (one record per line).
train_data = formatted_dataset["train"]
train_data.to_json(
    "openr1_math_cot_formatted.jsonl",
    orient="records",
    lines=True,
)

In [None]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer

# Load the base model and its tokenizer through Unsloth with 4-bit weights.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="glm-4-9b-chat-hf",
    max_seq_length=4096,
    dtype=None,
    load_in_4bit=True,
)

# Wrap the model with LoRA adapters using the settings below.
lora_settings = {"r": 16, "lora_alpha": 32, "lora_dropout": 0}
model = FastLanguageModel.get_peft_model(model, **lora_settings)

# Read the formatted JSONL file back in as the training split.
dataset = load_dataset(
    "json",
    data_files={"train": "openr1_math_cot_formatted.jsonl"},
    split="train",
)

def preprocess_function(examples, eos_token=None):
    """
    Join each prompt with its completion into a single 'text' field for training.

    An end-of-sequence token is appended to every text that does not already
    end with it, so each training example carries an explicit end-of-response
    marker.

    Args:
        examples: Batch with ``prompt`` and ``completion`` entries, each a
            list of strings of the same length.
        eos_token: Token to append. When ``None`` (the default, which is how
            ``dataset.map`` calls this function), the module-level
            ``tokenizer.eos_token`` is used; an empty string disables the
            append.

    Returns:
        A dict with one entry, ``text``, holding the joined strings.
    """
    if eos_token is None:
        # Fall back to the tokenizer loaded earlier in this cell.
        eos_token = tokenizer.eos_token or ""

    texts = []
    for prompt, completion in zip(examples['prompt'], examples['completion']):
        text = prompt + completion
        # Skip the append when the token is already there to avoid doubling it.
        if eos_token and not text.endswith(eos_token):
            text += eos_token
        texts.append(text)
    return {"text": texts}

# Add the combined 'text' column, handing the function whole batches at a time.
dataset = dataset.map(preprocess_function, batched=True)

# Probe bf16 support once; fp16 is enabled only when bf16 is unavailable.
use_bf16 = torch.cuda.is_bf16_supported()

args = TrainingArguments(
    # Output and checkpointing
    output_dir="glm4-openr1-math",
    save_strategy="steps",
    save_steps=1000,
    logging_steps=50,
    # Batch size and schedule
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    # Optimizer
    optim="adamw_8bit",
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    max_grad_norm=1.0,
    # Precision and reproducibility
    fp16=not use_bf16,
    bf16=use_bf16,
    seed=3407,
)


# Build the supervised fine-tuning trainer over the 'text' column and run it.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=4096,
)
trainer.train()