### News

We use Unsloth to fine-tune the glm-4-9b-chat-hf model on the OpenR1-Math-220k dataset.

### Installation

In [None]:
!pip install install unsloth

### Unsloth

We use the open-r1/OpenR1-Math-220k dataset. First, we need to format the data into `prompt`-`completion` pairs.

In [None]:
from datasets import load_dataset

# Load the OpenR1-Math-220k dataset and print its summary.
dataset = load_dataset("open-r1/OpenR1-Math-220k")
print(dataset)

# Show the first training record, one labelled field at a time.
sample = dataset['train'][0]
for heading, field in (
    ("Problem", "problem"),
    ("Solution (The Chain-of-Thought)", "solution"),
    ("Answer", "answer"),
):
    print(f"\n--- {heading} ---")
    print(sample[field])

def format_function(sample):
    """Convert one dataset record into a ``prompt``/``completion`` pair.

    Args:
        sample: Mapping with ``problem``, ``solution`` and ``answer`` entries.
            A ``None`` solution or answer is treated as an empty string.

    Returns:
        A dict with two keys: ``prompt`` (system instruction, the problem and
        an opened assistant turn) and ``completion`` (the stripped solution,
        followed by a boxed final answer when the solution does not already
        contain one and the answer is non-empty).
    """
    # Every role header is closed with <|end_header_id|>; <|eot_id|> only ends
    # a turn. The user/assistant headers previously used <|eot_id|> here.
    # NOTE(review): these are Llama-3-style markers, while the model trained
    # below is GLM-4 - confirm whether the tokenizer's own chat template
    # should be used instead.
    prompt_text = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert mathematician. Solve the following problem. Think step by step. End your response with a final answer in the format \\boxed{{answer}}.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{sample['problem']}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""

    # Guard against missing values so one bad record does not abort the map.
    solution_text = (sample['solution'] or "").strip()
    final_answer = (sample['answer'] or "").strip()

    boxed_answer = f"\\boxed{{{final_answer}}}"
    # Append the boxed answer only when there is an answer to box and the
    # solution does not already end up stating it in that form.
    if final_answer and boxed_answer not in solution_text:
        completion_text = f"{solution_text}\n{boxed_answer}"
    else:
        completion_text = solution_text

    return {"prompt": prompt_text, "completion": completion_text}

# Run the prompt/completion formatting over the dataset.
formatted_dataset = dataset.map(format_function)

# Export only the training split as JSON Lines.
train_data = formatted_dataset["train"]
train_data.to_json(
    "openr1_math_cot_formatted.jsonl",
    orient="records",
    lines=True,
)

Now that we have the formatted data, we can use it to train a model.

In [None]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer

# Load the base model and its tokenizer through Unsloth with 4-bit loading.
base_model_options = {
    "model_name": "zai-org/glm-4-9b-chat-hf",
    "max_seq_length": 4096,
    "dtype": None,
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Wrap the loaded model for LoRA training.
lora_options = {"r": 16, "lora_alpha": 32, "lora_dropout": 0}
model = FastLanguageModel.get_peft_model(model, **lora_options)

# Read the formatted JSONL file as the training split.
dataset = load_dataset(
    "json",
    data_files={"train": "openr1_math_cot_formatted.jsonl"},
    split="train",
)

def preprocess_function(examples, eos_token=None):
    """Join each prompt with its completion into one training string.

    An end-of-sequence marker is appended to every example so the trained
    model is shown where an answer stops; without it the text simply ends.

    Args:
        examples: Batch as a dict of columns, with parallel ``prompt`` and
            ``completion`` lists.
        eos_token: Marker appended to each example. ``None`` (the default)
            uses ``tokenizer.eos_token`` from the surrounding script; pass
            ``""`` to append nothing.

    Returns:
        ``{"text": [...]}`` with one combined string per example.
    """
    if eos_token is None:
        # Fall back to the tokenizer loaded earlier in this cell.
        eos_token = tokenizer.eos_token or ""

    texts = []
    for prompt, completion in zip(examples['prompt'], examples['completion']):
        text = prompt + completion
        # Do not double the marker if the completion already ends with it.
        if eos_token and not text.endswith(eos_token):
            text += eos_token
        texts.append(text)
    return {"text": texts}

# Add the combined "text" column that the trainer reads.
dataset = dataset.map(preprocess_function, batched=True)

# Use bf16 when the GPU supports it, otherwise fp16.
use_bf16 = torch.cuda.is_bf16_supported()

args = TrainingArguments(
    output_dir="glm4-openr1-math",
    seed=3407,
    # Batching: 2 examples per device, accumulated over 4 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=2,
    # Optimizer and schedule.
    optim="adamw_8bit",
    learning_rate=2e-5,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    max_grad_norm=1.0,
    # Mixed precision.
    bf16=use_bf16,
    fp16=not use_bf16,
    # Logging and checkpointing.
    logging_steps=50,
    save_strategy="steps",
    save_steps=1000,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=4096,
)

trainer.train()

Great! Now that the model is ready, let's see how it works. If you don't want to train the model yourself, you can download the trained adapter instead:

```bash
pip install modelscope
modelscope download MengAiDev/GLM4-OpenR1 --local_dir GLM4-OpenR1
```

This model was trained for only 6,250 steps because of time limits, so it may not be as good as a model trained for the 20,000 steps I originally planned.

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

class MathProblemSolver:
    """Answer math problems with a base causal LM plus a LoRA adapter.

    ``__init__`` sets three attributes: ``device``, ``tokenizer`` and
    ``model`` (the base model wrapped by the adapter, in eval mode).
    """

    def __init__(self, base_model_path: str, lora_path: str):
        """Load the tokenizer, the base model and the LoRA adapter.

        Args:
            base_model_path: Name or path passed to ``from_pretrained`` for
                both the tokenizer and the base model.
            lora_path: Adapter location passed to
                ``PeftModel.from_pretrained``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.tokenizer = AutoTokenizer.from_pretrained(
            base_model_path,
            trust_remote_code=True
        )

        # Tokenization and generation need a pad token; reuse EOS if unset.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = AutoModelForCausalLM.from_pretrained(
            base_model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

        # Attach the LoRA adapter on top of the base model.
        self.model = PeftModel.from_pretrained(self.model, lora_path)
        self.model.eval()

        print("Model loaded successfully with LoRA adapter!")

    def create_prompt(self, problem: str) -> str:
        """Return the instruction prompt that wraps ``problem``."""
        # NOTE(review): this plain-text prompt differs from the chat-style
        # prompt that format_function builds for training - confirm which
        # format the adapter expects.
        # Built from explicit pieces so the trailing space on the first line
        # cannot be lost to whitespace trimming.
        return (
            "Solve this math problem carefully and step by step. \n"
            "Provide detailed reasoning and put your final answer in \\boxed{}.\n"
            "\n"
            f"Problem: {problem}\n"
            "\n"
            "Solution:"
        )

    def solve(self, problem: str, max_new_tokens: int = 512) -> str:
        """Generate a solution for ``problem`` and return only the new text.

        Args:
            problem: The problem statement.
            max_new_tokens: Upper bound on generated tokens (default 512).

        Returns:
            The decoded continuation with surrounding whitespace stripped.
        """
        prompt = self.create_prompt(problem)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=0.1,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                repetition_penalty=1.1
            )

        # The generated sequence starts with the prompt tokens. Drop them by
        # token count rather than by character count of the decoded text:
        # the decoded prompt is not guaranteed to match the original string
        # (e.g. when the prompt was truncated to max_length).
        prompt_token_count = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_token_count:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

def main():
    """Load the solver and print its answers to two sample problems."""
    base_model = "zai-org/glm-4-9b-chat-hf"
    adapter_dir = "GLM4-OpenR1"
    solver = MathProblemSolver(base_model, adapter_dir)

    sample_problems = (
        "If x + y = 10 and x - y = 4, what is the value of x?",
        "Find the sum of the roots of the equation x^2 - 5x + 6 = 0.",
    )
    rule = "=" * 50  # separator line printed around each problem

    for number, text in enumerate(sample_problems, start=1):
        print(f"\n{rule}")
        print(f"Problem #{number}: {text}")
        print(rule)

        answer = solver.solve(text)
        print("Model's Solution:")
        print(answer)
        print(rule)


if __name__ == "__main__":
    main()