### pip install

In [None]:
# Install the training stack with uv into the system environment.
# --upgrade, --force-reinstall and --no-cache-dir are all passed, and -qqq
# silences the installer output.
# NOTE(review): torch and triton only carry lower bounds, torchvision and
# bitsandbytes are unpinned, and the git+https sources have no commit pin
# (triton_kernels tracks @main), so a re-run may resolve to different
# versions — consider pinning for reproducibility.
!uv pip install --system --upgrade --force-reinstall --no-cache-dir -qqq \
    "torch>=2.8.0" \
    "triton>=3.4.0" \
    "numpy==2.1.2" \
    torchvision \
    bitsandbytes \
    git+https://github.com/huggingface/transformers \
    git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels \
    "unsloth_zoo[base] @ git+https://github.com/unslothai/unsloth-zoo" \
    "unsloth[base] @ git+https://github.com/unslothai/unsloth"

### Unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

# Loader settings; both names are module-level so other cells can read them.
max_seq_length, dtype = 1024, None  # reduced sequence length; no explicit dtype passed

# Load the gpt-oss-20b checkpoint with 4-bit loading on and full fine-tuning off.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gpt-oss-20b",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
    full_finetuning=False,
)

In [None]:
# Wrap the loaded model with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    # LoRA rank: pick a positive value such as 8, 16, 32, 64 or 128.
    r=8,
    lora_alpha=16,
    # Any value is accepted, but 0 is reportedly the optimal setting.
    lora_dropout=0,
    # Any value is accepted, but "none" is reportedly the optimal setting.
    bias="none",
    target_modules=[
        # attention projections
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        # MLP projections
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    # Use True or "unsloth" for long context lengths; "unsloth" reportedly
    # needs 30% less VRAM and allows twice the batch size.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,   # rank-stabilized LoRA
    loftq_config=None,  # LoftQ
)

### Fine tuning前の推論性能をチェック

In [None]:
from transformers import TextStreamer

# Baseline generation before any fine-tuning: a system prompt that asks for
# Japanese reasoning, followed by a single math question.
messages = [
    dict(
        role="system",
        content="reasoning language: Japanese\n\nYou are a helpful assistant that can solve mathematical problems.",
    ),
    dict(role="user", content="Solve x^5 + 3x^4 - 10 = 3."),
]

# Render the chat into model inputs, then move them onto the model's device.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    reasoning_effort="medium",  # reasoning strength: "high", "medium" or "low"
)
inputs = inputs.to(model.device)

# The streamer prints text as it is generated; the return value is discarded.
_ = model.generate(
    **inputs,
    max_new_tokens=1024,
    streamer=TextStreamer(tokenizer),
)

### Dataset

In [None]:
from datasets import load_dataset
from unsloth.chat_templates import standardize_sharegpt

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-template string.

    Args:
        examples: Batch mapping whose "messages" entry holds one conversation
            per row.

    Returns:
        A dict with a single key, "text", listing the rendered strings in the
        same order as the incoming conversations.

    Relies on the module-level ``tokenizer``; each conversation is rendered
    with ``tokenize=False`` and ``add_generation_prompt=False``.
    """
    rendered = []
    for conversation in examples["messages"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Load the train split, pass it through standardize_sharegpt, then add the
# rendered "text" column in batched mode.
dataset = standardize_sharegpt(
    load_dataset("HuggingFaceH4/Multilingual-Thinking", split="train")
).map(formatting_prompts_func, batched=True)

# Show the first rendered example as a sanity check.
print(dataset[0]["text"])

### Fine tuning

In [None]:
from trl import SFTConfig, SFTTrainer
from transformers import DataCollatorForSeq2Seq

# Supervised fine-tuning over the chat-formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        # Batching: start from the smallest per-device batch, with 4
        # gradient-accumulation steps.
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        # Run length: capped at 60 steps. For one full pass over the data,
        # set num_train_epochs = 1 instead.
        max_steps=60,
        warmup_steps=5,
        # Optimizer and schedule.
        optim="adamw_8bit",
        learning_rate=2e-4,
        weight_decay=0.01,
        lr_scheduler_type="linear",
        # Bookkeeping.
        seed=3407,
        logging_steps=1,
        output_dir="outputs",
        report_to="none",  # change this to report to WandB etc.
        # max_seq_length is deliberately not passed here (left disabled).
    ),
)

trainer.train()

### Fine tuning後の推論性能をチェック

In [None]:
from transformers import TextStreamer

# Same prompt as the pre-fine-tuning check, now run against the trained model.
messages = [
    dict(
        role="system",
        content="reasoning language: Japanese\n\nYou are a helpful assistant that can solve mathematical problems.",
    ),
    dict(role="user", content="Solve x^5 + 3x^4 - 10 = 3."),
]

# Render the chat into model inputs, then move them onto the model's device.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    reasoning_effort="medium",  # reasoning strength: "high", "medium" or "low"
)
inputs = inputs.to(model.device)

# The streamer prints text as it is generated; the return value is discarded.
_ = model.generate(
    **inputs,
    max_new_tokens=2048,
    streamer=TextStreamer(tokenizer),
)