# Supervised Fine-Tuning with SFTTrainer

In [1]:
# Authenticate to Hugging Face
from huggingface_hub import login
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment
load_dotenv()
hf_token = os.environ.get("HF_TOKEN")

# Authenticate with the token read above
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [8]:
# Import the required packages
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer, setup_chat_format
import torch

# Pick the compute device: CUDA first, then Apple MPS, otherwise CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Checkpoint to fine-tune and the directory used as the download cache
model_name = "HuggingFaceTB/SmolLM2-135M"
cache_dir = "../../huggingface"

# Load the model, then move it onto the selected device
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    cache_dir=cache_dir,
)
model = model.to(device)

# Load the matching tokenizer from the same checkpoint and cache
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
    cache_dir=cache_dir,
)

# Apply TRL's chat-format setup; it returns the (model, tokenizer) pair to use from here on
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

# Name and tags under which the fine-tuned model is saved and/or uploaded
finetune_name = "SmolLM2-FT-MyDataset"
finetune_tags = ["smol-course", "xcs_module"]

# Generate with the base model
先试用尚未微调的基础模型。它本身不带聊天模板，这里使用的是上一步 `setup_chat_format` 添加的模板。

In [4]:
# Baseline: see how the model answers a sample prompt before any fine-tuning
prompt = "Write a haiku about programming"

# Wrap the prompt as a single user turn and render it to text with the chat template
messages = [dict(role="user", content=prompt)]
formatted_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
)

# Tokenize the rendered prompt and place the tensors on the model's device
inputs = tokenizer(formatted_prompt, return_tensors="pt")
inputs = inputs.to(device)

# Generate at most 100 new tokens and print the first decoded sequence
outputs = model.generate(**inputs, max_new_tokens=100)
print("Before training:")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Before training:
user
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a


# 数据集准备
加载一个示例数据集并对其进行格式化以进行训练。数据集应使用输入-输出对进行结构化，其中每个输入都是一个提示，输出是模型的预期响应。

TRL 将根据模型的聊天模板设置输入消息的格式。消息需要表示为字典列表，其中每个字典都包含 `role` 和 `content` 两个键。

In [7]:
# Fetch the "everyday-conversations" configuration of the smoltalk dataset
from datasets import load_dataset

ds = load_dataset(
    path="HuggingFaceTB/smoltalk",
    name="everyday-conversations",
)

# Bare expression so the notebook displays the loaded object
ds

DatasetDict({
    train: Dataset({
        features: ['full_topic', 'messages'],
        num_rows: 2260
    })
    test: Dataset({
        features: ['full_topic', 'messages'],
        num_rows: 119
    })
})

In [None]:
# TODO: 🦁 If your dataset is not in a format that TRL can convert to the chat template, you will need to process it. 
# Refer to the [module](../chat_templates.md)
#EXAMPLE
def formatting_func(example):
    """Render one question/answer record as a single prompt-plus-response string.

    Args:
        example: Mapping that provides the keys ``"question"`` and ``"answer"``.

    Returns:
        A dict with one key, ``"text"``, holding the user turn followed by the
        assistant turn, each wrapped in header and end-of-turn markers.

    Raises:
        KeyError: If ``"question"`` or ``"answer"`` is missing from ``example``.
    """
    # NOTE(review): these markers look like Llama-3-style special tokens;
    # confirm they match the tokenizer actually used for training.
    question = example["question"]
    answer = example["answer"]
    user_turn = f"<|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|>"
    assistant_turn = f"<|start_header_id|>assistant<|end_header_id|>\n\n{answer}<|eot_id|>"
    return {"text": f"<|begin_of_text|>{user_turn}{assistant_turn}"}

# Configuring the SFTTrainer
SFTTrainer 通过各种参数来控制训练过程，其中包括训练步数、批量大小、学习率和评估策略。请根据具体需求和计算资源调整这些参数。

In [None]:
# Training arguments for the supervised fine-tuning run
sft_config = SFTConfig(
    # Where outputs are written, and the model id used on the Hub
    output_dir="./sft_output",
    hub_model_id=finetune_name,  # give the model a unique name
    # Optimisation settings
    max_steps=1000,  # adjust to the dataset size and the desired training duration
    per_device_train_batch_size=4,  # set according to available GPU memory
    learning_rate=5e-5,  # common starting point for fine-tuning
    # Logging and checkpoint cadence
    logging_steps=10,  # how often training metrics are logged
    save_steps=100,  # how often a model checkpoint is saved
    # Periodic evaluation
    evaluation_strategy="steps",  # evaluate the model at regular step intervals
    eval_steps=50,  # evaluation frequency, in steps
    # Enabled only when the device selected earlier is "mps"
    use_mps_device=(device == "mps"),
)

class CustomTrainer(SFTTrainer):
    """SFTTrainer variant that also prints train/eval loss whenever metrics are logged."""

    def log(self, logs, *args, **kwargs):
        """Forward the logging event to the parent class, then echo loss values.

        Args:
            logs: Mapping of metric names to values for this logging event.
            *args: Extra positional arguments, passed through unchanged.
            **kwargs: Extra keyword arguments, passed through unchanged.

        Extra arguments are forwarded rather than rejected so the override
        keeps working with Trainer versions whose ``log`` accepts more than
        ``logs`` (for example an additional ``start_time`` argument); a
        ``log(self, logs)`` signature raises ``TypeError`` on those versions.
        """
        super().log(logs, *args, **kwargs)
        # A single event may carry a training loss, an evaluation loss, both, or neither
        if "loss" in logs:
            print(f"Train Loss: {logs['loss']:.4f}")
        if "eval_loss" in logs:
            print(f"Eval Loss: {logs['eval_loss']:.4f}")


# Build the trainer from the model, tokenizer, training arguments and both dataset splits
trainer = CustomTrainer(
    args=sft_config,
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
)

## Training the Model 
配置好trainer后，现在可以继续训练模型。训练过程将涉及迭代数据集、计算损失以及更新模型的参数以最大限度地减少这种损失。

In [None]:
# Run the fine-tuning loop
trainer.train()

# Save the result into a local folder named after the fine-tune
save_dir = f"./{finetune_name}"
trainer.save_model(save_dir)

## Bonus Exercise: Generate with fine-tuned model

🐕 使用微调模型生成响应

In [None]:
# Test the fine-tuned model on the same prompt that was used before training.
# NOTE(review): `model` is the object that was passed to the trainer, so it
# presumably holds the fine-tuned weights at this point; confirm if unsure.
prompt = "Write a haiku about programming"

# Format with the chat template, exactly as in the pre-training cell,
# so the two generations are directly comparable
messages = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False)

# Tokenize once, generate up to 100 new tokens, and print the first decoded sequence
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=100)
print("After training:")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))