# **Supervised Fine-Tuning**

model: "SeaLLMs/SeaLLMs-v3-7B"

## **Import Libraries**

In [1]:
## **Environment Configuration**
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3,4,5"    # adjust as needed

In [2]:
import gc
import time
import torch
import pandas as pd
from tqdm.auto import tqdm

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# ensure we see full contexts
pd.set_option("display.max_colwidth", None)

# reproducibility
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    for i in range(torch.cuda.device_count()):
        print(f"  • GPU {i}: {torch.cuda.get_device_name(i)}")

Using device: cuda
  • GPU 0: NVIDIA A100-SXM4-40GB
  • GPU 1: NVIDIA A100-SXM4-40GB
  • GPU 2: NVIDIA A100-SXM4-40GB
  • GPU 3: NVIDIA A100-SXM4-40GB
  • GPU 4: NVIDIA A100-SXM4-40GB
  • GPU 5: NVIDIA A100-SXM4-40GB


In [5]:
## **Global Variables**
MODEL_ID      = "SeaLLMs/SeaLLMs-v3-7B"
CACHE_DIR     = "../model_cache"
OUTPUT_DIR    = "./checkpoints/seallms-v3-7b-lora"
BATCH_SIZE    = 16
EPOCHS        = 3
LEARNING_RATE = 5e-5

LORA_R       = 16
LORA_ALPHA   = 32
LORA_DROPOUT = 0.05

MAX_LENGTH   = 512

## **Load Datasets**

In [10]:
dataset_path = "../datasets/cqa_sft.jsonl"
df = pd.read_json(dataset_path, lines=True)
df.sample(1)

Unnamed: 0,context,question,answer,file_url,regulation_number,title,filename,n_pairs_requested
0,"Bank yang menerbitkan sertifikat deposito diwajibkan untuk menerapkan manajemen risiko, program anti pencucian uang, dan pencegahan pendanaan terorisme. Penerapan ini harus sesuai dengan ketentuan yang berlaku dan bertujuan untuk melindungi konsumen serta menjaga stabilitas sistem keuangan. Selain itu, bank juga harus menyampaikan laporan berkala mengenai sertifikat deposito yang diterbitkan kepada Otoritas Jasa Keuangan.",Apa kewajiban bank yang menerbitkan sertifikat deposito menurut peraturan ini?,"Kewajiban bank yang menerbitkan sertifikat deposito termasuk menerapkan manajemen risiko, program anti pencucian uang, dan menyampaikan laporan berkala kepada Otoritas Jasa Keuangan.",https://www.ojk.go.id/id/regulasi/Documents/Pages/POJK-tentang-Penerbitan-Sertifikat-Deposito-oleh-Bank/POJK%2010.%20Penerbitan%20Sertifikat%20Deposito%20oleh%20Bank.pdf,10/POJK.03/2015,POJK tentang Penerbitan Sertifikat Deposito oleh Bank,ojk-peraturan_ojk-10_pojk_03_2015-14072015-pojk_tentang_penerbitan_sertifikat_deposito_oleh_bank.pdf,4


In [11]:
# define prompt function
def make_prompt(example):
    return (
        example["context"].strip()
        + "\n\nPertanyaan: " + example["question"].strip()
        + "\nJawaban: "  + example["answer"].strip()
    )

In [12]:
raw_ds = Dataset.from_pandas(df)
tokenized = raw_ds.map(
    lambda ex: {"text": make_prompt(ex)},
    remove_columns=raw_ds.column_names,
    num_proc=4,
    desc="Creating prompts"
)

Creating prompts (num_proc=4): 100%|██████████| 515/515 [00:00<00:00, 2491.34 examples/s]


## **Load Tokenizer & Model**

In [13]:
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    cache_dir=CACHE_DIR,
    use_fast=True
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    cache_dir=CACHE_DIR,
    torch_dtype=torch.float16,
    device_map="auto"
)
base_model.config.pad_token_id = tokenizer.eos_token_id

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Loading checkpoint shards: 100%|██████████| 7/7 [00:05<00:00,  1.17it/s]


## **LoRA Adapter**

In [14]:
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=["q_proj", "v_proj"]
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

trainable params: 5,046,272 || all params: 7,620,662,784 || trainable%: 0.0662


## **Preprocess Data**

In [15]:
def tokenize_fn(examples):
    toks = tokenizer(
        examples["text"],
        max_length=MAX_LENGTH,
        truncation=True,
        padding="max_length"
    )
    # Labels = input_ids untuk causal LM
    toks["labels"] = toks["input_ids"].copy()
    return toks

In [16]:
train_tokenized = tokenized.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"],
    num_proc=4,
    desc="Tokenizing"
)

Tokenizing (num_proc=4): 100%|██████████| 515/515 [00:00<00:00, 1521.51 examples/s]


In [17]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

## **Fine-Tuning**

In [18]:
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    logging_steps=50,
    save_steps=200,
    save_total_limit=3,
    fp16=True,
    gradient_accumulation_steps=1,
    report_to="none"
)

In [22]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_tokenized,
    args=training_args,
    data_collator=data_collator,
)

Truncating train dataset: 100%|██████████| 515/515 [00:00<00:00, 87099.46 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [23]:
# Jalankan pelatihan
trainer.train()

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:5 and cuda:0!

In [None]:
# Simpan model dan tokenizer
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

## **Evaluation**