In [None]:
%pip install datasets transformers bitsandbytes peft evaluate gdown accelerate

In [2]:
import torch
import pandas as pd

from datasets import Dataset

from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

from peft import LoraConfig, get_peft_model, TaskType

from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

from tqdm import tqdm

from accelerate import Accelerator

from dpoloss import dpo_loss

from eval import evaluate_preference_alignment

In [3]:
accelerator = Accelerator(
    gradient_accumulation_steps=64,
    mixed_precision="bf16",
    cpu=False,
    kwargs_handlers=[],
)

device = accelerator.device
print(f"Using device: {device}")

Using device: cuda


In [None]:
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True, llm_int8_threshold=6.0, llm_int8_has_fp16_weight=False
)

base_model = AutoModel.from_pretrained(
    "GSAI-ML/LLaDA-8B-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(
    "GSAI-ML/LLaDA-8B-Instruct", trust_remote_code=True
)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.truncation_side = "left"

MASK_TOKEN_ID = 126336

In [5]:
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "attn_out"],
    bias="none",
)

base_model = get_peft_model(base_model, lora_config)
base_model.print_trainable_parameters()

trainable params: 8,388,608 || all params: 8,023,969,792 || trainable%: 0.1045


In [6]:
ref_model = AutoModel.from_pretrained(
    "GSAI-ML/LLaDA-8B-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
    device_map="auto",
)

for param in ref_model.parameters():
    param.requires_grad = False

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [7]:
base_model.train()
ref_model.eval()

LLaDAModelLM(
  (model): LLaDAModel(
    (transformer): ModuleDict(
      (wte): Embedding(126464, 4096)
      (emb_drop): Dropout(p=0.0, inplace=False)
      (ln_f): RMSLayerNorm()
      (blocks): ModuleList(
        (0-31): 32 x LLaDALlamaBlock(
          (dropout): Dropout(p=0.0, inplace=False)
          (act): SiLU()
          (attn_out): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (ff_out): Linear8bitLt(in_features=12288, out_features=4096, bias=False)
          (rotary_emb): RotaryEmbedding()
          (attn_norm): RMSLayerNorm()
          (ff_norm): RMSLayerNorm()
          (q_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (ff_proj): Linear8bitLt(in_features=4096, out_features=12288, bias=False)
          (up_proj): Linear8bitLt(in_features=4096, out_features

In [8]:
import gdown
import zipfile

file_id = "1YoLmzdiYVb7DwQUdCGbMp0BuXGVohLGS"
url = f"https://drive.google.com/uc?id={file_id}"

gdown.download(url, output="data.zip", quiet=False)

with zipfile.ZipFile("data.zip", "r") as zip_ref:
    zip_ref.extractall()

Downloading...
From (original): https://drive.google.com/uc?id=1YoLmzdiYVb7DwQUdCGbMp0BuXGVohLGS
From (redirected): https://drive.google.com/uc?id=1YoLmzdiYVb7DwQUdCGbMp0BuXGVohLGS&confirm=t&uuid=8ba9363b-4b73-44bd-bb92-a1d7c127d176
To: /data.zip
100%|██████████| 69.1M/69.1M [00:02<00:00, 34.3MB/s]


In [9]:
import zipfile

with zipfile.ZipFile("data.zip", "r") as zip_ref:
    zip_ref.extractall("data")

In [10]:
train_dataset_clean = pd.read_csv("./data/train_dataset_clean.csv").to_dict("records")
eval_dataset_clean = pd.read_csv("./data/eval_dataset_clean.csv").to_dict("records")
test_dataset_clean = pd.read_csv("./data/test_dataset_clean.csv").to_dict("records")

print(
    f"Loaded {len(train_dataset_clean)} train samples, {len(eval_dataset_clean)} eval samples, and {len(test_dataset_clean)} test samples"
)

Loaded 142510 train samples, 15835 eval samples, and 8408 test samples


In [11]:
def get_tokenized(input, max_length=512):
    return tokenizer(
        input, return_tensors="pt", padding=True, truncation=True, max_length=max_length
    )


preference_dataset_train = Dataset.from_list(train_dataset_clean[:1024])

prompts_train = get_tokenized([each["prompt"] for each in preference_dataset_train])
choosen_train = get_tokenized([each["chosen"] for each in preference_dataset_train])
rejected_train = get_tokenized([each["rejected"] for each in preference_dataset_train])

In [12]:
batch_size = 1
grad_accum_steps = 64

effective_batch_size = batch_size * grad_accum_steps

steps_per_epoch = len(preference_dataset_train) // effective_batch_size

epochs = 1

max_steps = steps_per_epoch * epochs

optimizer = AdamW(base_model.parameters(), lr=5e-6)

warmup_steps = max_steps // 20

warmup_scheduler = LinearLR(
    optimizer,
    start_factor=0.01,
    end_factor=1.0,
    total_iters=warmup_steps,
)

cosine_scheduler = CosineAnnealingLR(
    optimizer,
    T_max=max_steps - warmup_steps,
    eta_min=1e-7,
)

scheduler = SequentialLR(
    optimizer,
    schedulers=[warmup_scheduler, cosine_scheduler],
    milestones=[warmup_steps],
)

In [13]:
base_model, ref_model, optimizer, scheduler = accelerator.prepare(
    base_model, ref_model, optimizer, scheduler
)

In [17]:
import warnings

warnings.filterwarnings(
    "ignore",
    message="MatMul8bitLt: inputs will be cast from .* to float16 during quantization",
)


In [None]:
step = 0

for epoch in range(epochs):
    epoch_loss = 0
    num_batches_processed = 0

    for i in tqdm(range(0, len(preference_dataset_train), batch_size)):
        batch_end = min(i + batch_size, len(preference_dataset_train))

        prompts_ids = prompts_train["input_ids"][i:batch_end].to(device)
        chosen_ids = choosen_train["input_ids"][i:batch_end].to(device)
        rejected_ids = rejected_train["input_ids"][i:batch_end].to(device)

        with accelerator.accumulate(base_model):
            loss = dpo_loss(
                base_model,
                ref_model,
                prompts_ids,
                chosen_ids,
                rejected_ids,
                MASK_TOKEN_ID,
                device,
            )

            accelerator.backward(loss)

            if accelerator.sync_gradients:
                accelerator.clip_grad_norm_(base_model.parameters(), max_norm=1.0)

            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        epoch_loss += loss.item()
        num_batches_processed += 1

        del prompts_ids, chosen_ids, rejected_ids

        if accelerator.sync_gradients:
            step += 1

            average_loss = epoch_loss / num_batches_processed

            print(f"Step {step}, Epoch {epoch + 1}, Avg Loss: {average_loss:.4f}")

            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()

In [None]:
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(base_model)
unwrapped_model.save_pretrained("./saved_model", safe_serialization=True)

In [22]:
import gc

gc.collect()

torch.cuda.empty_cache()

torch.cuda.ipc_collect()

if torch.cuda.is_available():
    torch.cuda.synchronize()
    torch.cuda.reset_peak_memory_stats()

In [25]:
evaluate_preference_alignment(ref_model, test_dataset_clean, get_tokenized, device)

100%|██████████| 32/32 [03:25<00:00,  6.42s/it]


Results on HH-RLHF (256 samples):
Wins     : 141 (55.08%)
Losses   : 115 (44.92%)
Ties     : 0 (0.00%)





In [26]:
evaluate_preference_alignment(base_model, test_dataset_clean, get_tokenized, device)

100%|██████████| 32/32 [03:37<00:00,  6.79s/it]


Results on HH-RLHF (256 samples):
Wins     : 151 (58.98%)
Losses   : 105 (41.02%)
Ties     : 0 (0.00%)



