In [None]:
from datasets import Dataset, load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from trl import RewardTrainer, RewardConfig

In [None]:
model_name = "distilbert/distilbert-base-cased"
# A reward model must emit one scalar score per sequence: the pairwise loss
# compares the score of the chosen text against the score of the rejected one.
# Without num_labels=1 the classification head defaults to two outputs.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def tokenize_dataset(examples):
    """Tokenize review text into fixed-length ids and masks, keeping the label.

    Works for a single example (``examples["text"]`` is a string) as well as
    for a batch (``examples["text"]`` is a list of strings), so it can be used
    with ``Dataset.map`` in either mode.

    Args:
        examples: Mapping with a ``"text"`` entry and a ``"label"`` entry.

    Returns:
        A dict with ``"input_ids"``, ``"attention_mask"`` and ``"label"``.
        The token fields are plain Python lists padded/truncated to 512
        tokens (one list per example).
    """
    # No return_tensors / squeeze: the dataset stores lists anyway, and
    # squeeze() would drop the batch axis for a batch holding one example.
    tokens = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    return {
        "input_ids": tokens["input_ids"],
        "attention_mask": tokens["attention_mask"],
        "label": examples["label"],
    }

def create_pairwise_dataset(tokenized_dataset):
    """Build a chosen/rejected dataset from a tokenized, labelled dataset.

    Rows with label 1 become the "chosen" side and rows with label 0 the
    "rejected" side. The two sides are paired by position: the i-th positive
    row is matched with the i-th negative row, in the order they appear in
    ``tokenized_dataset``.

    Args:
        tokenized_dataset: Dataset with ``"input_ids"``, ``"attention_mask"``
            and ``"label"`` columns.

    Returns:
        A ``Dataset`` with columns ``input_ids_chosen``,
        ``attention_mask_chosen``, ``input_ids_rejected`` and
        ``attention_mask_rejected``. If the classes are unbalanced, the
        surplus rows of the larger class are dropped.
    """
    # input_columns hands only the label to the predicate, so the long token
    # columns are not decoded for every row just to test the label.
    positive_reviews = tokenized_dataset.filter(lambda label: label == 1, input_columns="label")
    negative_reviews = tokenized_dataset.filter(lambda label: label == 0, input_columns="label")

    # Every column handed to Dataset.from_dict must have the same length, so
    # cut both sides down to the number of pairs that can actually be formed.
    n_pairs = min(len(positive_reviews), len(negative_reviews))
    positive_reviews = positive_reviews.select(range(n_pairs))
    negative_reviews = negative_reviews.select(range(n_pairs))

    pairwise_data = {
        "input_ids_chosen": positive_reviews["input_ids"],
        "attention_mask_chosen": positive_reviews["attention_mask"],
        "input_ids_rejected": negative_reviews["input_ids"],
        "attention_mask_rejected": negative_reviews["attention_mask"],
    }

    return Dataset.from_dict(pairwise_data)

dataset = load_dataset("stanfordnlp/imdb")

# Shuffle the training split with a fixed seed before pairing: pairs are
# formed by position, so the row order decides which reviews get matched.
train_dataset = dataset["train"].map(tokenize_dataset).shuffle(seed=1)
test_dataset = dataset["test"].map(tokenize_dataset)

train_dataset_pairs = create_pairwise_dataset(train_dataset)
test_dataset_pairs = create_pairwise_dataset(test_dataset)
# Evaluate on a 5% sample of the test pairs; the seed keeps that sample
# identical across runs so evaluation numbers are comparable.
eval_dataset_pairs = test_dataset_pairs.train_test_split(test_size=0.05, seed=1)["test"]

In [7]:
# Scratch arithmetic: 12,500 squared. Presumably the number of pairs if every
# positive review were matched with every negative one -- TODO confirm intent.
print(12_500 * 12_500)

156250000


In [47]:
training_args = RewardConfig(
    # Run bookkeeping
    output_dir="./reward_model",
    seed=1,
    # Sequence length cap; same value the tokenization step pads/truncates to
    max_length=512,
    # Optimisation: one pass over the data, 16 samples per device per step,
    # gradients accumulated over 2 steps before each optimizer update
    num_train_epochs=1,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,
    learning_rate=5e-5,
    weight_decay=0.0,
    # Evaluate and log on the same 100-step cadence
    eval_strategy="steps",
    eval_steps=100,
    per_device_eval_batch_size=16,
    logging_strategy="steps",
    logging_steps=100,
)

In [None]:
# Wire the model, tokenizer and configuration together with the pairwise
# (chosen vs. rejected) training and evaluation sets, then start training.
trainer = RewardTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset_pairs,
    eval_dataset=eval_dataset_pairs,
)
trainer.train()