<a href="https://colab.research.google.com/github/yaya-sy/LLMReasoningCourse/blob/main/labs/lab2/lab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 2: Improving the output quality of LLMs with repeated sampling

We will see how to improve an LLM's French-to-English translations by scaling up the number of candidate translations sampled for each sentence and using a reward score to pick the best one.

In [None]:
!pip install datasets==2.20.0

# Load the data and the model

In [None]:
from datasets import load_dataset

# Download the training split of the preference dataset and keep only the rows
# whose translation direction is French -> English ("fr-en").
raw_datasets = load_dataset(
    "haoranxu/X-ALMA-Preference",
    split="train",
).filter(lambda example: example["directions"] == "fr-en")

# Evaluation only uses the first 100 of those French -> English rows.
subset = raw_datasets.select(range(100))

We will use `HuggingFaceTB/SmolLM2-360M-Instruct` for this lab.

In [None]:
# load the model and the tokenizer. Load the model on the GPU if available
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Single source of truth for the checkpoint, so the model and its tokenizer
# can never be loaded from two different repositories by accident.
MODEL_ID = "HuggingFaceTB/SmolLM2-360M-Instruct"

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No `token` argument is passed: the previous call sent an empty string, which
# is not a valid credential.
# NOTE(review): this assumes the checkpoint is public; pass a real Hub token
# here if access turns out to be gated.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

model = model.to(device)

# Part 1: Self-Confidence

In this section we will use the model's own confidence in a translation (its self-confidence) as the reward.

In [None]:
!pip install sacrebleu evaluate

In [None]:
import evaluate
# Load the "sacrebleu" metric through the `evaluate` library and keep it in the
# module-level name `metric` (presumably used for the BLEU evaluations the lab
# asks for further down).
metric = evaluate.load("sacrebleu")

In [None]:
from torch.nn import functional as F
from typing import Tuple, List

@torch.no_grad()
def log_likelihood(texts: List[Tuple[str, str]], batch_size: int=16):
    """Score translations by their average token log-likelihood under the LLM.

    Each ``(french_text, translation)`` pair is rendered as a two-turn chat
    (user prompt + assistant answer) with the tokenizer's chat template. Only
    the assistant turn is scored, i.e. the function estimates
    ``log p(translation | source)`` and divides it by the number of scored
    tokens. The scored tokens include whatever closing tokens the chat
    template appends after the translation.

    Args:
        texts: ``(french_text, translation)`` pairs to score.
        batch_size: Number of pairs sent through the model at once.

    Returns:
        A 1-D ``torch.Tensor`` with one score per input pair, in input order.
        Scores are mean log-probabilities, so higher means more confident.
        An empty ``texts`` yields an empty tensor.
    """

    def build_batch(pairs: List[Tuple[str, str]]):
        """Tokenize `pairs` and build labels that keep only the assistant turn."""
        conversations = [
            [
                {"role": "user",
                "content": f"You are given a French text, provide faithful translation to English.\n\nThe French text: {text}"
                },
                {"role": "assistant",
                "content": f"{translation}"
                }
            ]
            for text, translation in pairs
        ]

        full_texts = tokenizer.apply_chat_template(
            conversations, tokenize=False, add_generation_prompt=False
        )
        # Prompt-only rendering (user turn + assistant header): its token count
        # tells us where the translation starts in the full conversation.
        prompt_texts = [
            tokenizer.apply_chat_template(
                [conversation[0]], tokenize=False, add_generation_prompt=True
            )
            for conversation in conversations
        ]

        # The templated strings already contain every special token, so the
        # tokenizer must not add its own on top of them.
        encoded = tokenizer(
            full_texts,
            padding_side="right",
            padding=True,
            return_tensors="pt",
            add_special_tokens=False,
        )
        prompt_encoded = tokenizer(
            prompt_texts,
            padding_side="right",
            padding=True,
            return_tensors="pt",
            add_special_tokens=False,
        )

        # Count real tokens with the attention mask, NOT by comparing ids with
        # `pad_token_id`: chat tokenizers often reuse the end-of-turn token as
        # the pad token, in which case an id comparison under-counts the prompt
        # length and also hides the end-of-turn token of the translation.
        prompt_lengths = prompt_encoded.attention_mask.sum(dim=1)

        labels = encoded.input_ids.clone()
        labels[encoded.attention_mask == 0] = -100  # padding positions
        positions = torch.arange(labels.shape[1]).unsqueeze(0)
        labels[positions < prompt_lengths.unsqueeze(1)] = -100  # prompt positions
        return encoded, labels

    scores = []
    for start in range(0, len(texts), batch_size):
        encoded, labels = build_batch(texts[start:start + batch_size])
        encoded = encoded.to(model.device)
        # The logit at position t predicts token t + 1, hence the one-step shift
        # between targets (drop the first token) and logits (drop the last one).
        targets = labels[:, 1:].long().contiguous().to(model.device)
        logits = model(**encoded).logits[:, :-1, :].float()
        log_probs = F.log_softmax(logits, dim=-1)
        n_rows, n_steps, vocab_size = log_probs.shape
        token_nll = F.nll_loss(
            log_probs.reshape(-1, vocab_size),
            targets.reshape(-1),
            ignore_index=-100,
            reduction="none"
        ).view(n_rows, n_steps)

        # Average over the scored (assistant) tokens only.
        valid_mask = (targets != -100).float()
        mean_nll = (token_nll * valid_mask).sum(dim=1) / valid_mask.sum(dim=1)
        scores.extend((-mean_nll).tolist())

    return torch.tensor(scores)

In [None]:
from tqdm import tqdm

def tokenize(texts: List[str]):
    """Turn French source texts into a left-padded batch of chat prompts.

    Each text is wrapped in a single user turn asking for an English
    translation, rendered with the tokenizer's chat template (including the
    assistant generation prompt) and tokenized to PyTorch tensors.

    Args:
        texts: French source sentences.

    Returns:
        The tokenizer output for the batch; callers in this file read its
        ``input_ids`` and ``attention_mask``.
    """
    conversations = [
        [{"role": "user",
          "content": f"You are given a French text, provide faithful translation to English.\n\nThe French text: {text}"}]
        for text in texts]
    templated = tokenizer.apply_chat_template(conversations, tokenize=False, add_generation_prompt=True)
    # Pad on the LEFT: generation continues from the last position of every
    # row, so all prompts must end in the same column and the new tokens must
    # directly follow the real prompt rather than a run of padding.
    # The templated text already contains its special tokens, so the tokenizer
    # must not add another set on top (`add_special_tokens=False`).
    tokenized = tokenizer(
        templated,
        padding_side="left",
        padding=True,
        return_tensors="pt",
        add_special_tokens=False,
    )
    return tokenized

@torch.no_grad()
def rollouts(
    corpus: List[str],
    batch_size: int=4,
    num_samples=4,
    *,
    temperature: float=1.0,
    top_p: float=0.4,
    max_new_tokens: int=64,
    verbose: bool=True,
):
    """Sample `num_samples` translations for every text of `corpus`.

    Texts are processed in order of increasing prompt length so that each
    batch needs little padding; the results are nevertheless returned in the
    original corpus order.

    Args:
        corpus: French source texts.
        batch_size: Number of source texts per generation batch. The model
            actually sees ``batch_size * num_samples`` rows per call.
        num_samples: Number of sampled translations per source text.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        max_new_tokens: Maximum number of generated tokens per translation.
        verbose: When true, print every source text with its translations.

    Returns:
        A list aligned with `corpus`; entry ``i`` is the list of
        `num_samples` decoded translations of ``corpus[i]``.
    """
    # Sort the corpus positions by prompt length (stable, so ties keep their
    # original order).
    order = sorted(
        range(len(corpus)),
        key=lambda position: tokenize([corpus[position]]).input_ids.shape[-1],
    )

    results = [None] * len(corpus)

    for start in tqdm(range(0, len(order), batch_size)):
        indices = order[start : start + batch_size]

        tokenized = tokenize([corpus[position] for position in indices])
        input_ids = tokenized.input_ids.to(model.device)
        attention_mask = tokenized.attention_mask.to(model.device)
        n_prompts, prompt_len = input_ids.shape

        # Repeat every prompt `num_samples` times back to back, so that rows
        # [k * num_samples, (k + 1) * num_samples) all belong to prompt k.
        outputs = model.generate(
            input_ids=input_ids.repeat_interleave(num_samples, dim=0),
            attention_mask=attention_mask.repeat_interleave(num_samples, dim=0),
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            max_new_tokens=max_new_tokens,
            # Rows that finish early are filled with the tokenizer's pad token.
            pad_token_id=tokenizer.pad_token_id,
        )
        outputs = outputs.view(n_prompts, num_samples, -1)

        for idx, sample_ids in zip(indices, outputs):
            # Drop the prompt columns and keep only the generated continuation.
            translations = tokenizer.batch_decode(
                sample_ids[:, prompt_len:], skip_special_tokens=True
            )
            if verbose:
                print(corpus[idx], "@@@", translations)
                print("---")
            results[idx] = translations

    return results

In [None]:
# `french_texts` is not defined in any of the cells shown in this notebook, so
# it is built here from the 100-example evaluation subset.
# NOTE(review): this assumes the French side of each fr-en row is stored in the
# `source` column -- confirm against the dataset card before relying on it.
french_texts = list(subset["source"])
results = rollouts(french_texts)

## Likelihood self-confidence

1. Translate the French corpus with `num_samples` in `[1, 2, 4, 8, 16]`
2. Compute the log-likelihood of each translation using the LLM itself
3. Pick the most likely translation for each example
4. Compute the BLEU score and observe how it changes with `num_samples`

## Entropy self-confidence

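Modify the function `log_likelihood` so that it computes the average entropy of the model's next-token distributions over the translation tokens instead. Repeat the previous experiment, this time picking the translation with the lowest entropy.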

# Part 2: Reward modeling

We will train a reward model for translation quality scoring.

## Data

1. Get 1000 translation pairs from the dataset.
2. Translate the French texts to English with repeated sampling (num_samples=4)
3. Use the gold English texts as preferred translations and the model's translations as rejected translations

## Model

We will use the Bradley-Terry model:

$$
p(y_p \succ y_r \mid x) = \frac{\exp(\mathbf{c}_p \cdot \mathbf{w}^T)}{\exp(\mathbf{c}_p \cdot \mathbf{w}^T) + \exp(\mathbf{c}_r \cdot \mathbf{w}^T)}
$$

where:

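- $\mathbf{c}_p = \text{Transformer}(\mathbf{x}, y_p)$ is the representation of the source $\mathbf{x}$ together with the preferred translation $y_p$
- $\mathbf{c}_r = \text{Transformer}(\mathbf{x}, y_r)$ is the representation of the source $\mathbf{x}$ together with the rejected translation $y_r$
- $\mathbf{w}$ is the weight vector of the regression head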

1. Rewrite this formula as a logistic regression model
2. Complete the class to implement a reward model
3. Train the reward model

In [None]:
from torch import nn
import torch.nn.functional as F

class RewardModel(nn.Module):
    """Skeleton of the Bradley-Terry reward model used in this lab.

    The ``...`` placeholders are deliberately left for the exercise; only the
    method signatures are fixed here so that the completed class can be used
    like any other ``nn.Module`` (``reward_model(input_ids, attention_mask)``).
    """

    def __init__(self):
        super().__init__()
        # TODO(exercise): backbone that encodes a (source, translation) pair.
        self.transformer = ...
        # TODO(exercise): head that maps the backbone output to a reward score.
        self.regression_head = ...

    @staticmethod
    def loss_fn(prefered, rejected):
        """Return the preference loss for preferred/rejected translations.

        Declared as a static method because it reads no instance state, which
        lets it be called both as ``RewardModel.loss_fn(p, r)`` and as
        ``reward_model.loss_fn(p, r)``.

        Args:
            prefered: presumably the model outputs for the preferred
                translations -- to be confirmed when completing the exercise.
            rejected: presumably the model outputs for the rejected
                translations -- to be confirmed when completing the exercise.
        """
        # TODO(exercise): replace `...` with the argument derived from the
        # Bradley-Terry formula.
        loss = -(F.logsigmoid(...))
        return loss

    def forward(self, input_ids, attention_mask):
        """Return the reward logits for a tokenized batch.

        Args:
            input_ids: Token ids of the batch.
            attention_mask: Attention mask matching `input_ids`.
        """
        # TODO(exercise): run the backbone, then apply the regression head.
        hidden_states = ...
        logits = ...

        return logits

In [None]:
def train(reward_model, prompts, prefered, rejected, lr=0.001):
    """Placeholder for the reward-model training loop (left as an exercise).

    Nothing is implemented yet: every argument is ignored and the function
    returns ``None``.

    Args:
        reward_model: The model to train (presumably a ``RewardModel``).
        prompts: Presumably the source texts -- confirm when implementing.
        prefered: Presumably the preferred translations -- confirm when
            implementing.
        rejected: Presumably the rejected translations -- confirm when
            implementing.
        lr: Learning rate, ``0.001`` by default.
    """
    return None

Now use this reward model to pick the best translations, and evaluate them with BLEU.