### Part 1 - Imports

In [9]:
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
from datasets import load_dataset
import torch
import torch.nn as nn
import pytorch_lightning as pl
import openai
import numpy as np
import torch.optim as optim

### Part 2 - Annotating the data

In [3]:
def evaluate_pairwise(prompt, summaries, api_key, model="gpt-3.5-turbo-instruct"):
    """
    Compares every unordered pair of summaries through the OpenAI completions API.

    For each pair (i, j) with i < j, the model is asked which of the two
    summaries is better; the first standalone "1" or "2" in its reply is
    stored as the verdict.

    Parameters:
    - prompt (str): The prompt text the summaries were written for.
    - summaries (list of str): Summaries to compare.
    - api_key (str): OpenAI API key (assigned to the global `openai.api_key`).
    - model (str): Completion model to query. The default is the
      instruct variant because this function uses the legacy completions
      endpoint (`openai.Completion`), not the chat endpoint.

    Returns:
    - pairwise_comparisons (dict): Maps (i, j) index tuples to 1 (the first
      summary of the pair was preferred) or 2 (the second one was).

    Raises:
    - ValueError: If a reply contains neither a standalone 1 nor a 2.
    """
    # Function-scope imports so this cell does not depend on the import cell.
    import re
    from itertools import combinations

    openai.api_key = api_key

    # Dictionary to store pairwise comparisons
    pairwise_comparisons = {}

    # combinations() yields each (i, j) pair once, with i < j, in the same
    # order as two nested index loops would.
    for (i, summary1), (j, summary2) in combinations(enumerate(summaries), 2):
        query = f"Prompt: {prompt}\n\nCompare the following summaries and rank them from 1 (better) to 2 (worse):\n\n1. {summary1}\n2. {summary2}\n\nRank:"

        response = openai.Completion.create(
            model=model,
            prompt=query,
            max_tokens=10,
            n=1,
            stop=None,
            temperature=0.0
        )

        # The reply is free text ("1", "1.", "2, 1", ...), so look for the
        # first standalone 1 or 2 instead of int()-casting the whole string.
        response_text = response.choices[0].text.strip()
        verdict = re.search(r"\b[12]\b", response_text)
        if verdict is None:
            raise ValueError(
                f"Could not parse a ranking (1 or 2) for pair ({i}, {j}) "
                f"from model response: {response_text!r}"
            )

        # Store the result as (index of summary1, index of summary2) -> ranking
        pairwise_comparisons[(i, j)] = int(verdict.group())

    return pairwise_comparisons

In [6]:
def aggregate_rankings(pairwise_comparisons, num_summaries):
    """
    Aggregates pairwise comparisons into a best-to-worst ordering.

    Each comparison awards one point to its winner: summary i when the stored
    ranking is 1, summary j for any other value. Summaries are then ordered by
    descending score; ties keep their original index order.

    Parameters:
    - pairwise_comparisons (dict): Maps (i, j) index tuples (0-based) to the
      ranking returned for that pair.
    - num_summaries (int): Number of summaries.

    Returns:
    - final_ranking (list of int): 1-based summary indices, best first.
      Position p holds the index of the summary placed p-th.

    NOTE(review): this is an ordering of summaries, not the rank of each
    summary. The two only coincide for two summaries; confirm which one the
    consumers of this list expect when more summaries are compared.
    """
    # Initialize ranking scores (one win counter per summary)
    scores = np.zeros(num_summaries)

    # Calculate scores based on pairwise comparisons
    for (i, j), ranking in pairwise_comparisons.items():
        winner = i if ranking == 1 else j
        scores[winner] += 1

    # A stable sort makes the order of tied summaries deterministic
    # (lower index first) instead of depending on the sort algorithm.
    final_ranking = np.argsort(-scores, kind="stable") + 1

    return final_ranking.tolist()

### Part 3 - Loading the annotated data

In [10]:
class AnnotatedSummaryDataset(Dataset):
    """
    Dataset of prompts with several ranked candidate summaries each.

    Parameters:
    - data (sequence of dict): Records holding the source text under
      'prompt' (or 'article'), the candidate summaries under 'summaries'
      (or 'summary'), and their ranks under 'rankings'.
    - tokenizer (callable): Tokenizer called as
      tokenizer(prompt, summary, max_length=..., padding=..., truncation=...,
      return_tensors='pt').
    - max_length (int): Length every encoded pair is padded/truncated to.

    Each item is a tuple (inputs, labels):
    - inputs (list): One tokenizer output per (summary, rank) pair.
    - labels (list of int): Zero-based ranks (rank - 1), aligned with inputs.
    """
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        # Accept both key spellings: the sample records in this notebook use
        # 'prompt'/'summaries', the original accessors used 'article'/'summary'.
        prompt = item['prompt'] if 'prompt' in item else item['article']
        summaries = item['summaries'] if 'summaries' in item else item['summary']
        rankings = item['rankings']

        inputs = []
        labels = []
        for summary, rank in zip(summaries, rankings):
            # Encode as a text pair so the tokenizer inserts the special
            # tokens itself (writing "[CLS]"/"[SEP]" into the text duplicates
            # them) and so truncation can shorten the long prompt instead of
            # cutting the summary off the end of one concatenated string.
            encoded = self.tokenizer(
                prompt,
                summary,
                max_length=self.max_length,
                padding='max_length',
                truncation=True,
                return_tensors='pt',
            )
            inputs.append(encoded)
            labels.append(rank - 1)

        return inputs, labels

In [11]:
# Minimal hand-written sample: one prompt, two candidate summaries and the
# rank assigned to each of them.
data = [
    dict(
        prompt='Article text CNN...',
        summaries=['summary 1', 'summary 2'],
        rankings=[1, 2],
    ),
]

# Tokenize with the stock uncased BERT vocabulary and wrap the sample in a
# shuffling loader that yields up to two records per batch.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = AnnotatedSummaryDataset(data=data, tokenizer=tokenizer)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=2)

### Part 4.1 - Building the BERT Classifier

In [15]:
class BERTSummaryRanker(pl.LightningModule):
    """
    class BERTSummaryRanker: Lightning wrapper around a BERT sequence classifier

    Parameters:
    - model_name (str): Name of the pretrained model to load.
    - lr (float): Learning rate handed to AdamW.
    - num_labels (int): Number of output classes of the classification head.
    """
    def __init__(self, model_name='bert-base-uncased', lr=5e-5, num_labels=2):
        super().__init__()
        self.model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        self.lr = lr

    def forward(self, input_ids, attention_mask, token_type_ids, labels=None):
        """
        Runs the wrapped classifier.

        `labels` is optional; it is forwarded to the wrapped model so that the
        returned output carries a loss when labels are supplied.
        """
        return self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels,
        )

    @staticmethod
    def _merge_encodings(inputs, key):
        """Concatenates `key` across the per-summary encodings into (rows, seq_len)."""
        merged = torch.cat([item[key] for item in inputs], dim=0)
        # reshape keeps the row dimension even when there is a single row,
        # which a bare .squeeze() would collapse.
        return merged.reshape(-1, merged.shape[-1])

    def training_step(self, batch, batch_idx):
        """
        Computes and logs the classification loss for one batch.

        `batch` is unpacked as (inputs, labels): `inputs` is iterated as a
        sequence of encodings indexed by 'input_ids', 'attention_mask' and
        'token_type_ids'; `labels` is either a tensor or a sequence of
        per-summary labels.
        """
        inputs, labels = batch
        input_ids = self._merge_encodings(inputs, 'input_ids')
        attention_mask = self._merge_encodings(inputs, 'attention_mask')
        token_type_ids = self._merge_encodings(inputs, 'token_type_ids')

        # Stack per-summary label entries in the same (summary-major) order as
        # the encodings were concatenated above; torch.tensor() cannot build a
        # tensor from a list of multi-element tensors.
        if not isinstance(labels, torch.Tensor):
            labels = torch.stack([torch.as_tensor(label) for label in labels])
        labels = labels.flatten().to(device=input_ids.device, dtype=torch.long)

        # Labels must reach the model, otherwise outputs.loss is None.
        outputs = self(input_ids, attention_mask, token_type_ids, labels=labels)
        loss = outputs.loss
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Returns an AdamW optimizer over all parameters with learning rate `self.lr`."""
        optimizer = optim.AdamW(self.parameters(), lr=self.lr)
        return optimizer

In [None]:
# Fine-tune the ranker for three epochs, on the GPU when CUDA is available.
if torch.cuda.is_available():
    accelerator = 'gpu'
else:
    accelerator = 'cpu'

ranker = BERTSummaryRanker(lr=5e-5, model_name='bert-base-uncased')
trainer = pl.Trainer(accelerator=accelerator, max_epochs=3)
trainer.fit(ranker, dataloader)

### Part 4.2 - Building the Classic Reward Model

In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer

class AnnotatedSummaryDataset(Dataset):
    """
    class AnnotatedSummaryDataset

    Parameters:
    - data (sequence of dict): Records with the keys 'prompt', 'summaries'
      and 'rankings'.
    - tokenizer (callable): Tokenizer called as
      tokenizer(prompt, summary, max_length=..., padding=..., truncation=...,
      return_tensors='pt'); its output is indexed by 'input_ids',
      'attention_mask' and 'token_type_ids'.
    - max_length (int): Length every encoded pair is padded/truncated to.

    Each item is a tuple (inputs, labels):
    - inputs (dict): Dictionary containing input_ids, attention_mask and
      token_type_ids, each stacked over the record's summaries.
    - labels (torch.Tensor): Zero-based ranks (rank - 1), one per summary.
    """
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        prompt = item['prompt']
        summaries = item['summaries']
        rankings = item['rankings']

        inputs = []
        labels = []
        for summary, rank in zip(summaries, rankings):
            # Encode prompt and summary as a text pair: the tokenizer inserts
            # the separator itself, and truncation can shorten the long prompt
            # rather than dropping the summary from the end of one string.
            encoded_input = self.tokenizer(
                prompt,
                summary,
                max_length=self.max_length,
                padding='max_length',
                truncation=True,
                return_tensors='pt',
            )
            inputs.append(encoded_input)
            labels.append(rank - 1)

        # squeeze(0) removes only the leading dimension added by
        # return_tensors='pt'; a bare squeeze() would also drop a sequence
        # dimension of size 1.
        input_ids = torch.stack([inp['input_ids'].squeeze(0) for inp in inputs])
        attention_mask = torch.stack([inp['attention_mask'].squeeze(0) for inp in inputs])
        token_type_ids = torch.stack([inp['token_type_ids'].squeeze(0) for inp in inputs])

        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'token_type_ids': token_type_ids}, torch.tensor(labels)

In [None]:
# Single illustrative record: a prompt, its two candidate summaries and the
# rank given to each summary.
data = [
    dict(
        prompt='Article text CNN...',
        summaries=['summary 1', 'summary 2'],
        rankings=[1, 2],
    ),
]

# Rebuild tokenizer, dataset and loader on top of the dataset class above.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = AnnotatedSummaryDataset(data=data, tokenizer=tokenizer)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=2)

In [None]:
from transformers import BertForSequenceClassification
from trl import RewardTrainer

class RewardModel(BertForSequenceClassification):
    """
    class RewardModel: Reward model delegating to a pretrained BERT classifier

    Parameters:
    - model_name (str): Name of the pretrained model to load.
    - num_labels (int): Number of output classes of the classification head.
    """
    def __init__(self, model_name='bert-base-uncased', num_labels=4):
        # Load the pretrained classifier once and reuse its config for the
        # parent constructor, so the weights are not loaded a second time and
        # the inherited config agrees with `num_labels`.
        pretrained = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        super().__init__(config=pretrained.config)
        self.num_labels = num_labels
        # NOTE(review): the parent constructor also builds its own BERT
        # weights, which forward() never uses; only `self.model` is called.
        self.model = pretrained

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, labels=None, **kwargs):
        """
        def forward: Run the wrapped classifier on an input

        Parameters:
        - input_ids (torch): tensor containing input_ids,
        - attention_mask (torch): tensor containing attention_mask,
        - token_type_ids (torch): tensor containing token_type_ids,
        - labels (torch): rankings of the data.
        - **kwargs: any further keyword arguments (e.g. return_dict) are
          passed through to the wrapped model unchanged.

        Returns:
        - outputs: whatever the wrapped model returns for these arguments.
        """
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=labels,
            **kwargs,
        )
        return outputs

In [None]:
# Imported here so this cell stays runnable on its own.
from transformers import TrainingArguments

reward_model = RewardModel(model_name='bert-base-uncased', num_labels=4)

# The trainer reads its settings as attributes of an arguments object, so a
# plain dict cannot be passed as `args`; build TrainingArguments instead.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=10,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)

# NOTE(review): RewardTrainer is designed for chosen/rejected preference
# pairs; confirm that `dataset` provides the columns the installed trl
# version expects before relying on this run.
trainer = RewardTrainer(
    model=reward_model,
    train_dataset=dataset,
    eval_dataset=None,
    tokenizer=tokenizer,
    args=training_args,
)

trainer.train()