# Demo of RobBERT for humour ranking

If you're looking for the RobBERT joke classification notebook, you probably want [the other notebook: robbert_humor_detection](./robbert_humor_detection.ipynb).

This notebook checks how well the RobBERT model, trained to distinguish jokes from dynamic-template jokes, performs in the **ranked setting** on the whole dataset.
This means that given a joke and a corrupted joke, the model should predict which one is the real joke.

Note that we are running this on the whole dataset for simplicity's sake, which implies that the accuracies found here are higher than those reported in the paper, as this notebook also includes the training data instead of only the test data.

In [1]:
import json
from typing import List
import torch
from torch import Tensor
from torch.utils.data import DataLoader, Dataset
from transformers import RobertaTokenizer, AutoModelForSequenceClassification, AutoConfig, \
    RobertaForSequenceClassification, InputFeatures

In [2]:
# Directory the tokenizer and the classification model are both loaded from.
model_location = "../models/jokes-dt-ranked/artifacts/"

tokenizer = RobertaTokenizer.from_pretrained(
    model_location,
    model_max_length=512,
)
model = RobertaForSequenceClassification.from_pretrained(
    model_location,
    return_dict=True,
)

# Switch to evaluation mode; this notebook only runs predictions.
model.eval()

# Move the model to the first GPU when CUDA is available.
if torch.cuda.is_available():
    model.to('cuda:0')

print("RobBERT model loaded")


RobBERT model loaded


## Loading data

In [3]:
# Original jokes (presumably a JSON list of strings — confirm against the data file).
with open("../data/processed/jokes.json", encoding="utf-8") as json_file:
    jokes = json.loads(json_file.read())

# Dynamic-template counterparts of the jokes, loaded the same way.
with open("../data/processed/dynamic_template_jokes.json", encoding="utf-8") as json_file:
    dt = json.loads(json_file.read())

## Calculating statistics

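Create a function that counts, over all sentence pairs, how often the model predicts that the left sentence is the joke and how often it predicts the right one.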

In [4]:
def tokenize_sentences(left_sentences,
                       right_sentences,
                       block_size=512,
                       mask_padding_with_zero=True):
    """Encode sentence pairs as fixed-length inputs for the model.

    Each (left, right) pair is tokenized with the module-level ``tokenizer``,
    encoded as a single sequence pair (truncated to ``block_size``), and then
    padded with the tokenizer's pad token up to exactly ``block_size`` ids.

    Args:
        left_sentences: Iterable of sentences used as the first segment.
        right_sentences: Iterable of sentences used as the second segment.
            Pairs are formed with ``zip``, so surplus items in the longer
            iterable are ignored.
        block_size: Length of every returned sequence.
        mask_padding_with_zero: When true, real tokens get mask value 1 and
            padding gets 0; when false the two values are swapped.

    Returns:
        A list with one dict per pair, holding ``"input_ids"`` and
        ``"attention_mask"`` as 1-D integer (``torch.long``) tensors of
        length ``block_size``.
    """
    # The pad id is the same for every pair, so look it up once.
    pad_token = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
    token_flag, padding_flag = (1, 0) if mask_padding_with_zero else (0, 1)

    result = []
    for left, right in zip(left_sentences, right_sentences):
        tokenized_text = tokenizer.encode(tokenizer.tokenize(left),
                                          text_pair=tokenizer.tokenize(right),
                                          truncation=True,
                                          max_length=block_size,
                                          padding=False)
        input_mask = [token_flag] * len(tokenized_text)

        # Right-pad both sequences up to block_size.
        missing = max(block_size - len(tokenized_text), 0)
        tokenized_text = tokenized_text + [pad_token] * missing
        input_mask = input_mask + [padding_flag] * missing

        result.append(
            {
                # Token ids must be integer tensors: the legacy ``Tensor(...)``
                # constructor yields float32, which the embedding lookup
                # rejects unless the caller casts it first.
                "input_ids": torch.tensor(tokenized_text[:block_size],
                                          dtype=torch.long),
                "attention_mask": torch.tensor(input_mask[:block_size],
                                               dtype=torch.long),
            }
        )
    return result

# Number of sentence pairs sent through the model per forward pass.
batch_size = 20


def label_sentences(left_sentences: List[str], right_sentences: List[str]):
    """Count which side of each sentence pair the model predicts as the joke.

    The pairs are encoded with ``tokenize_sentences``, run through the
    module-level ``model`` in batches of ``batch_size`` without gradient
    tracking, and the argmax over the logits is taken as the prediction
    (1 is counted as "left is joke", 0 as "right is joke").

    Args:
        left_sentences: Sentences used as the first element of each pair.
        right_sentences: Sentences used as the second element of each pair.

    Returns:
        A dict with the absolute counts (``"Left is joke"``,
        ``"Right is joke"``) and the corresponding fractions
        (``"% Left Jokes"``, ``"% Right"``) of all predicted pairs.
        The fractions are 0.0 when there are no pairs.
    """
    predicted_ids = []

    dataset = tokenize_sentences(left_sentences, right_sentences)
    dataloader = DataLoader(dataset, batch_size=batch_size)

    # Follow whatever device the model lives on instead of assuming cuda:0.
    device = next(model.parameters()).device

    with torch.no_grad():
        for i, inputs in enumerate(dataloader):

            # Print a marker every 50 batches
            if i % 50 == 0:
                print("Starting batch", i)

            # Token ids and masks must be integer tensors on the model's
            # device. Do this on every run, not only when CUDA is available,
            # so CPU inference works as well.
            inputs = {
                k: v.to(device).long() if isinstance(v, torch.Tensor) else v
                for k, v in inputs.items()
            }

            # Calculate predictions
            results = model(**inputs)

            # Store plain Python ints so the counting below does not compare
            # 0-d tensors one by one.
            predicted_ids.extend(results.logits.argmax(dim=1).tolist())

    num_left = predicted_ids.count(1)
    num_right = predicted_ids.count(0)

    # Divide by the number of pairs actually scored: this guards against
    # empty input and against lists of unequal length (zip truncates).
    total = len(predicted_ids)

    return {
        "Left is joke": num_left,
        "Right is joke": num_right,
        "% Left Jokes": num_left / total if total else 0.0,
        "% Right": num_right / total if total else 0.0,
    }


In [5]:
# Real jokes are passed as the left sentences and the dynamic-template jokes
# as the right ones, so the model should label most pairs as "Left is joke".
label_sentences(jokes, dt)

Starting batch 0
Starting batch 50
Starting batch 100
Starting batch 150


{'Left is joke': 2550,
 'Right is joke': 685,
 '% Left Jokes': 0.7882534775888718,
 '% Right': 0.2117465224111283}

In [6]:
# Inverse check: the real jokes are now the right sentences, so the model
# should label most pairs as "Right is joke".
label_sentences(dt, jokes)


Starting batch 0
Starting batch 50
Starting batch 100
Starting batch 150


{'Left is joke': 411,
 'Right is joke': 2824,
 '% Left Jokes': 0.12704791344667696,
 '% Right': 0.872952086553323}