# Model Evaluation

- hella swag
- sequence generation

In [1]:
import torch 
import torch.nn.functional as F
import tiktoken
import os
import requests 
from tqdm import tqdm
import json
from transformers import GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained('gpt2')
encoder = tiktoken.get_encoding('gpt2')

## 1. sentence generation
- learning 1: apply temperature to the logits to change distribution (not included in the original code)
- learning 2: 

    - model.eval(): no dropout - important for good perf; batch norm mean is using stored values - does not apply here
    - torch.no_grad(): no wasted compute and memory on gradient tracking

In [2]:
@torch.no_grad() # learning 2
def complete_sentence(model, 
                      encoder, 
                      text: str, 
                      n_examples: int = 4, 
                      max_n_generated_tokens: int = 30, 
                      top_k_to_include_in_random_draw: int = 20, 
                      temperature: float = 0.6):
    
    was_training = model.training

    model.eval() # learning 2

    #with torch.no_grad():
        
        # text to tensor
    tokens = encoder.encode_ordinary(text)
    tensor = torch.tensor(tokens).unsqueeze(0).repeat(n_examples, 1).to(model.device) # B * T 

    for _ in range(max_n_generated_tokens):

        # get probability for the next token
        logits = model(tensor).logits[:, -1, :] 
        probs = F.softmax(logits / temperature, dim = -1) # learning 1
        
        # top k samples
        top_probs, top_idx = torch.topk(probs, k = top_k_to_include_in_random_draw, dim=-1)   # B * k
        selected_idx_on_top_probs = torch.multinomial(top_probs, 1) # B * 1
        
        next_tokens = torch.gather(top_idx, -1, selected_idx_on_top_probs) # B * 1
        
        # concat the new token with existing
        tensor = torch.cat([tensor, next_tokens], dim = -1)

    # decode
    decoded = []
    for i in range(n_examples):

        tokens = tensor[i, :].tolist()
        if encoder.eot_token in tokens:
            tokens = tokens[: tokens.index(encoder.eot_token)]
        
        decoded.append(encoder.decode(tokens))

    if was_training:
        model.train()

    return decoded

complete_sentence(model, encoder, "What makes a person resilient")

["What makes a person resilient is that they can be resilient. In order to be resilient, you have to be able to adapt to change.\n\nIf you're not strong",
 "What makes a person resilient?\n\nIf you're a person who's never been able to cope with a lot of stress, you can't always take the time to figure",
 'What makes a person resilient is how they can be resilient. If they are going to get through adversity, they have to be resilient. If they are going to get through adversity',
 "What makes a person resilient?\n\nIn the first place, being resilient has more to do with the person's capacity for survival. If you're not going to die,"]

## 2. hellaswag
learning 3: how to send requests to get data and save by chunk

learning 4: use contiguous after slicing, which makes subsequent view and operations smoother

In [3]:
# utils to download hellaswag and write to file
def _download_file(url: str, file_loc: str, chunk_size: int = 1024): # learning 3

    resp = requests.get(url)
    total = int(resp.headers.get("content-length", 0))

    with open(file_loc, "wb") as file, tqdm(desc = file_loc, 
                                        total =  total, 
                                        unit = 'iB',
                                        unit_scale = True,
                                        unit_divisor = 1024) as bar:
        
        for data in resp.iter_content(chunk_size = chunk_size):
            size = file.write(data)
            bar.update(size)


def download_hellaswag(split: str = 'train') -> None:

    hellaswags = {
    "train": "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_train.jsonl",
    "val": "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_val.jsonl",
    "test": "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_test.jsonl",}


    local_dir, filename = "hellaswag", f"hellaswag_{split}.jsonl"
    os.makedirs(local_dir, exist_ok=True)
    
    file_loc = os.path.join(local_dir, filename)
    url = hellaswags[split]

    if not os.path.exists(file_loc):
        print(f"download hellaswag {url} to {file_loc}")
        _download_file(url, file_loc)

In [6]:
def iter_hellaswag(split: str = 'val'):
    download_hellaswag(split)
    with open(f"hellaswag/hellaswag_{split}.jsonl", "r") as f:
        n = 0
        for line in f:
            example = json.loads(line)
            yield example
            n += 1
            if n >= 50:
                break

def render_example(example, encoder):

    context  = example['ctx']
    label = int(example['label'])
    endings = example['endings']

    # create tokens
    context_tokens = encoder.encode_ordinary(context)
    context_len = len(context_tokens)

    masks = []
    tokens = []
    max_len = 0
    for ending in endings:
        ending_tokens = encoder.encode_ordinary(' ' + ending)
        ending_len = len(ending_tokens)
        max_len = max(max_len, context_len + ending_len)

        masks.append([0]* context_len + [1]*ending_len)
        tokens.append(context_tokens + ending_tokens)
     
    # convert to padded tensors
    padded_masks = torch.zeros((4, max_len), dtype=torch.long)
    padded_tokens = torch.zeros((4, max_len), dtype =torch.long)

    for i in range(4):
        curr_len = len(tokens[i])
        padded_masks[i, :curr_len] = torch.tensor(masks[i])
        padded_tokens[i, :curr_len] = torch.tensor(tokens[i])

    return padded_tokens, padded_masks, label

# # test if usable
# iterator = iter_hellaswag('val')
# for example in iterator:
#     tokens, masks, label = render_example(example, encoder)
#     break 

# print(tokens, masks, label)

In [7]:

@torch.no_grad()
def eval_hellaswag(iterator, model, encoder):

    was_training = model.training
    model.eval()

    num_correct_norm = 0
    num_correct = 0
    num_total = 0

    for example in iterator:
        tokens, masks, label = render_example(example, encoder)

        tokens = tokens.to(model.device)
        x = tokens[:, :-1].contiguous()
        y = tokens[:, 1:].contiguous()         
        masks = masks.to(model.device)[:, 1:]  # B * T-1
        B = x.shape[0]

        # get prob
        logits = model(x).logits

        losses = F.cross_entropy(logits.view(-1, logits.shape[-1]), y.contiguous().view(-1), reduction='none').view(B, -1)

        masked_losses = losses * masks 
        
        total_losses = masked_losses.sum(dim =-1)
        avg_losses = total_losses / masks.sum(dim=-1)
        
        # eval if accurate
        num_correct += total_losses.argmin().item() == label 
        num_correct_norm += avg_losses.argmin().item() == label 
        num_total += 1

    print(f"evaluated {num_total} examples: {num_correct_norm} correct using avg prob. {num_correct} correct using total prob")

    if was_training:
        model.train()

    return num_correct_norm, num_correct, num_total

In [8]:
eval_hellaswag(iterator, model, encoder)

evaluated 49 examples: 16 correct using avg prob. 17 correct using total prob


(16, 17, 49)