# Initialize Packages and Load Dataset

In [1]:
import warnings
warnings.filterwarnings('ignore')
import torch
torch.cuda.empty_cache()

In [2]:
from sklearn.model_selection import KFold
from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets
import datasets
import pandas as pd
import os
import logging
import nltk
import numpy as np
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from random import sample
import random

In [3]:
from datasets import load_from_disk

dataset = load_from_disk('data/decomposed/decomposed_test')

# Check Test Dataset + Add Tokenizer Function

In [4]:
def generate_predictions(examples, tokenizer, model):
    generated_texts = []
    for example in examples:
        
        # Intial tokenization
        input_text = f"query:  {example['query']} answer: {example['answers']} header: {' '.join(map(str, example['table'].get('header', [])))} rows: {' '.join(map(str, example['table'].get('rows', [])))} title: {' '.join(map(str, example['table'].get('title', [])))}"
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        
        # Generate text and decode
        output_sequences = model.generate(input_ids)
        generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
        
        # Add to list of generated text
        generated_texts.append(generated_text)
    
    return generated_texts

In [8]:
dataset

# Reduce it for testing
random_indices = random.sample(range(len(dataset)), 5)
dataset = dataset.select(random_indices)

# Load in models

In [9]:
from transformers import GPT2Tokenizer, GPT2Model
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import BartTokenizer, BartForConditionalGeneration

# # gpt2
# tokenizer_gpt2 = GPT2Tokenizer.from_pretrained("gpt2")
# model_gpt2 = GPT2Model.from_pretrained("gpt2")

# t5 small
tokenizer_t5 = T5Tokenizer.from_pretrained("t5-small")
model_t5 = T5ForConditionalGeneration.from_pretrained("t5-small")

# flan t5
tokenizer_flant5 = T5Tokenizer.from_pretrained("google/flan-t5-small")
model_flant5 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

# Bart
tokenizer_bart = BartTokenizer.from_pretrained("facebook/bart-base")
model_bart = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [10]:
models_and_tokenizers_with_names = [
    # ("GPT2", tokenizer_gpt2, model_gpt2),
    ("T5 Small", tokenizer_t5, model_t5),
    ("FLAN-T5 Small", tokenizer_flant5, model_flant5),
    ("BART Base", tokenizer_bart, model_bart)
]

# Make predictions using each Model on Test Data

In [12]:
model_predictions = {}

for name, tokenizer, model in models_and_tokenizers_with_names:
    print(f"Model: {name}")
    predictions = generate_predictions(dataset, tokenizer, model)
    model_predictions[name] = predictions

Token indices sequence length is longer than the specified maximum sequence length for this model (760 > 512). Running this sequence through the model will result in indexing errors


Model: T5 Small


Token indices sequence length is longer than the specified maximum sequence length for this model (760 > 512). Running this sequence through the model will result in indexing errors


Model: FLAN-T5 Small
Model: BART Base


In [13]:
print(model_predictions)

{'T5 Small': ["'59.941', '59.553', '59.5", "'Camden County Regiment', 'Edenton', '", 'Russia (RUS)', 'John Putch', "'2013', '2013', '2013', 'None"], 'FLAN-T5 Small': ['e x a l l G r a n d', 'd e n t o n D i s t', '', 'i s t o f U g l y B', 't e r n'], 'BART Base': ['query:  Summarize the drivers that are part of the Minardi Team Us', 'query:  Who were the original commanders and what were their ranks for units that were', 'query:  What is the distribution of the 4 x 100 metres relay records among different', 'query:  Who were the directors and writers for the episode titled "Backseat Betty', 'query:  Which school had the shortest membership duration in the Western Wayne Athletic Conference and']}


### Choosing Best Answer

In [14]:
from rouge_score import rouge_scorer
from bert_score import score
import numpy as np

def select_best_guess(models_and_tokenizers_with_names, dataset, model_predictions, weights=(0.5, 0.5)):
    weight_for_rouge, weight_for_bert = weights
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    
    best_guesses = []

    for i, example in enumerate(dataset):
        best_score = -np.inf
        best_guess_info = {}
        target_answer = example['query']
        
        for name, _, _ in models_and_tokenizers_with_names:
            predictions = model_predictions[name][i]
            
            for prediction in predictions:
                rouge_scores = scorer.score(target_answer, prediction)
                rouge_score_avg = np.mean([rouge_scores['rouge1'].fmeasure, rouge_scores['rougeL'].fmeasure])
                
                _, _, bert_scores = score([prediction], [target_answer], lang="en", verbose=False)
                bert_score = bert_scores.mean().item()
                
                # Calculate combined score based on specified weights
                combined_score = (weight_for_rouge * rouge_score_avg) + (weight_for_bert * bert_score)
                
                if combined_score > best_score:
                    best_score = combined_score
                    best_guess_info = {
                        'model': name,
                        'best_guess': prediction,
                        'query': target_answer
                    }
        
        best_guesses.append(best_guess_info)
    
    return best_guesses

In [None]:
best_guesses = select_best_guess(models_and_tokenizers_with_names, dataset, model_predictions)

In [16]:
for guess in best_guesses:
    print(guess)

{'model': 'BART Base', 'best_guess': 'M', 'query': 'Summarize the drivers that are part of the Minardi Team Usa Team.'}
{'model': 'FLAN-T5 Small', 'best_guess': 'D', 'query': 'Who were the original commanders and what were their ranks for units that were created in 1775 and disbanded in 1783 under the Edenton District Brigade operation?'}
{'model': 'BART Base', 'best_guess': '4', 'query': 'What is the distribution of the 4 x 100 metres relay records among different nations during the World Championships in Athletics from 1983 to 2015?'}
{'model': 'BART Base', 'best_guess': 'k', 'query': 'Who were the directors and writers for the episode titled "Backseat Betty" and how many viewers did this episode attract?'}
{'model': 'T5 Small', 'best_guess': '2', 'query': 'Which school had the shortest membership duration in the Western Wayne Athletic Conference and what were the circumstances surrounding its exit?'}
