# Question Answering Model Examples

This notebook demonstrates how to use the trained question answering models. We'll load models trained with different architectures and compare their performance on some example questions.

In [None]:
import html
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from IPython.display import display, HTML

from evaluate import load_model
from data_utils import simple_tokenize

## Load Models

First, let's find and load the latest version of each model type.

In [None]:
def find_latest_model(model_type):
    """Find the newest saved checkpoint for the given model type.

    Sub-directories of ``runs`` (relative to the current working directory)
    whose name starts with ``model_type`` are ordered by name and inspected
    from last to first. The first one containing a ``best_model.pt`` file is
    used, so a newer run that has not saved a checkpoint does not hide an
    older, usable one.

    NOTE(review): "latest" means last in lexicographic name order; this only
    matches chronological order if run names embed a sortable timestamp or
    counter -- confirm against the training script.

    Args:
        model_type: Prefix that the run directory names must start with.

    Returns:
        Path to ``best_model.pt`` inside the selected run directory, or
        ``None`` when ``runs`` does not exist or no matching run directory
        contains that file.
    """
    runs_dir = 'runs'

    # Nothing has been trained yet: report "not found" instead of letting
    # os.listdir raise FileNotFoundError.
    if not os.path.isdir(runs_dir):
        return None

    # Only directories can hold a checkpoint; plain files that happen to share
    # the prefix are ignored.
    candidates = sorted(
        (
            name for name in os.listdir(runs_dir)
            if name.startswith(model_type)
            and os.path.isdir(os.path.join(runs_dir, name))
        ),
        reverse=True,
    )

    for run_name in candidates:
        model_path = os.path.join(runs_dir, run_name, 'best_model.pt')
        if os.path.exists(model_path):
            return model_path
    return None

# Find model paths (each is None when no checkpoint was found for that type)
base_model_path = find_latest_model('base')
attention_model_path = find_latest_model('attention')
transformer_model_path = find_latest_model('transformer')

print(f"Base model: {base_model_path}")
print(f"Attention model: {attention_model_path}")
print(f"Transformer model: {transformer_model_path}")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load models
# A single loop replaces three copy-pasted stanzas so that every architecture
# is loaded, moved to the device and switched to eval mode the same way.
checkpoint_paths = {
    'base': base_model_path,
    'attention': attention_model_path,
    'transformer': transformer_model_path,
}

models = {}
for model_name, checkpoint_path in checkpoint_paths.items():
    if not checkpoint_path:
        continue  # no checkpoint available for this architecture

    print(f"Loading {model_name} model...")
    # load_model returns a pair; only its first element (the model) is used here.
    loaded_model, _ = load_model(checkpoint_path)
    loaded_model = loaded_model.to(device)
    loaded_model.eval()
    models[model_name] = loaded_model

# Keep the per-architecture names this cell has always defined, for any cell
# that refers to them directly. They are None when that model was not loaded.
base_model = models.get('base')
attention_model = models.get('attention')
transformer_model = models.get('transformer')

print(f"\nLoaded {len(models)} models")

## Helper Functions

Let's define some helper functions for generating and visualizing answers.

In [None]:
def get_answer(model, model_type, context, question, device,
               max_context_len=400, max_question_len=50,
               word2idx=None, unk_idx=1):
    """Get an answer span from a model for a given context and question.

    Both texts are tokenized with ``simple_tokenize``, truncated, converted to
    index tensors and passed to ``model.predict``. The predicted start/end
    positions are used to slice the answer out of the context tokens.

    By default every token is encoded as ``unk_idx``, so the model input
    carries no lexical information and the answers are placeholders. Pass
    ``word2idx`` to feed real vocabulary indices.

    Args:
        model: Object exposing ``predict(context, context_len, question,
            question_len)``.
        model_type: ``'base'`` means ``predict`` returns ``(start, end)``;
            any other value means it returns ``(start, end, attention)``.
        context: Passage text to search for the answer.
        question: Question text.
        device: Torch device on which the input tensors are created.
        max_context_len: Keep at most this many context tokens
            (``None`` disables truncation).
        max_question_len: Keep at most this many question tokens
            (``None`` disables truncation).
        word2idx: Optional mapping from token to vocabulary index. Tokens are
            looked up verbatim; tokens not in the mapping become ``unk_idx``.
        unk_idx: Index used for unknown tokens, and for every token when
            ``word2idx`` is not given.

    Returns:
        dict with keys ``question``, ``context``, ``answer`` (space-joined
        answer tokens), ``start``, ``end`` (inclusive token positions with
        ``start <= end``) and ``context_tokens`` (the truncated token list).
        When the model returned attention that is not ``None``, the dict also
        has ``attention``: the first element of that tensor as a numpy array
        (presumably the single batch entry -- confirm against the model).

    Raises:
        ValueError: If the context or the question contains no tokens.
    """
    # Tokenize and truncate; slicing is a no-op when the input is short enough.
    context_tokens = simple_tokenize(context)[:max_context_len]
    question_tokens = simple_tokenize(question)[:max_question_len]

    # Fail with a clear message instead of handing the model a zero-length sequence.
    if not context_tokens or not question_tokens:
        raise ValueError("context and question must each contain at least one token")

    def _to_indices(tokens):
        """Map tokens to vocabulary indices, or to all-UNK placeholders."""
        if word2idx is None:
            return [unk_idx] * len(tokens)
        return [word2idx.get(token, unk_idx) for token in tokens]

    # Build the batch-of-one inputs directly on the target device.
    context_tensor = torch.tensor([_to_indices(context_tokens)], dtype=torch.long, device=device)
    question_tensor = torch.tensor([_to_indices(question_tokens)], dtype=torch.long, device=device)

    context_len = torch.tensor([len(context_tokens)], device=device)
    question_len = torch.tensor([len(question_tokens)], device=device)

    # Get predictions
    with torch.no_grad():
        if model_type == 'base':
            start_idx, end_idx = model.predict(context_tensor, context_len, question_tensor, question_len)
            attention = None
        else:
            start_idx, end_idx, attention = model.predict(context_tensor, context_len, question_tensor, question_len)

    # The model may predict an end before the start; order them so start <= end.
    start, end = sorted((start_idx.item(), end_idx.item()))

    # Extract the answer (end is inclusive)
    answer = ' '.join(context_tokens[start:end + 1])

    result = {
        'question': question,
        'context': context,
        'answer': answer,
        'start': start,
        'end': end,
        'context_tokens': context_tokens
    }

    if attention is not None:
        result['attention'] = attention[0].detach().cpu().numpy()

    return result

def highlight_answer(context, start, end):
    """Render the context as HTML with the answer tokens highlighted.

    The context is tokenized with ``simple_tokenize``; every token whose
    position ``i`` satisfies ``start <= i <= end`` is wrapped in a bold red
    ``<b>`` tag. Each token is HTML-escaped first, so characters such as
    ``<``, ``>`` and ``&`` in the passage are shown literally instead of
    being interpreted as markup.

    Args:
        context: Passage text.
        start: Position of the first answer token (inclusive).
        end: Position of the last answer token (inclusive).

    Returns:
        The tokens joined by single spaces, as an HTML fragment.
    """
    marked_up = []

    for i, token in enumerate(simple_tokenize(context)):
        # quote=False: quotes are harmless in element content, so plain text
        # without &, < or > is emitted unchanged.
        safe_token = html.escape(token, quote=False)
        if start <= i <= end:
            marked_up.append(f"<b style='color:red'>{safe_token}</b>")
        else:
            marked_up.append(safe_token)

    return ' '.join(marked_up)

def visualize_attention(context_tokens, attention_weights, title="Attention Weights"):
    """Draw attention weights as a heat map.

    Args:
        context_tokens: Tokens used as x-axis tick labels. Labels are drawn
            only when there are at most 50 tokens and the number of columns
            in the weights equals the number of tokens.
        attention_weights: Array-like of weights. A 1-D vector is shown as a
            single-row heat map; 2-D input is shown as given.
        title: Figure title.
    """
    # imshow rejects 1-D data, so promote a plain weight vector to one row.
    weights = np.atleast_2d(np.asarray(attention_weights))

    plt.figure(figsize=(12, 4))
    # aspect='auto' lets the image fill the axes; with the default square
    # cells a single-row map over many tokens collapses to a hairline.
    plt.imshow(weights, cmap='viridis', aspect='auto')
    plt.title(title)
    plt.xlabel('Context Position')

    # Show tokens on x-axis only if there are not too many of them, and only
    # if the columns really correspond one-to-one to the tokens.
    if len(context_tokens) <= 50 and weights.shape[1] == len(context_tokens):
        plt.xticks(range(len(context_tokens)), context_tokens, rotation=90)

    plt.colorbar()
    plt.tight_layout()
    plt.show()

## Test with Example 1: Deep Learning

In [None]:
# Example context and question about deep learning
# context1 is the passage handed to get_answer; questions1 are the questions asked about it.
# The passage is one verbatim triple-quoted string, so it starts and ends with a newline.
context1 = """
Deep learning is part of a broader family of machine learning methods based on artificial neural networks with representation learning. 
Learning can be supervised, semi-supervised or unsupervised. Deep learning architectures such as deep neural networks, 
deep belief networks, recurrent neural networks and convolutional neural networks have been applied to fields including 
computer vision, speech recognition, natural language processing, audio recognition, social network filtering, 
machine translation, bioinformatics, drug design, medical image analysis, material inspection and board game programs, 
where they have produced results comparable to and in some cases surpassing human expert performance.
Transformer architectures, a type of deep learning model, have become particularly important for NLP tasks 
since their introduction in 2017. Transformers use self-attention mechanisms to process input sequences in parallel, 
which has proven especially effective for tasks like machine translation, text summarization, and question answering. 
Models like BERT, GPT, and T5 are all based on the transformer architecture and have set new performance benchmarks 
across numerous language understanding tasks.
"""

# Each question is run against every loaded model in the loop below.
questions1 = [
    "What is deep learning part of?",
    "What types of learning can be used in deep learning?",
    "In which fields has deep learning been applied?"
]

# Test all models with the questions
# Guarded the same way as the other example cells: when nothing was loaded,
# say so instead of printing question banners with no answers under them.
if models:
    for question in questions1:
        print(f"\n{'='*80}\nQuestion: {question}\n{'='*80}\n")

        for model_type, model in models.items():
            print(f"Model: {model_type.upper()}")
            result = get_answer(model, model_type, context1, question, device)

            print(f"Answer: {result['answer']}\n")
            display(HTML(f"<p>Context with highlighted answer:</p><p>{highlight_answer(context1, result['start'], result['end'])}</p>"))

            # Visualize attention if available (the base model returns none)
            if 'attention' in result and model_type != 'base':
                visualize_attention(result['context_tokens'], result['attention'], f"{model_type.capitalize()} Attention")

            print('\n' + '-'*80)
else:
    print("No models loaded, cannot run example 1.")

## Test with Example 2: Natural Language Processing

In [None]:
# Example context about NLP
# context2 is the passage handed to get_answer; questions2 are the questions asked about it.
# The passage is one verbatim triple-quoted string, so it starts and ends with a newline.
context2 = """
Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence 
concerned with the interactions between computers and human language, in particular how to program computers 
to process and analyze large amounts of natural language data. The goal is a computer capable of "understanding" 
the contents of documents, including the contextual nuances of the language within them. The technology can then 
accurately extract information and insights contained in the documents as well as categorize and organize the 
documents themselves. Challenges in natural language processing frequently involve speech recognition, natural 
language understanding, and natural language generation. Modern NLP approaches are based on machine learning, 
especially statistical methods and neural networks. As of 2020, deep learning approaches such as transformers 
have achieved state-of-the-art results on many NLP tasks.
Question answering (QA) is an important NLP task that involves automatically answering questions posed in natural language. 
Machine reading comprehension, a subset of QA, focuses on answering questions based on a given context passage. 
The Stanford Question Answering Dataset (SQuAD) has become a benchmark dataset for this task, consisting of questions 
posed by crowdworkers on a set of Wikipedia articles. In SQuAD, the answer to every question is a segment of text 
from the corresponding reading passage. Models are evaluated based on exact match and F1 scores, comparing their 
predicted answers against human-provided reference answers.
"""

# Each question is run against a single model in the loop below.
questions2 = [
    "What is NLP?",
    "What are the challenges in natural language processing?",
    "What approaches do modern NLP systems use?"
]

# Run example 2 against a single model to keep the output short: the
# attention model when it was loaded, otherwise the first model in `models`.
if not models:
    print("No models loaded, cannot run example 2.")
else:
    model_type = 'attention' if 'attention' in models else next(iter(models))
    model = models[model_type]

    for question in questions2:
        print(f"\n{'='*80}\nQuestion: {question}\n{'='*80}\n")

        result = get_answer(model, model_type, context2, question, device)
        print(f"Answer: {result['answer']}\n")

        highlighted_html = highlight_answer(context2, result['start'], result['end'])
        display(HTML(f"<p>Context with highlighted answer:</p><p>{highlighted_html}</p>"))

        # Only non-base models can contribute an attention map
        if model_type != 'base' and 'attention' in result:
            visualize_attention(
                result['context_tokens'],
                result['attention'],
                f"{model_type.capitalize()} Attention",
            )

        print('\n' + '-'*80)

## Try Your Own Question

Now you can try your own context and question!

In [None]:
# Define your own context and question
# Edit my_context (the passage) and my_question, then re-run this cell.
# The passage is one verbatim triple-quoted string, so it starts and ends with a newline.
my_context = """
PyTorch is an open source machine learning framework based on the Torch library, 
used for applications such as computer vision and natural language processing, 
originally developed by Meta AI and now part of the Linux Foundation umbrella. 
It is free and open-source software released under the Modified BSD license. 
Although the Python interface is more polished and the primary focus of development, 
PyTorch also has a C++ interface. PyTorch provides two high-level features: 
Tensor computing (like NumPy) with strong acceleration via graphics processing units (GPU) 
and Deep neural networks built on a tape-based automatic differentiation system.
PyTorch is distinctive in its implementation of dynamic computational graphs, which allow for 
more flexible model building compared to static graph frameworks. This 'define-by-run' approach 
enables developers to modify neural networks on the fly, making debugging and experimentation easier. 
The framework includes modules for building complex neural network architectures, optimizers for 
training, data loading utilities, and seamless GPU integration. Its ecosystem has expanded 
to include libraries like torchvision for computer vision, torchaudio for audio processing, 
torchtext for NLP, and PyTorch Lightning for organizing research code. With its intuitive design 
and Python-native flow, PyTorch has become especially popular in research communities.
"""

# The single question asked about my_context.
my_question = "Who developed PyTorch?"

# Answer the custom question with one model: the attention model when it was
# loaded, otherwise the first model in `models`.
if not models:
    print("No models loaded, cannot answer custom question.")
else:
    model_type = 'attention' if 'attention' in models else next(iter(models))
    model = models[model_type]

    # Ask the model
    result = get_answer(model, model_type, my_context, my_question, device)

    print(f"Question: {result['question']}\n")
    print(f"Answer: {result['answer']}\n")

    highlighted_html = highlight_answer(my_context, result['start'], result['end'])
    display(HTML(f"<p>Context with highlighted answer:</p><p>{highlighted_html}</p>"))

    # Only non-base models can contribute an attention map
    if model_type != 'base' and 'attention' in result:
        visualize_attention(
            result['context_tokens'],
            result['attention'],
            f"{model_type.capitalize()} Attention",
        )