# BiRNN-CRF vs Simple CRF Evaluation

This notebook compares performance between BiRNN-CRF and simple stationary CRF models.

In [8]:
! pip install seaborn
import torch
from pathlib import Path
import matplotlib.pyplot as plt
#import seaborn as sns
import pandas as pd
from typing import Dict, List
from pathlib import Path
import time, os

from corpus import TaggedCorpus
from lexicon import build_lexicon
from crf_backprop import ConditionalRandomFieldBackprop
from crf_neural import ConditionalRandomFieldNeural
from eval import model_cross_entropy, viterbi_error_rate

os.chdir("../data")



In [20]:

# Modify train_and_evaluate function
def train_and_evaluate(model_class, train_corpus, eval_corpus, **kwargs) -> Dict:
    start_time = time.time()
    
    # Create model using training corpus vocab/tagset
    model = model_class(train_corpus.tagset, train_corpus.vocab, **kwargs)
    
    # Ensure eval corpus uses same vocab/tagset 
    if not hasattr(eval_corpus, 'tagset'):
        eval_corpus = TaggedCorpus('endev', tagset=train_corpus.tagset, vocab=train_corpus.vocab)
    
    loss = lambda x: model_cross_entropy(x, eval_corpus)

    # Rest of function remains the same
    model.train(
        corpus=train_corpus,
        loss=loss,
        minibatch_size=kwargs.get('batch_size', 30),
        lr=kwargs.get('lr', 0.05), 
        reg=kwargs.get('reg', 0.0),
        max_steps=kwargs.get('max_steps', 2000)
    )
    
    training_time = time.time() - start_time
    
    return {
        'dev_cross_entropy': model_cross_entropy(model, eval_corpus),
        'dev_error_rate': viterbi_error_rate(model, eval_corpus),
        'train_cross_entropy': model_cross_entropy(model, train_corpus),
        'train_error_rate': viterbi_error_rate(model, train_corpus),
        'training_time': training_time
    }

In [15]:
# Load data
# Load data 
train_corpus = TaggedCorpus(Path('ensup'))  
eval_corpus = TaggedCorpus(Path('endev'), tagset=train_corpus.tagset, vocab=train_corpus.vocab)


# Create lexicon
lexicon = build_lexicon(train_corpus, embeddings_file=Path('words-10.txt'))

## 1. BiRNN-CRF vs Simple CRF Comparison

In [18]:
# Baseline CRF
baseline_results = train_and_evaluate(
    ConditionalRandomFieldBackprop,
    train_corpus,
    dev_corpus
)

# BiRNN-CRF
birnn_results = train_and_evaluate(
    ConditionalRandomFieldNeural,
    train_corpus,
    dev_corpus,
    rnn_dim=10,
    lexicon=lexicon,
    corpus=train_corpus
)

print("Baseline CRF Results:")
for k, v in baseline_results.items():
    print(f"{k}: {v:.4f}")

print("\nBiRNN-CRF Results:")
for k, v in birnn_results.items():
    print(f"{k}: {v:.4f}")

  0%|          | 0/25 [00:00<?, ?it/s]




TypeError: The corpus that this sentence came from uses a different tagset or vocab

## 2. Hyperparameter Analysis

In [None]:
# Test different hyperparameter combinations
results = []

rnn_dims = [16, 32, 64]
learning_rates = [0.01, 0.05, 0.1]
batch_sizes = [10, 30, 50]
reg_values = [0.0, 0.001, 0.01]

for rnn_dim in rnn_dims:
    for lr in learning_rates:
        for batch_size in batch_sizes:
            for reg in reg_values:
                print(f"Testing: rnn_dim={rnn_dim}, lr={lr}, batch_size={batch_size}, reg={reg}")
                
                metrics = train_and_evaluate(
                    ConditionalRandomFieldNeural,
                    train_corpus,
                    dev_corpus,
                    rnn_dim=rnn_dim,
                    lexicon=lexicon,
                    corpus=train_corpus,
                    lr=lr,
                    batch_size=batch_size,
                    reg=reg
                )
                
                results.append({
                    'rnn_dim': rnn_dim,
                    'lr': lr,
                    'batch_size': batch_size,
                    'reg': reg,
                    **metrics
                })

results_df = pd.DataFrame(results)

In [None]:
# Visualize impact of hyperparameters
fig, axes = plt.subplots(2, 2, figsize=(15, 15))

# RNN Dimension Impact
sns.boxplot(data=results_df, x='rnn_dim', y='dev_cross_entropy', ax=axes[0,0])
axes[0,0].set_title('RNN Dimension vs Cross Entropy')

# Learning Rate Impact
sns.boxplot(data=results_df, x='lr', y='dev_cross_entropy', ax=axes[0,1])
axes[0,1].set_title('Learning Rate vs Cross Entropy')

# Batch Size Impact
sns.boxplot(data=results_df, x='batch_size', y='dev_cross_entropy', ax=axes[1,0])
axes[1,0].set_title('Batch Size vs Cross Entropy')

# Regularization Impact
sns.boxplot(data=results_df, x='reg', y='dev_cross_entropy', ax=axes[1,1])
axes[1,1].set_title('Regularization vs Cross Entropy')

plt.tight_layout()
plt.show()

## 3. Training Speed Analysis

In [None]:
# Plot training time vs hyperparameters
fig, axes = plt.subplots(2, 2, figsize=(15, 15))

sns.boxplot(data=results_df, x='rnn_dim', y='training_time', ax=axes[0,0])
axes[0,0].set_title('RNN Dimension vs Training Time')

sns.boxplot(data=results_df, x='lr', y='training_time', ax=axes[0,1])
axes[0,1].set_title('Learning Rate vs Training Time')

sns.boxplot(data=results_df, x='batch_size', y='training_time', ax=axes[1,0])
axes[1,0].set_title('Batch Size vs Training Time')

sns.boxplot(data=results_df, x='reg', y='training_time', ax=axes[1,1])
axes[1,1].set_title('Regularization vs Training Time')

plt.tight_layout()
plt.show()

## 4. Training vs. Evaluation Performance

In [None]:
# Compare training vs dev performance
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.scatter(results_df['train_cross_entropy'], results_df['dev_cross_entropy'])
plt.xlabel('Training Cross Entropy')
plt.ylabel('Dev Cross Entropy')
plt.title('Cross Entropy: Training vs Dev')

plt.subplot(1, 2, 2)
plt.scatter(results_df['train_error_rate'], results_df['dev_error_rate'])
plt.xlabel('Training Error Rate')
plt.ylabel('Dev Error Rate')
plt.title('Error Rate: Training vs Dev')

plt.tight_layout()
plt.show()