In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter
import pickle
import os

# Keep NLTK resources in a project-local folder: ./nltk_data under the current working directory
nltk_data_dir = os.path.join(os.getcwd(), 'nltk_data')
os.makedirs(nltk_data_dir, exist_ok=True)

# Register the folder on NLTK's search path.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if nltk_data_dir not in nltk.data.path:
    nltk.data.path.append(nltk_data_dir)

# Fetch the 'punkt_tab' tokenizer data into that folder (left as the cell's last expression
# so the notebook still displays the call's return value)
nltk.download('punkt_tab', download_dir=nltk_data_dir)

[nltk_data] Downloading package punkt_tab to /content/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [14]:
from attention_layer import AttentionLayer
from bilstm_with_attention import BiLSTMAttention
from evidence_detection_dataset import EvidenceDetectionDataset
from early_stopping import EarlyStopping
from vocabulary import Vocabulary
from trainer import Trainer

In [None]:
def load_glove_embeddings(vocab, embedding_dim=300, glove_path=None):
    """Build an embedding matrix for ``vocab``, filling rows from a GloVe text file.

    Every row starts as a small random vector drawn from a normal distribution
    with standard deviation 0.1. Rows whose word appears in the GloVe file are
    then overwritten with the pretrained vector, so words missing from the file
    keep their random initialisation.

    Args:
        vocab: Object that supports ``len()`` and exposes ``stoi``, a mapping
            from word to row index in the returned matrix.
        embedding_dim: Dimensionality of each vector. Also selects the default
            file name when ``glove_path`` is not given.
        glove_path: Optional path to a GloVe-format text file (one
            ``word v1 v2 ...`` entry per line). Defaults to
            ``glove.6B.{embedding_dim}d.txt`` relative to the working directory.

    Returns:
        ``np.ndarray`` of shape ``(len(vocab), embedding_dim)``. If the file
        does not exist, the purely random matrix is returned.
    """
    # One vectorised draw instead of a per-row Python loop; with NumPy's legacy
    # global RNG this consumes the same stream as row-by-row draws.
    embeddings = np.random.normal(scale=0.1, size=(len(vocab), embedding_dim))

    if glove_path is None:
        glove_path = f'glove.6B.{embedding_dim}d.txt'

    if not os.path.exists(glove_path):
        print(f"GloVe embeddings not found at {glove_path}. Using random embeddings.")
        return embeddings

    print(f"Loading GloVe embeddings from {glove_path}...")

    with open(glove_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(tqdm(f, desc="Loading GloVe")):
            values = line.split()

            # Blank lines (e.g. a trailing newline) carry no entry; skip quietly.
            if not values:
                continue

            # Each entry must be the word followed by exactly embedding_dim numbers.
            if len(values) != embedding_dim + 1:  # +1 for the word itself
                print(f"Warning: Line {i} has {len(values)} values, expected {embedding_dim + 1}. Skipping.")
                continue

            word = values[0]
            if word not in vocab.stoi:
                continue

            # Only the numeric conversion is expected to fail on malformed data;
            # any other exception indicates a real bug and should propagate.
            try:
                vector = np.array(values[1:], dtype='float32')
            except ValueError as e:
                print(f"Error processing line {i}: {e}")
                continue

            embeddings[vocab.stoi[word]] = vector

    print(f"Loaded {embedding_dim}-dimensional GloVe embeddings.")
    return embeddings

In [15]:
# One seed value shared by every RNG that is seeded here, for reproducibility
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Prefer the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Make sure the directory that will hold saved models exists
os.makedirs('models', exist_ok=True)

# Read the training and validation (dev) splits from CSV, in that order
print("Loading datasets...")
train_df, val_df = map(pd.read_csv, ('./data/train.csv', './data/dev.csv'))

# Report the size of each split and how its 'label' column is distributed
print(f"Train shape: {train_df.shape}, Validation shape: {val_df.shape}")
print(f"Label distribution in train: {train_df['label'].value_counts().to_dict()}")
print(f"Label distribution in val: {val_df['label'].value_counts().to_dict()}")

# Reuse the cached vocabulary when it exists; otherwise build it from the
# training text and cache it for later runs
vocab_path = 'vocab.pkl'
if not os.path.exists(vocab_path):
    print("Creating new vocabulary")
    vocab = Vocabulary(freq_threshold=3)
    # Both text columns of the training split feed the vocabulary
    all_texts = [*train_df['Claim'].tolist(), *train_df['Evidence'].tolist()]
    vocab.build_vocabulary(all_texts)

    # Persist the freshly built vocabulary
    with open(vocab_path, 'wb') as f:
        pickle.dump(vocab, f)
else:
    print(f"Loading vocabulary from {vocab_path}")
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load a vocab.pkl that this notebook (or a trusted source) produced.
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

print(f"Vocabulary size: {len(vocab)}")

# Embedding matrix aligned with the vocabulary (GloVe where available)
embedding_dim = 300
embeddings = load_glove_embeddings(vocab, embedding_dim=embedding_dim)

# Wrap each split in the project dataset class, sharing the same vocabulary
train_dataset, val_dataset = (
    EvidenceDetectionDataset(frame, vocab) for frame in (train_df, val_df)
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

# Model hyperparameters
hidden_dim, num_layers, dropout = 256, 1, 0.4

# Build the BiLSTM-with-attention model, seeded with the embedding matrix
model = BiLSTMAttention(
    vocab_size=len(vocab), embedding_dim=embedding_dim,
    hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout,
    pretrained_embeddings=embeddings,
)

# Summarise the configuration the model was built with (one item per line)
print(
    "\nModel Architecture:",
    f"Vocabulary Size: {len(vocab)}",
    f"Embedding Dimension: {embedding_dim}",
    f"Hidden Dimension: {hidden_dim}",
    f"Number of LSTM Layers: {num_layers}",
    f"Dropout Rate: {dropout}",
    sep="\n",
)

# Parameter counts: all parameters, then only those that receive gradients
total_params = sum(param.numel() for param in model.parameters())
trainable_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print(f"Total Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}")

# Optimisation settings passed to the Trainer
batch_size, learning_rate, weight_decay, num_epochs = 32, 1e-3, 1e-5, 15

# The project Trainer receives the model, both datasets and the settings above
trainer = Trainer(
    model=model, train_dataset=train_dataset, val_dataset=val_dataset,
    batch_size=batch_size, learning_rate=learning_rate, weight_decay=weight_decay,
    device=device,
)

# Run training for at most num_epochs epochs
print("\nStarting training...")
trainer.train(num_epochs=num_epochs)

# Score the model on the validation split once training has returned
# NOTE(review): the recorded output shows the best checkpoint being reloaded
# after early stopping, so these metrics presumably describe that checkpoint.
print("\nPerforming final evaluation on validation set...")
val_metrics = trainer.evaluate()

print("\nFinal Validation Metrics:")
print(f"Loss: {val_metrics['loss']:.4f}")
print(f"Accuracy: {val_metrics['accuracy']:.4f}")
print(f"Precision: {val_metrics['precision']:.4f}")
print(f"Recall: {val_metrics['recall']:.4f}")
print(f"F1 Score: {val_metrics['f1']:.4f}")

# Persist the model held by the trainer under ./models
final_model_path = 'models/final_model.pt'
trainer.save_model(final_model_path)

print("\nTraining and evaluation completed!")

Using device: cuda
Loading datasets...
Train shape: (21508, 3), Validation shape: (5926, 3)
Label distribution in train: {0: 15654, 1: 5854}
Label distribution in val: {0: 4286, 1: 1640}
Loading vocabulary from vocab.pkl
Vocabulary size: 14401
Loading GloVe embeddings from glove.6B.300d.txt...


Loading GloVe: 400001it [00:07, 54091.14it/s]


Loaded 300-dimensional GloVe embeddings.
Train dataset size: 21508
Validation dataset size: 5926

Model Architecture:
Vocabulary Size: 14401
Embedding Dimension: 300
Hidden Dimension: 256
Number of LSTM Layers: 1
Dropout Rate: 0.4
Total Parameters: 5,398,832
Trainable Parameters: 5,398,832

Starting training...
Starting training on device: cuda
Training set size: 21508
Validation set size: 5926


Training: 100%|██████████| 673/673 [00:18<00:00, 36.92it/s]
Validating: 100%|██████████| 186/186 [00:02<00:00, 71.46it/s]


Epoch 1/15 - Time: 20.93s
Train Loss: 0.4317, Train F1: 0.5666
Val Loss: 0.3938, Val F1: 0.6736
Val Precision: 0.6572, Val Recall: 0.6909
Validation F1 increased (0.000000 --> 0.673603). Saving model...


Training: 100%|██████████| 673/673 [00:18<00:00, 36.97it/s]
Validating: 100%|██████████| 186/186 [00:02<00:00, 71.38it/s]


Epoch 2/15 - Time: 20.92s
Train Loss: 0.3315, Train F1: 0.7163
Val Loss: 0.3700, Val F1: 0.6583
Val Precision: 0.7170, Val Recall: 0.6085
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 673/673 [00:18<00:00, 37.19it/s]
Validating: 100%|██████████| 186/186 [00:02<00:00, 71.13it/s]


Epoch 3/15 - Time: 20.80s
Train Loss: 0.2470, Train F1: 0.8079
Val Loss: 0.4107, Val F1: 0.6784
Val Precision: 0.6757, Val Recall: 0.6811
Validation F1 increased (0.673603 --> 0.678409). Saving model...


Training: 100%|██████████| 673/673 [00:18<00:00, 37.18it/s]
Validating: 100%|██████████| 186/186 [00:02<00:00, 71.36it/s]


Epoch 4/15 - Time: 20.80s
Train Loss: 0.1544, Train F1: 0.8883
Val Loss: 0.6256, Val F1: 0.6425
Val Precision: 0.6484, Val Recall: 0.6366
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 673/673 [00:18<00:00, 36.84it/s]
Validating: 100%|██████████| 186/186 [00:02<00:00, 71.39it/s]


Epoch 5/15 - Time: 20.96s
Train Loss: 0.0866, Train F1: 0.9419
Val Loss: 0.7397, Val F1: 0.6385
Val Precision: 0.6411, Val Recall: 0.6360
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 673/673 [00:18<00:00, 37.06it/s]
Validating: 100%|██████████| 186/186 [00:02<00:00, 70.56it/s]


Epoch 6/15 - Time: 20.89s
Train Loss: 0.0540, Train F1: 0.9655
Val Loss: 0.9310, Val F1: 0.6240
Val Precision: 0.6935, Val Recall: 0.5671
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'

Performing final evaluation on validation set...


Validating: 100%|██████████| 186/186 [00:02<00:00, 71.54it/s]



Final Validation Metrics:
Loss: 0.4107
Accuracy: 0.8213
Precision: 0.6757
Recall: 0.6811
F1 Score: 0.6784
Model saved to models/final_model.pt

Training and evaluation completed!
