# Evidence Detection

## Data is in the form of: Claim, Evidence, Labels

## Labels
- **1 (Relevant)** - The evidence supports or is related to the claim.
- **0 (Not Relevant)** – The evidence does not support or is unrelated to the claim.

### Dependency Management

In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.8/231.8 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.1 colorlog-6.9.0 optuna-4.2.1


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter
import pickle
import os
import optuna

# Create a directory for NLTK data
nltk_data_dir = os.path.join(os.getcwd(), 'nltk_data')
os.makedirs(nltk_data_dir, exist_ok=True)

# Set the NLTK data path
nltk.data.path.append(nltk_data_dir)

# Download punkt to the specified directory
nltk.download('punkt_tab', download_dir=nltk_data_dir)

[nltk_data] Downloading package punkt_tab to /content/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [4]:
from evidence_detection.bilstm_with_attention import BiLSTMAttention
from evidence_detection.evidence_detection_dataset import EvidenceDetectionDataset
from evidence_detection.vocabulary import Vocabulary
from evidence_detection.trainer import Trainer

In [5]:
# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create output directory for models and plots
os.makedirs('models', exist_ok=True)
# os.makedirs('plots', exist_ok=True)

Using device: cuda


### Define Glove Embedding Method

In [6]:
def load_glove_embeddings(vocab, glove_path, embedding_dim=300):
    """Load GloVe embeddings for words in vocabulary"""
    embeddings = np.zeros((len(vocab), embedding_dim))

    # Initialize random embeddings
    for i in range(len(vocab)):
        embeddings[i] = np.random.normal(scale=0.1, size=(embedding_dim, ))

    # Load pretrained embeddings
    if not os.path.exists(glove_path):
        print(f"GloVe embeddings not found at {glove_path}. Using random embeddings.")
        return embeddings

    print(f"Loading GloVe embeddings from {glove_path}...")

    with open(glove_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(tqdm(f, desc="Loading GloVe")):
            try:
                values = line.split()

                # Check if the vector has the correct dimension
                if len(values) != embedding_dim + 1:  # +1 for the word itself
                    print(f"Warning: Line {i} has {len(values)} values, expected {embedding_dim + 1}. Skipping.")
                    continue

                word = values[0]
                if word in vocab.stoi:
                    vector = np.array(values[1:], dtype='float32')

                    # Double-check vector dimension
                    if len(vector) != embedding_dim:
                        print(f"Warning: Vector for word '{word}' has dimension {len(vector)}, expected {embedding_dim}. Skipping.")
                        continue

                    embeddings[vocab.stoi[word]] = vector
            except Exception as e:
                print(f"Error processing line {i}: {e}")
                continue

    print(f"Loaded {embedding_dim}-dimensional GloVe embeddings.")
    return embeddings

### Get Data, build vocabulary and Build Glove Embeddings

In [7]:
# Load datasets
print("Loading datasets...")
# train_df = pd.read_csv('./data/train.csv')
# val_df = pd.read_csv('./data/dev.csv')
print("Using train.csv for train and validation")
print("Using dev.csv for testing")
train_df = pd.read_csv('./data/train.csv')
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)
test_df = pd.read_csv('./data/dev.csv')

print(f"Train shape: {train_df.shape}, Validation shape: {val_df.shape}")
print(f"Label distribution in train: {train_df['label'].value_counts().to_dict()}")
print(f"Label distribution in val: {val_df['label'].value_counts().to_dict()}")

Loading datasets...
Using train.csv for train and validation
Using dev.csv for testing
Train shape: (17206, 3), Validation shape: (4302, 3)
Label distribution in train: {0: 12504, 1: 4702}
Label distribution in val: {0: 3150, 1: 1152}


In [8]:
# Load or create vocabulary
vocab_path = 'vocab.pkl'
if os.path.exists(vocab_path):
    print(f"Loading vocabulary from {vocab_path}")
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)
else:
    print("Creating new vocabulary")
    vocab = Vocabulary(freq_threshold=3)
    all_texts = train_df['Claim'].tolist() + train_df['Evidence'].tolist()
    vocab.build_vocabulary(all_texts)

    # Save vocabulary
    with open(vocab_path, 'wb') as f:
        pickle.dump(vocab, f)

print(f"Vocabulary size: {len(vocab)}")

Creating new vocabulary


Building vocabulary: 100%|██████████| 34412/34412 [00:03<00:00, 8897.85it/s]

Vocabulary size: 12686
Vocabulary size: 12686





In [9]:
# Load GloVe embeddings
embedding_dim = 300
glove_path = "glove.6B.300d.txt"
embeddings = load_glove_embeddings(vocab, glove_path, embedding_dim)

Loading GloVe embeddings from glove.6B.300d.txt...


Loading GloVe: 104968it [00:02, 44579.87it/s]

Loaded 300-dimensional GloVe embeddings.





### Hyperparameter Tuning

In [38]:
# Create datasets
train_dataset = EvidenceDetectionDataset(train_df, vocab)
val_dataset = EvidenceDetectionDataset(val_df, vocab)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")

num_layers = 1
# Define objective function for Optuna
def objective(trial):
    # Hyperparameters to optimize
    hidden_dim = trial.suggest_categorical('hidden_dim', [128, 256, 512])
    # num_layers = trial.suggest_int('num_layers', [1])
    dropout = trial.suggest_float('dropout', 0.3, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-4, log=True)

    # Initialize model with suggested parameters
    model = BiLSTMAttention(
        vocab_size=len(vocab),
        embedding_dim=embedding_dim,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        dropout=dropout,
        pretrained_embeddings=embeddings
    )

    # Initialize trainer
    trainer = Trainer(
        model=model,
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        batch_size=batch_size,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        device=device
    )

    # Train for a few epochs
    trainer.train(num_epochs=5)  # Reduced epochs for faster hyperparameter search

    # Evaluate on validation set
    val_metrics = trainer.evaluate()

    # Return F1 score as the optimization metric
    return val_metrics['f1']

# Create study directory
os.makedirs('optuna_results', exist_ok=True)

# Create and run Optuna study
study = optuna.create_study(direction='maximize',
                           study_name='bilstm_attention_optimization'
                          #  storage='sqlite:///optuna_results/bilstm_attention.db'
                            )
study.optimize(objective, n_trials=20)  # Adjust number of trials as needed

# Print and save best parameters
print("Best trial:")
trial = study.best_trial
print(f"  Value (F1 Score): {trial.value:.4f}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save best parameters
with open('optuna_results/best_params.pkl', 'wb') as f:
    pickle.dump(trial.params, f)

[I 2025-03-21 17:29:54,973] A new study created in memory with name: bilstm_attention_optimization


Train dataset size: 17206
Validation dataset size: 4302
Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.07it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.64it/s]


Epoch 1/5 - Time: 25.40s
Train Loss: 0.4384, Train F1: 0.5560
Val Loss: 0.3910, Val F1: 0.5648
Val Precision: 0.7746, Val Recall: 0.4444
Validation F1 increased (0.000000 --> 0.564810). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.00it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.60it/s]


Epoch 2/5 - Time: 25.44s
Train Loss: 0.3546, Train F1: 0.6905
Val Loss: 0.3767, Val F1: 0.6593
Val Precision: 0.7009, Val Recall: 0.6224
Validation F1 increased (0.564810 --> 0.659310). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.12it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.53it/s]


Epoch 3/5 - Time: 25.41s
Train Loss: 0.2834, Train F1: 0.7662
Val Loss: 0.3924, Val F1: 0.6575
Val Precision: 0.6688, Val Recall: 0.6467
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.58it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.00it/s]


Epoch 4/5 - Time: 25.69s
Train Loss: 0.2060, Train F1: 0.8464
Val Loss: 0.4550, Val F1: 0.6378
Val Precision: 0.6638, Val Recall: 0.6137
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.21it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.68it/s]


Epoch 5/5 - Time: 25.86s
Train Loss: 0.1303, Train F1: 0.9105
Val Loss: 0.6128, Val F1: 0.6083
Val Precision: 0.6227, Val Recall: 0.5946
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 107.92it/s]
[I 2025-03-21 17:32:05,496] Trial 0 finished with value: 0.6593103448275862 and parameters: {'hidden_dim': 512, 'dropout': 0.45853537224102003, 'batch_size': 16, 'learning_rate': 0.0012959121431270646, 'weight_decay': 2.2351873831757065e-05}. Best is trial 0 with value: 0.6593103448275862.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 538/538 [00:15<00:00, 33.89it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 69.05it/s]


Epoch 1/5 - Time: 17.93s
Train Loss: 0.4528, Train F1: 0.5355
Val Loss: 0.4060, Val F1: 0.6090
Val Precision: 0.6817, Val Recall: 0.5503
Validation F1 increased (0.000000 --> 0.609030). Saving model...


Training: 100%|██████████| 538/538 [00:16<00:00, 33.57it/s]
Validating: 100%|██████████| 135/135 [00:02<00:00, 67.39it/s]


Epoch 2/5 - Time: 18.13s
Train Loss: 0.3835, Train F1: 0.6459
Val Loss: 0.4097, Val F1: 0.5496
Val Precision: 0.7912, Val Recall: 0.4210
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 538/538 [00:16<00:00, 33.19it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 69.44it/s]


Epoch 3/5 - Time: 18.25s
Train Loss: 0.3389, Train F1: 0.6981
Val Loss: 0.3919, Val F1: 0.6699
Val Precision: 0.6740, Val Recall: 0.6658
Validation F1 increased (0.609030 --> 0.669869). Saving model...


Training: 100%|██████████| 538/538 [00:16<00:00, 33.31it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 70.37it/s]


Epoch 4/5 - Time: 18.16s
Train Loss: 0.3032, Train F1: 0.7511
Val Loss: 0.3968, Val F1: 0.6706
Val Precision: 0.6171, Val Recall: 0.7344
Validation F1 increased (0.669869 --> 0.670630). Saving model...


Training: 100%|██████████| 538/538 [00:15<00:00, 34.17it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 70.15it/s]


Epoch 5/5 - Time: 17.76s
Train Loss: 0.2592, Train F1: 0.7995
Val Loss: 0.4193, Val F1: 0.6521
Val Precision: 0.6423, Val Recall: 0.6623
EarlyStopping counter: 1 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 135/135 [00:01<00:00, 69.95it/s]
[I 2025-03-21 17:33:37,946] Trial 1 finished with value: 0.6706302021403091 and parameters: {'hidden_dim': 512, 'dropout': 0.3415018128297464, 'batch_size': 32, 'learning_rate': 0.003677635577834768, 'weight_decay': 4.642378173429623e-05}. Best is trial 1 with value: 0.6706302021403091.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 23.85it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.27it/s]


Epoch 1/5 - Time: 12.98s
Train Loss: 0.4391, Train F1: 0.5610
Val Loss: 0.3853, Val F1: 0.6172
Val Precision: 0.6997, Val Recall: 0.5521
Validation F1 increased (0.000000 --> 0.617176). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 23.93it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.18it/s]


Epoch 2/5 - Time: 12.95s
Train Loss: 0.3400, Train F1: 0.7076
Val Loss: 0.3692, Val F1: 0.6512
Val Precision: 0.7183, Val Recall: 0.5955
Validation F1 increased (0.617176 --> 0.651163). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.20it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 41.37it/s]


Epoch 3/5 - Time: 12.85s
Train Loss: 0.2483, Train F1: 0.8080
Val Loss: 0.4713, Val F1: 0.5993
Val Precision: 0.7222, Val Recall: 0.5122
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.14it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.10it/s]


Epoch 4/5 - Time: 12.86s
Train Loss: 0.1600, Train F1: 0.8830
Val Loss: 0.5328, Val F1: 0.6253
Val Precision: 0.6431, Val Recall: 0.6085
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.12it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.06it/s]


Epoch 5/5 - Time: 12.86s
Train Loss: 0.1005, Train F1: 0.9335
Val Loss: 0.6261, Val F1: 0.6224
Val Precision: 0.6224, Val Recall: 0.6224
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 42.66it/s]
[I 2025-03-21 17:34:44,218] Trial 2 finished with value: 0.6511627906976745 and parameters: {'hidden_dim': 256, 'dropout': 0.44167815773088326, 'batch_size': 64, 'learning_rate': 0.0032288682851384465, 'weight_decay': 7.717950438739426e-06}. Best is trial 1 with value: 0.6706302021403091.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 538/538 [00:15<00:00, 35.74it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 72.42it/s]


Epoch 1/5 - Time: 17.01s
Train Loss: 0.5539, Train F1: 0.1976
Val Loss: 0.4492, Val F1: 0.4756
Val Precision: 0.6580, Val Recall: 0.3724
Validation F1 increased (0.000000 --> 0.475610). Saving model...


Training: 100%|██████████| 538/538 [00:15<00:00, 35.87it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.31it/s]


Epoch 2/5 - Time: 16.99s
Train Loss: 0.4340, Train F1: 0.5698
Val Loss: 0.4056, Val F1: 0.5830
Val Precision: 0.7242, Val Recall: 0.4878
Validation F1 increased (0.475610 --> 0.582988). Saving model...


Training: 100%|██████████| 538/538 [00:14<00:00, 36.27it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 72.16it/s]


Epoch 3/5 - Time: 16.80s
Train Loss: 0.3948, Train F1: 0.6219
Val Loss: 0.3860, Val F1: 0.6300
Val Precision: 0.7173, Val Recall: 0.5616
Validation F1 increased (0.582988 --> 0.629990). Saving model...


Training: 100%|██████████| 538/538 [00:14<00:00, 36.07it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.47it/s]


Epoch 4/5 - Time: 16.90s
Train Loss: 0.3681, Train F1: 0.6664
Val Loss: 0.3887, Val F1: 0.6191
Val Precision: 0.7488, Val Recall: 0.5278
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 538/538 [00:14<00:00, 36.00it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.72it/s]


Epoch 5/5 - Time: 16.92s
Train Loss: 0.3420, Train F1: 0.6976
Val Loss: 0.3980, Val F1: 0.6328
Val Precision: 0.7509, Val Recall: 0.5469
Validation F1 increased (0.629990 --> 0.632848). Saving model...
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 135/135 [00:01<00:00, 72.42it/s]
[I 2025-03-21 17:36:10,933] Trial 3 finished with value: 0.6328478151682572 and parameters: {'hidden_dim': 128, 'dropout': 0.31906989035272887, 'batch_size': 32, 'learning_rate': 0.00010171494097999478, 'weight_decay': 2.5124651586972893e-05}. Best is trial 1 with value: 0.6706302021403091.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 538/538 [00:14<00:00, 36.04it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.79it/s]


Epoch 1/5 - Time: 16.90s
Train Loss: 0.4553, Train F1: 0.5222
Val Loss: 0.3887, Val F1: 0.6667
Val Precision: 0.6702, Val Recall: 0.6632
Validation F1 increased (0.000000 --> 0.666667). Saving model...


Training: 100%|██████████| 538/538 [00:15<00:00, 35.76it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.09it/s]


Epoch 2/5 - Time: 17.04s
Train Loss: 0.3442, Train F1: 0.7031
Val Loss: 0.3714, Val F1: 0.6214
Val Precision: 0.7626, Val Recall: 0.5243
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 538/538 [00:14<00:00, 36.02it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 72.16it/s]


Epoch 3/5 - Time: 16.90s
Train Loss: 0.2568, Train F1: 0.7966
Val Loss: 0.4052, Val F1: 0.6631
Val Precision: 0.6281, Val Recall: 0.7023
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 538/538 [00:15<00:00, 35.82it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 70.80it/s]


Epoch 4/5 - Time: 17.02s
Train Loss: 0.1648, Train F1: 0.8782
Val Loss: 0.4948, Val F1: 0.6436
Val Precision: 0.6323, Val Recall: 0.6554
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 135/135 [00:01<00:00, 71.55it/s]
[I 2025-03-21 17:37:20,796] Trial 4 finished with value: 0.6666666666666666 and parameters: {'hidden_dim': 128, 'dropout': 0.40622620017986255, 'batch_size': 32, 'learning_rate': 0.0007019580986736259, 'weight_decay': 1.362971193038619e-06}. Best is trial 1 with value: 0.6706302021403091.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.89it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.96it/s]


Epoch 1/5 - Time: 25.01s
Train Loss: 0.4608, Train F1: 0.4931
Val Loss: 0.4146, Val F1: 0.5459
Val Precision: 0.7031, Val Recall: 0.4462
Validation F1 increased (0.000000 --> 0.545937). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.27it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.78it/s]


Epoch 2/5 - Time: 25.33s
Train Loss: 0.4069, Train F1: 0.5999
Val Loss: 0.3925, Val F1: 0.5855
Val Precision: 0.7359, Val Recall: 0.4861
Validation F1 increased (0.545937 --> 0.585468). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.23it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 107.41it/s]


Epoch 3/5 - Time: 25.38s
Train Loss: 0.3730, Train F1: 0.6550
Val Loss: 0.4062, Val F1: 0.6799
Val Precision: 0.6254, Val Recall: 0.7448
Validation F1 increased (0.585468 --> 0.679873). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.52it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 110.04it/s]


Epoch 4/5 - Time: 25.18s
Train Loss: 0.3406, Train F1: 0.7126
Val Loss: 0.3945, Val F1: 0.6376
Val Precision: 0.7018, Val Recall: 0.5842
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.03it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.15it/s]


Epoch 5/5 - Time: 24.96s
Train Loss: 0.3122, Train F1: 0.7405
Val Loss: 0.4237, Val F1: 0.6011
Val Precision: 0.7027, Val Recall: 0.5252
EarlyStopping counter: 2 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 109.41it/s]
[I 2025-03-21 17:39:29,338] Trial 5 finished with value: 0.6798732171156894 and parameters: {'hidden_dim': 256, 'dropout': 0.3327062265419595, 'batch_size': 16, 'learning_rate': 0.004468091432495813, 'weight_decay': 6.44022285355894e-05}. Best is trial 5 with value: 0.6798732171156894.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 538/538 [00:15<00:00, 35.53it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.98it/s]


Epoch 1/5 - Time: 17.11s
Train Loss: 0.4748, Train F1: 0.4705
Val Loss: 0.3855, Val F1: 0.6352
Val Precision: 0.6886, Val Recall: 0.5894
Validation F1 increased (0.000000 --> 0.635173). Saving model...


Training: 100%|██████████| 538/538 [00:15<00:00, 35.33it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.42it/s]


Epoch 2/5 - Time: 17.21s
Train Loss: 0.3625, Train F1: 0.6733
Val Loss: 0.3667, Val F1: 0.6303
Val Precision: 0.7344, Val Recall: 0.5521
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 538/538 [00:15<00:00, 35.62it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 72.05it/s]


Epoch 3/5 - Time: 17.07s
Train Loss: 0.3005, Train F1: 0.7490
Val Loss: 0.3779, Val F1: 0.6634
Val Precision: 0.6810, Val Recall: 0.6467
Validation F1 increased (0.635173 --> 0.663402). Saving model...


Training: 100%|██████████| 538/538 [00:15<00:00, 35.68it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.09it/s]


Epoch 4/5 - Time: 17.07s
Train Loss: 0.2300, Train F1: 0.8247
Val Loss: 0.4201, Val F1: 0.6616
Val Precision: 0.6129, Val Recall: 0.7188
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 538/538 [00:15<00:00, 35.74it/s]
Validating: 100%|██████████| 135/135 [00:01<00:00, 71.09it/s]


Epoch 5/5 - Time: 17.05s
Train Loss: 0.1634, Train F1: 0.8864
Val Loss: 0.5067, Val F1: 0.6350
Val Precision: 0.6778, Val Recall: 0.5972
EarlyStopping counter: 2 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 135/135 [00:01<00:00, 71.41it/s]
[I 2025-03-21 17:40:56,900] Trial 6 finished with value: 0.6634016028495102 and parameters: {'hidden_dim': 128, 'dropout': 0.35464280462923115, 'batch_size': 32, 'learning_rate': 0.00037825110212021254, 'weight_decay': 1.5521697571678072e-06}. Best is trial 5 with value: 0.6798732171156894.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 23.92it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.69it/s]


Epoch 1/5 - Time: 12.93s
Train Loss: 0.5211, Train F1: 0.3230
Val Loss: 0.4204, Val F1: 0.5849
Val Precision: 0.6531, Val Recall: 0.5295
Validation F1 increased (0.000000 --> 0.584851). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 23.98it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.87it/s]


Epoch 2/5 - Time: 12.90s
Train Loss: 0.4130, Train F1: 0.5978
Val Loss: 0.3942, Val F1: 0.5789
Val Precision: 0.7311, Val Recall: 0.4792
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 23.99it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.48it/s]


Epoch 3/5 - Time: 12.91s
Train Loss: 0.3784, Train F1: 0.6481
Val Loss: 0.3859, Val F1: 0.6361
Val Precision: 0.7151, Val Recall: 0.5729
Validation F1 increased (0.584851 --> 0.636145). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.11it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.09it/s]


Epoch 4/5 - Time: 12.83s
Train Loss: 0.3554, Train F1: 0.6793
Val Loss: 0.3866, Val F1: 0.6037
Val Precision: 0.7699, Val Recall: 0.4965
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.13it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.81it/s]


Epoch 5/5 - Time: 12.83s
Train Loss: 0.3282, Train F1: 0.7140
Val Loss: 0.3706, Val F1: 0.6601
Val Precision: 0.7165, Val Recall: 0.6120
Validation F1 increased (0.636145 --> 0.660112). Saving model...
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 43.03it/s]
[I 2025-03-21 17:42:03,165] Trial 7 finished with value: 0.6601123595505618 and parameters: {'hidden_dim': 512, 'dropout': 0.4772300969152529, 'batch_size': 64, 'learning_rate': 0.00011911841936150478, 'weight_decay': 1.3175172580448904e-06}. Best is trial 5 with value: 0.6798732171156894.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 24.12it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.36it/s]


Epoch 1/5 - Time: 12.82s
Train Loss: 0.4603, Train F1: 0.5202
Val Loss: 0.3882, Val F1: 0.6345
Val Precision: 0.6968, Val Recall: 0.5825
Validation F1 increased (0.000000 --> 0.634515). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.14it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.22it/s]


Epoch 2/5 - Time: 12.81s
Train Loss: 0.3846, Train F1: 0.6517
Val Loss: 0.3727, Val F1: 0.6424
Val Precision: 0.7149, Val Recall: 0.5833
Validation F1 increased (0.634515 --> 0.642447). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.06it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.44it/s]


Epoch 3/5 - Time: 12.88s
Train Loss: 0.3391, Train F1: 0.7099
Val Loss: 0.3732, Val F1: 0.6220
Val Precision: 0.7485, Val Recall: 0.5321
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.10it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 41.72it/s]


Epoch 4/5 - Time: 12.89s
Train Loss: 0.2895, Train F1: 0.7677
Val Loss: 0.3865, Val F1: 0.6836
Val Precision: 0.6292, Val Recall: 0.7483
Validation F1 increased (0.642447 --> 0.683584). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.13it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.29it/s]


Epoch 5/5 - Time: 12.85s
Train Loss: 0.2311, Train F1: 0.8231
Val Loss: 0.4062, Val F1: 0.6586
Val Precision: 0.6767, Val Recall: 0.6415
EarlyStopping counter: 1 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 42.12it/s]
[I 2025-03-21 17:43:09,310] Trial 8 finished with value: 0.6835844567803331 and parameters: {'hidden_dim': 512, 'dropout': 0.46593209511824596, 'batch_size': 64, 'learning_rate': 0.0007595312137874969, 'weight_decay': 9.000677316506832e-05}. Best is trial 8 with value: 0.6835844567803331.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 24.00it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.01it/s]


Epoch 1/5 - Time: 12.89s
Train Loss: 0.5343, Train F1: 0.2636
Val Loss: 0.4325, Val F1: 0.5263
Val Precision: 0.6584, Val Recall: 0.4384
Validation F1 increased (0.000000 --> 0.526316). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.01it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.87it/s]


Epoch 2/5 - Time: 12.88s
Train Loss: 0.4193, Train F1: 0.5944
Val Loss: 0.4359, Val F1: 0.4734
Val Precision: 0.7820, Val Recall: 0.3394
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.42it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.63it/s]


Epoch 3/5 - Time: 12.71s
Train Loss: 0.3824, Train F1: 0.6438
Val Loss: 0.3877, Val F1: 0.6386
Val Precision: 0.7118, Val Recall: 0.5790
Validation F1 increased (0.526316 --> 0.638583). Saving model...


Training: 100%|██████████| 269/269 [00:10<00:00, 24.59it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.32it/s]


Epoch 4/5 - Time: 12.60s
Train Loss: 0.3545, Train F1: 0.6842
Val Loss: 0.3822, Val F1: 0.6460
Val Precision: 0.7023, Val Recall: 0.5981
Validation F1 increased (0.638583 --> 0.646038). Saving model...


Training: 100%|██████████| 269/269 [00:10<00:00, 24.47it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.76it/s]


Epoch 5/5 - Time: 12.68s
Train Loss: 0.3250, Train F1: 0.7190
Val Loss: 0.3792, Val F1: 0.6600
Val Precision: 0.6877, Val Recall: 0.6345
Validation F1 increased (0.646038 --> 0.660045). Saving model...
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 42.99it/s]
[I 2025-03-21 17:44:14,903] Trial 9 finished with value: 0.6600451467268623 and parameters: {'hidden_dim': 256, 'dropout': 0.4677933231849839, 'batch_size': 64, 'learning_rate': 0.000141851256628224, 'weight_decay': 1.8426265036674089e-06}. Best is trial 8 with value: 0.6835844567803331.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 24.24it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.11it/s]


Epoch 1/5 - Time: 12.77s
Train Loss: 0.4457, Train F1: 0.5588
Val Loss: 0.3697, Val F1: 0.6657
Val Precision: 0.7214, Val Recall: 0.6181
Validation F1 increased (0.000000 --> 0.665732). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 23.96it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.19it/s]


Epoch 2/5 - Time: 12.90s
Train Loss: 0.3419, Train F1: 0.7070
Val Loss: 0.3830, Val F1: 0.6585
Val Precision: 0.6905, Val Recall: 0.6293
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.26it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.69it/s]


Epoch 3/5 - Time: 12.78s
Train Loss: 0.2439, Train F1: 0.8110
Val Loss: 0.4339, Val F1: 0.6620
Val Precision: 0.5964, Val Recall: 0.7439
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.23it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.62it/s]


Epoch 4/5 - Time: 12.79s
Train Loss: 0.1412, Train F1: 0.8971
Val Loss: 0.5082, Val F1: 0.6312
Val Precision: 0.6619, Val Recall: 0.6033
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 42.31it/s]
[I 2025-03-21 17:45:07,923] Trial 10 finished with value: 0.6657316503038803 and parameters: {'hidden_dim': 512, 'dropout': 0.4095988232629122, 'batch_size': 64, 'learning_rate': 0.0016553771586655481, 'weight_decay': 7.540331317933359e-06}. Best is trial 8 with value: 0.6835844567803331.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.92it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.92it/s]


Epoch 1/5 - Time: 25.03s
Train Loss: 0.4871, Train F1: 0.4180
Val Loss: 0.4589, Val F1: 0.5769
Val Precision: 0.6251, Val Recall: 0.5356
Validation F1 increased (0.000000 --> 0.576905). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.95it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.98it/s]


Epoch 2/5 - Time: 24.98s
Train Loss: 0.4405, Train F1: 0.5340
Val Loss: 0.4276, Val F1: 0.6180
Val Precision: 0.6297, Val Recall: 0.6068
Validation F1 increased (0.576905 --> 0.618037). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.39it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 110.39it/s]


Epoch 3/5 - Time: 24.77s
Train Loss: 0.4208, Train F1: 0.5718
Val Loss: 0.4946, Val F1: 0.5078
Val Precision: 0.7538, Val Recall: 0.3828
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.15it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.93it/s]


Epoch 4/5 - Time: 24.91s
Train Loss: 0.3932, Train F1: 0.6302
Val Loss: 0.3823, Val F1: 0.6180
Val Precision: 0.7162, Val Recall: 0.5434
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.17it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 110.13it/s]


Epoch 5/5 - Time: 24.88s
Train Loss: 0.3611, Train F1: 0.6797
Val Loss: 0.4368, Val F1: 0.6604
Val Precision: 0.6011, Val Recall: 0.7326
Validation F1 increased (0.618037 --> 0.660407). Saving model...
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 109.60it/s]
[I 2025-03-21 17:47:15,163] Trial 11 finished with value: 0.6604068857589984 and parameters: {'hidden_dim': 256, 'dropout': 0.376042368065664, 'batch_size': 16, 'learning_rate': 0.006883437451606231, 'weight_decay': 7.585260929894121e-05}. Best is trial 8 with value: 0.6835844567803331.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.16it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.79it/s]


Epoch 1/5 - Time: 24.89s
Train Loss: 0.4534, Train F1: 0.5335
Val Loss: 0.3913, Val F1: 0.6134
Val Precision: 0.7161, Val Recall: 0.5365
Validation F1 increased (0.000000 --> 0.613400). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.28it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.80it/s]


Epoch 2/5 - Time: 24.83s
Train Loss: 0.3749, Train F1: 0.6633
Val Loss: 0.3860, Val F1: 0.6856
Val Precision: 0.6422, Val Recall: 0.7352
Validation F1 increased (0.613400 --> 0.685552). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.25it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.65it/s]


Epoch 3/5 - Time: 24.85s
Train Loss: 0.3256, Train F1: 0.7184
Val Loss: 0.3827, Val F1: 0.6379
Val Precision: 0.7236, Val Recall: 0.5703
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.36it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.26it/s]


Epoch 4/5 - Time: 24.81s
Train Loss: 0.2683, Train F1: 0.7856
Val Loss: 0.4085, Val F1: 0.6487
Val Precision: 0.6991, Val Recall: 0.6050
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.45it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.21it/s]


Epoch 5/5 - Time: 24.79s
Train Loss: 0.2072, Train F1: 0.8477
Val Loss: 0.4239, Val F1: 0.6493
Val Precision: 0.6601, Val Recall: 0.6389
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 109.98it/s]
[I 2025-03-21 17:49:21,962] Trial 12 finished with value: 0.6855524079320113 and parameters: {'hidden_dim': 256, 'dropout': 0.4992999000290837, 'batch_size': 16, 'learning_rate': 0.0005037818556479531, 'weight_decay': 9.681497047719699e-05}. Best is trial 12 with value: 0.6855524079320113.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.32it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.79it/s]


Epoch 1/5 - Time: 24.81s
Train Loss: 0.4605, Train F1: 0.5156
Val Loss: 0.3900, Val F1: 0.6709
Val Precision: 0.6551, Val Recall: 0.6875
Validation F1 increased (0.000000 --> 0.670902). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.39it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 110.18it/s]


Epoch 2/5 - Time: 24.77s
Train Loss: 0.3789, Train F1: 0.6511
Val Loss: 0.3699, Val F1: 0.6130
Val Precision: 0.7484, Val Recall: 0.5191
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.38it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.50it/s]


Epoch 3/5 - Time: 24.82s
Train Loss: 0.3302, Train F1: 0.7192
Val Loss: 0.3818, Val F1: 0.6114
Val Precision: 0.7453, Val Recall: 0.5182
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.30it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.63it/s]


Epoch 4/5 - Time: 24.83s
Train Loss: 0.2769, Train F1: 0.7831
Val Loss: 0.3886, Val F1: 0.6551
Val Precision: 0.7107, Val Recall: 0.6076
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 110.02it/s]
[I 2025-03-21 17:51:03,779] Trial 13 finished with value: 0.6709021601016518 and parameters: {'hidden_dim': 256, 'dropout': 0.499164563710084, 'batch_size': 16, 'learning_rate': 0.000377960236279894, 'weight_decay': 9.015899669441202e-05}. Best is trial 12 with value: 0.6855524079320113.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 24.16it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.11it/s]


Epoch 1/5 - Time: 12.81s
Train Loss: 0.4598, Train F1: 0.5127
Val Loss: 0.3936, Val F1: 0.6018
Val Precision: 0.7126, Val Recall: 0.5208
Validation F1 increased (0.000000 --> 0.601805). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.05it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.70it/s]


Epoch 2/5 - Time: 12.87s
Train Loss: 0.3744, Train F1: 0.6622
Val Loss: 0.3674, Val F1: 0.6428
Val Precision: 0.7252, Val Recall: 0.5773
Validation F1 increased (0.601805 --> 0.642823). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.15it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.16it/s]


Epoch 3/5 - Time: 12.85s
Train Loss: 0.3251, Train F1: 0.7209
Val Loss: 0.3674, Val F1: 0.6811
Val Precision: 0.6817, Val Recall: 0.6806
Validation F1 increased (0.642823 --> 0.681147). Saving model...


Training: 100%|██████████| 269/269 [00:11<00:00, 24.20it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.50it/s]


Epoch 4/5 - Time: 12.82s
Train Loss: 0.2629, Train F1: 0.7896
Val Loss: 0.3996, Val F1: 0.6397
Val Precision: 0.7186, Val Recall: 0.5764
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:11<00:00, 24.27it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 42.75it/s]


Epoch 5/5 - Time: 12.77s
Train Loss: 0.1966, Train F1: 0.8557
Val Loss: 0.4540, Val F1: 0.6488
Val Precision: 0.6797, Val Recall: 0.6207
EarlyStopping counter: 2 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 42.84it/s]
[I 2025-03-21 17:52:09,772] Trial 14 finished with value: 0.6811468288444831 and parameters: {'hidden_dim': 512, 'dropout': 0.4323880217012266, 'batch_size': 64, 'learning_rate': 0.0005647862288203085, 'weight_decay': 3.0378172130587978e-05}. Best is trial 12 with value: 0.6855524079320113.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.29it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.72it/s]


Epoch 1/5 - Time: 24.83s
Train Loss: 0.4649, Train F1: 0.5005
Val Loss: 0.3927, Val F1: 0.6619
Val Precision: 0.6481, Val Recall: 0.6762
Validation F1 increased (0.000000 --> 0.661852). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.23it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.67it/s]


Epoch 2/5 - Time: 24.86s
Train Loss: 0.3764, Train F1: 0.6603
Val Loss: 0.3843, Val F1: 0.6048
Val Precision: 0.7776, Val Recall: 0.4948
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.50it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.54it/s]


Epoch 3/5 - Time: 24.74s
Train Loss: 0.3260, Train F1: 0.7217
Val Loss: 0.3988, Val F1: 0.6246
Val Precision: 0.7543, Val Recall: 0.5330
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.42it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.67it/s]


Epoch 4/5 - Time: 24.77s
Train Loss: 0.2695, Train F1: 0.7857
Val Loss: 0.4233, Val F1: 0.6348
Val Precision: 0.7330, Val Recall: 0.5599
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 110.34it/s]
[I 2025-03-21 17:53:51,552] Trial 15 finished with value: 0.6618521665250637 and parameters: {'hidden_dim': 256, 'dropout': 0.49946441492343524, 'batch_size': 16, 'learning_rate': 0.0002208593967342603, 'weight_decay': 1.3769144005769938e-05}. Best is trial 12 with value: 0.6855524079320113.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.25it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 110.22it/s]


Epoch 1/5 - Time: 25.31s
Train Loss: 0.4382, Train F1: 0.5665
Val Loss: 0.3764, Val F1: 0.6186
Val Precision: 0.7690, Val Recall: 0.5174
Validation F1 increased (0.000000 --> 0.618578). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.28it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.80it/s]


Epoch 2/5 - Time: 25.30s
Train Loss: 0.3141, Train F1: 0.7349
Val Loss: 0.4694, Val F1: 0.5915
Val Precision: 0.7315, Val Recall: 0.4965
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.38it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.93it/s]


Epoch 3/5 - Time: 25.28s
Train Loss: 0.2112, Train F1: 0.8399
Val Loss: 0.4603, Val F1: 0.6430
Val Precision: 0.6591, Val Recall: 0.6276
Validation F1 increased (0.618578 --> 0.642952). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.53it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.94it/s]


Epoch 4/5 - Time: 25.18s
Train Loss: 0.1148, Train F1: 0.9200
Val Loss: 0.8621, Val F1: 0.5994
Val Precision: 0.6382, Val Recall: 0.5651
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 47.13it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.61it/s]


Epoch 5/5 - Time: 25.38s
Train Loss: 0.0648, Train F1: 0.9617
Val Loss: 0.9874, Val F1: 0.6242
Val Precision: 0.5791, Val Recall: 0.6771
EarlyStopping counter: 2 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 110.55it/s]
[I 2025-03-21 17:56:00,667] Trial 16 finished with value: 0.6429524232992441 and parameters: {'hidden_dim': 512, 'dropout': 0.4320876431747581, 'batch_size': 16, 'learning_rate': 0.0016439435828147543, 'weight_decay': 3.96285310609686e-06}. Best is trial 12 with value: 0.6855524079320113.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 269/269 [00:11<00:00, 24.29it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.55it/s]


Epoch 1/5 - Time: 12.73s
Train Loss: 0.4642, Train F1: 0.5014
Val Loss: 0.3866, Val F1: 0.6025
Val Precision: 0.7179, Val Recall: 0.5191
Validation F1 increased (0.000000 --> 0.602519). Saving model...


Training: 100%|██████████| 269/269 [00:10<00:00, 24.54it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.39it/s]


Epoch 2/5 - Time: 12.62s
Train Loss: 0.3707, Train F1: 0.6582
Val Loss: 0.3854, Val F1: 0.5961
Val Precision: 0.7703, Val Recall: 0.4861
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 269/269 [00:10<00:00, 24.59it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.35it/s]


Epoch 3/5 - Time: 12.60s
Train Loss: 0.3186, Train F1: 0.7277
Val Loss: 0.3794, Val F1: 0.6494
Val Precision: 0.7408, Val Recall: 0.5781
Validation F1 increased (0.602519 --> 0.649439). Saving model...


Training: 100%|██████████| 269/269 [00:10<00:00, 24.53it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.11it/s]


Epoch 4/5 - Time: 12.64s
Train Loss: 0.2564, Train F1: 0.8005
Val Loss: 0.3847, Val F1: 0.6661
Val Precision: 0.6664, Val Recall: 0.6658
Validation F1 increased (0.649439 --> 0.666088). Saving model...


Training: 100%|██████████| 269/269 [00:10<00:00, 24.48it/s]
Validating: 100%|██████████| 68/68 [00:01<00:00, 43.33it/s]


Epoch 5/5 - Time: 12.65s
Train Loss: 0.1842, Train F1: 0.8665
Val Loss: 0.4838, Val F1: 0.6471
Val Precision: 0.6621, Val Recall: 0.6328
EarlyStopping counter: 1 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 68/68 [00:01<00:00, 42.87it/s]
[I 2025-03-21 17:57:05,727] Trial 17 finished with value: 0.6660877116804168 and parameters: {'hidden_dim': 256, 'dropout': 0.47476943763630997, 'batch_size': 64, 'learning_rate': 0.0007931645345283066, 'weight_decay': 4.435500680222667e-05}. Best is trial 12 with value: 0.6855524079320113.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.37it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.05it/s]


Epoch 1/5 - Time: 25.79s
Train Loss: 0.4509, Train F1: 0.5247
Val Loss: 0.3989, Val F1: 0.5639
Val Precision: 0.7611, Val Recall: 0.4479
Validation F1 increased (0.000000 --> 0.563934). Saving model...


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.34it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.75it/s]


Epoch 2/5 - Time: 25.79s
Train Loss: 0.3791, Train F1: 0.6528
Val Loss: 0.3715, Val F1: 0.6058
Val Precision: 0.7663, Val Recall: 0.5009
Validation F1 increased (0.563934 --> 0.605774). Saving model...


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.40it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.18it/s]


Epoch 3/5 - Time: 25.75s
Train Loss: 0.3412, Train F1: 0.7029
Val Loss: 0.3654, Val F1: 0.6646
Val Precision: 0.6894, Val Recall: 0.6415
Validation F1 increased (0.605774 --> 0.664568). Saving model...


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.45it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.81it/s]


Epoch 4/5 - Time: 25.74s
Train Loss: 0.2936, Train F1: 0.7610
Val Loss: 0.3770, Val F1: 0.6911
Val Precision: 0.6453, Val Recall: 0.7439
Validation F1 increased (0.664568 --> 0.691129). Saving model...


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.51it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 110.05it/s]


Epoch 5/5 - Time: 25.67s
Train Loss: 0.2414, Train F1: 0.8108
Val Loss: 0.3945, Val F1: 0.6483
Val Precision: 0.6764, Val Recall: 0.6224
EarlyStopping counter: 1 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 110.94it/s]
[I 2025-03-21 17:59:17,249] Trial 18 finished with value: 0.6911290322580645 and parameters: {'hidden_dim': 512, 'dropout': 0.4508594141808036, 'batch_size': 16, 'learning_rate': 0.00027416000685940727, 'weight_decay': 9.847501948528487e-05}. Best is trial 18 with value: 0.6911290322580645.


Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.24it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.58it/s]


Epoch 1/5 - Time: 24.86s
Train Loss: 0.4753, Train F1: 0.4811
Val Loss: 0.3925, Val F1: 0.6446
Val Precision: 0.6842, Val Recall: 0.6094
Validation F1 increased (0.000000 --> 0.644628). Saving model...


Training: 100%|██████████| 1076/1076 [00:22<00:00, 48.71it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.58it/s]


Epoch 2/5 - Time: 24.67s
Train Loss: 0.3795, Train F1: 0.6552
Val Loss: 0.3677, Val F1: 0.6536
Val Precision: 0.7307, Val Recall: 0.5911
Validation F1 increased (0.644628 --> 0.653551). Saving model...


Training: 100%|██████████| 1076/1076 [00:21<00:00, 49.10it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.04it/s]


Epoch 3/5 - Time: 24.50s
Train Loss: 0.3234, Train F1: 0.7252
Val Loss: 0.3652, Val F1: 0.6794
Val Precision: 0.6800, Val Recall: 0.6788
Validation F1 increased (0.653551 --> 0.679409). Saving model...


Training: 100%|██████████| 1076/1076 [00:21<00:00, 48.95it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.23it/s]


Epoch 4/5 - Time: 24.54s
Train Loss: 0.2646, Train F1: 0.7894
Val Loss: 0.4027, Val F1: 0.6623
Val Precision: 0.6589, Val Recall: 0.6658
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:21<00:00, 48.91it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.69it/s]


Epoch 5/5 - Time: 24.57s
Train Loss: 0.2048, Train F1: 0.8536
Val Loss: 0.4366, Val F1: 0.6675
Val Precision: 0.6441, Val Recall: 0.6927
EarlyStopping counter: 2 out of 3
Loaded best model from 'best_bilstm_attention_model.pt'


Validating: 100%|██████████| 269/269 [00:02<00:00, 109.79it/s]
[I 2025-03-21 18:01:23,047] Trial 19 finished with value: 0.6794092093831451 and parameters: {'hidden_dim': 128, 'dropout': 0.3864588188567415, 'batch_size': 16, 'learning_rate': 0.0002398422178544341, 'weight_decay': 1.5113095854152683e-05}. Best is trial 18 with value: 0.6911290322580645.


Best trial:
  Value (F1 Score): 0.6911
  Params: 
    hidden_dim: 512
    dropout: 0.4508594141808036
    batch_size: 16
    learning_rate: 0.00027416000685940727
    weight_decay: 9.847501948528487e-05


### Train Model

In [39]:
# Train final model with best hyperparameters
print("\nTraining final model with best hyperparameters...")
model = BiLSTMAttention(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    hidden_dim=trial.params['hidden_dim'],
    # num_layers=trial.params['num_layers'],
    num_layers=num_layers,
    dropout=trial.params['dropout'],
    pretrained_embeddings=embeddings
)


# Print model architecture summary
print("\nModel Architecture:")
print(f"Vocabulary Size: {len(vocab)}")
print(f"Embedding Dimension: {embedding_dim}")
print(f"Hidden Dimension: {trial.params['hidden_dim']}")
# print(f"Batch size: {trial.params['batch_size']}")
print(f"Number of LSTM Layers: {num_layers}")
print(f"Dropout Rate: {trial.params['dropout']}")



# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}")

# Initialize trainer
trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    batch_size=trial.params['batch_size'],
    learning_rate=trial.params['learning_rate'],
    weight_decay=trial.params['weight_decay'],
    device=device
)

# Train model
print("\nStarting training...")
trainer.train(num_epochs=15)

# Final evaluation
print("\nPerforming final evaluation on validation set...")
val_metrics = trainer.evaluate()

print("\nFinal Validation Metrics:")
print(f"Loss: {val_metrics['loss']:.4f}")
print(f"Accuracy: {val_metrics['accuracy']:.4f}")
print(f"Precision: {val_metrics['precision']:.4f}")
print(f"Recall: {val_metrics['recall']:.4f}")
print(f"F1 Score: {val_metrics['f1']:.4f}")

# Save final model
final_model_path = 'models/final_model.pt'
trainer.save_model(final_model_path)

print("\nTraining and evaluation completed!")


Training final model with best hyperparameters...

Model Architecture:
Vocabulary Size: 12686
Embedding Dimension: 300
Hidden Dimension: 512
Number of LSTM Layers: 1
Dropout Rate: 0.4508594141808036
Total Parameters: 6,880,364
Trainable Parameters: 6,880,364

Starting training...
Starting training on device: cuda
Training set size: 17206
Validation set size: 4302


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.29it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 109.06it/s]


Epoch 1/15 - Time: 25.80s
Train Loss: 0.4565, Train F1: 0.5149
Val Loss: 0.3946, Val F1: 0.5839
Val Precision: 0.7453, Val Recall: 0.4800
Validation F1 increased (0.000000 --> 0.583949). Saving model...


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.33it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.51it/s]


Epoch 2/15 - Time: 25.80s
Train Loss: 0.3796, Train F1: 0.6497
Val Loss: 0.3715, Val F1: 0.6736
Val Precision: 0.6772, Val Recall: 0.6701
Validation F1 increased (0.583949 --> 0.673647). Saving model...


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.35it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 107.94it/s]


Epoch 3/15 - Time: 25.80s
Train Loss: 0.3423, Train F1: 0.6967
Val Loss: 0.3796, Val F1: 0.6523
Val Precision: 0.7288, Val Recall: 0.5903
EarlyStopping counter: 1 out of 3


Training: 100%|██████████| 1076/1076 [00:23<00:00, 46.25it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.71it/s]


Epoch 4/15 - Time: 25.84s
Train Loss: 0.2974, Train F1: 0.7559
Val Loss: 0.3733, Val F1: 0.6628
Val Precision: 0.6751, Val Recall: 0.6510
EarlyStopping counter: 2 out of 3


Training: 100%|██████████| 1076/1076 [00:22<00:00, 46.98it/s]
Validating: 100%|██████████| 269/269 [00:02<00:00, 108.83it/s]


Epoch 5/15 - Time: 25.47s
Train Loss: 0.2458, Train F1: 0.8075
Val Loss: 0.4039, Val F1: 0.6718
Val Precision: 0.6650, Val Recall: 0.6788
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model from 'best_bilstm_attention_model.pt'

Performing final evaluation on validation set...


Validating: 100%|██████████| 269/269 [00:02<00:00, 109.35it/s]


Final Validation Metrics:
Loss: 0.3715
Accuracy: 0.8261
Precision: 0.6772
Recall: 0.6701
F1 Score: 0.6736
Model saved to models/final_model.pt

Training and evaluation completed!





In [40]:
# Create the dataset
test_dataset = EvidenceDetectionDataset(test_df, vocab)
print(f"Test dataset size: {len(test_dataset)}")

# Create test data loader for final testing
test_loader = DataLoader(
    test_dataset,
    batch_size=trial.params['batch_size'],
    shuffle=False,
    collate_fn=trainer.collate_fn
)

# Load the final model
final_model_path = 'models/final_model.pt'
final_model = BiLSTMAttention(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    hidden_dim=trial.params['hidden_dim'],
    num_layers=num_layers,
    dropout=trial.params['dropout'],
    pretrained_embeddings=embeddings
)
final_model.load_state_dict(torch.load(final_model_path))
final_model.to(device)

# Now evaluate on test set
print("\nPerforming evaluation on test set (dev.csv)...")
final_model.eval()
test_loss = 0.0
all_predictions = []
all_labels = []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        # Move batch to device
        claim_ids = batch['claim_ids'].to(device)
        claim_lengths = batch['claim_lengths']
        evidence_ids = batch['evidence_ids'].to(device)
        evidence_lengths = batch['evidence_lengths']
        labels = batch['labels'].to(device)

        # Forward pass
        logits, _ = final_model(claim_ids, claim_lengths, evidence_ids, evidence_lengths)
        loss = trainer.criterion(logits, labels)

        # Accumulate metrics
        test_loss += loss.item()
        predictions = torch.argmax(logits, dim=1).cpu().numpy()
        all_predictions.extend(predictions)
        all_labels.extend(labels.cpu().numpy())

# Calculate metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions, average='binary')
recall = recall_score(all_labels, all_predictions, average='binary')
f1 = f1_score(all_labels, all_predictions, average='binary')

print("\nTest Set Metrics:")
print(f"Loss: {test_loss / len(test_loader):.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Test dataset size: 5926

Performing evaluation on test set (dev.csv)...


Testing: 100%|██████████| 371/371 [00:03<00:00, 109.15it/s]


Test Set Metrics:
Loss: 0.3840
Accuracy: 0.8134
Precision: 0.6716
Recall: 0.6372
F1 Score: 0.6539





### Test the model

In [41]:
# Function to collate batches without labels for test data
def test_collate_fn(batch):
    # Separate batch elements
    claims = [item['claim_ids'] for item in batch]
    claim_lengths = torch.tensor([item['claim_length'] for item in batch])
    evidences = [item['evidence_ids'] for item in batch]
    evidence_lengths = torch.tensor([item['evidence_length'] for item in batch])

    # Pad sequences
    padded_claims = pad_sequence(claims, batch_first=True, padding_value=0)
    padded_evidences = pad_sequence(evidences, batch_first=True, padding_value=0)

    return {
        'claim_ids': padded_claims,
        'claim_lengths': claim_lengths,
        'evidence_ids': padded_evidences,
        'evidence_lengths': evidence_lengths
    }

# Set device
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print(f"Using device: {device}")

# Load vocabulary
# with open('vocab.pkl', 'rb') as f:
#     vocab = pickle.load(f)
# print(f"Vocabulary size: {len(vocab)}")

# Load the model with the same hyperparameters
model = BiLSTMAttention(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    hidden_dim=trial.params['hidden_dim'],
    num_layers=num_layers,
    dropout=trial.params['dropout'],
    pretrained_embeddings=embeddings
)

# Load trained model weights
model.load_state_dict(torch.load('models/final_model.pt', map_location=device))
model = model.to(device)
model.eval()
print("Model loaded successfully")

# Load test data
# test_file = './data/dev.csv'  # Change to your test file path
test_df = pd.read_csv('./data/dev.csv')
print(f"Test data loaded: {len(test_df)} rows")

# Create test dataset
test_dataset = EvidenceDetectionDataset(test_df, vocab, is_test=True)
print(f"Test dataset created: {len(test_dataset)} samples")

# Create test dataloader
batch_size = 32
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=test_collate_fn
)

# Run inference
print("Running inference on test data...")
all_predictions = []

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        # Move batch to device
        claim_ids = batch['claim_ids'].to(device)
        claim_lengths = batch['claim_lengths']
        evidence_ids = batch['evidence_ids'].to(device)
        evidence_lengths = batch['evidence_lengths']

        # Forward pass
        logits, _ = model(claim_ids, claim_lengths, evidence_ids, evidence_lengths)

        # Get predictions (0 or 1)
        predictions = torch.argmax(logits, dim=1).cpu().numpy()
        all_predictions.extend(predictions)

# Save predictions to CSV
predictions_df = pd.DataFrame({'prediction': all_predictions})
predictions_df.to_csv('predictions.csv', index=False)
print(f"Predictions saved to 'predictions.csv'")

Model loaded successfully
Test data loaded: 5926 rows
Test dataset created: 5926 samples
Running inference on test data...


Testing: 100%|██████████| 186/186 [00:02<00:00, 73.30it/s]

Predictions saved to 'predictions.csv'



