In [1]:
import logging
import pandas as pd
import torch
import optuna
from pathlib import Path
from sentence_transformers import SentenceTransformer, losses, InputExample
from sentence_transformers.evaluation import BinaryClassificationEvaluator
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    AutoConfig
)
from torch.utils.data import DataLoader, Dataset as TorchDataset
from datasets import Dataset, DatasetDict

# Publish the `datasets` classes as builtins so they resolve from any module.
# NOTE(review): nothing visible in this notebook looks these names up as
# builtins — presumably a workaround for another library; confirm or remove.
import builtins
builtins.Dataset = Dataset
builtins.DatasetDict = DatasetDict

# Set before any training starts.
# NOTE(review): presumably switches off Weights & Biases reporting — confirm.
import os
os.environ['WANDB_DISABLED'] = 'true'

# Timestamped INFO-level output for the logging.info calls used in later cells.
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Path configuration
# `__file__` is not defined inside a Jupyter kernel, so fall back to the current
# working directory (typically the folder the notebook lives in).
try:
    NOTEBOOK_DIR = Path(__file__).resolve().parent
except NameError:
    NOTEBOOK_DIR = Path.cwd()

DATA_DIR = NOTEBOOK_DIR.parent / "data"
TRAIN_FILE = DATA_DIR / "train.csv"
DEV_FILE = DATA_DIR / "dev.csv"
AUG_TRAIN_FILE = DATA_DIR / "train_augmented.csv"
AUG_TRAIN_HIGH_REPLACEMENT_FILE = DATA_DIR / "train_augmented_high_replacement_fraction.csv"
SAVE_DIR = DATA_DIR / "results" / "sentence-transformer"
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Training parameters
BATCH_SIZE = 16
NUM_EPOCHS = 3
LEARNING_RATE = 2e-5
MAX_SEQ_LENGTH = 128
BASE_MODEL = 'sentence-transformers/gtr-t5-base'

# Location the fine-tuned model is written to and reloaded from. Later cells
# reference this name, so it has to be defined here for Restart & Run All.
MODEL_SAVE_PATH = SAVE_DIR / f"{BASE_MODEL.split('/')[-1]}.pt"

# Optuna
N_TRIALS = 10


In [3]:
def get_device() -> torch.device:
    """
    Pick the torch device computations should run on.

    CUDA is preferred, then Apple's MPS backend (when this torch build
    exposes it and reports it available), and finally the CPU.

    Returns:
        torch.device for 'cuda', 'mps' or 'cpu'
    """
    if torch.cuda.is_available():
        backend = 'cuda'
    else:
        # Older torch builds have no `torch.backends.mps` attribute at all.
        has_mps = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
        backend = 'mps' if has_mps else 'cpu'
    return torch.device(backend)

In [4]:
class EvidenceDetectionDataset(TorchDataset):
    """Claim/evidence pairs wrapped as ``InputExample`` objects.

    Every row of the input frame becomes one example whose ``texts`` are
    ``[Claim, Evidence]`` and whose ``label`` is the row's ``label`` cast to
    ``float``. The examples are kept in row order in ``self.examples``.
    """

    def __init__(self, dataframe):
        """Build the example list from a DataFrame.

        Args:
            dataframe: pandas DataFrame with 'Claim', 'Evidence' and 'label'
                columns.

        Raises:
            KeyError: if one of the three required columns is missing.
        """
        # Walk the three columns in lockstep rather than DataFrame.iterrows():
        # iterrows materialises a Series per row (slow on large frames) and
        # upcasts mixed column dtypes to a common one.
        self.examples = [
            InputExample(texts=[claim, evidence], label=float(label))
            for claim, evidence, label in zip(
                dataframe['Claim'], dataframe['Evidence'], dataframe['label']
            )
        ]

    def __len__(self):
        """Return the number of claim/evidence examples."""
        return len(self.examples)

    def __getitem__(self, idx):
        """Return the example stored at position `idx`."""
        return self.examples[idx]

def load_data():
    """Read the train and dev CSVs and wrap each as an EvidenceDetectionDataset.

    Returns:
        Tuple of (training dataset, development dataset, development DataFrame).
    """
    logging.info("Loading datasets...")

    # Training file is read first, then the development file.
    train_frame, dev_frame = (pd.read_csv(csv_path) for csv_path in (TRAIN_FILE, DEV_FILE))

    logging.info(f"Training data shape: {train_frame.shape}")
    logging.info(f"Development data shape: {dev_frame.shape}")

    return (
        EvidenceDetectionDataset(train_frame),
        EvidenceDetectionDataset(dev_frame),
        dev_frame,
    )

In [None]:
# Baseline fine-tuning run using the fixed hyperparameters from the config cell.
device = get_device()
logging.info(f"Using device: {device}")

train_dataset, dev_dataset, dev_df = load_data()

model = SentenceTransformer(BASE_MODEL)
model.to(device)

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
train_loss = losses.ContrastiveLoss(model)

evaluator = BinaryClassificationEvaluator.from_input_examples(
    dev_dataset.examples,
    name='evidence-detection-dev'
)

# Build the output location once so the fit call and the log message below
# cannot drift apart. (The previous log line nested double quotes inside a
# double-quoted f-string, which is a SyntaxError before Python 3.12.)
model_output_path = SAVE_DIR / f"{BASE_MODEL.split('/')[-1]}.pt"

# Train the model
logging.info("Starting training...")
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=evaluator,
    epochs=NUM_EPOCHS,
    evaluation_steps=1000,
    warmup_steps=100,
    # Pass the configured learning rate explicitly instead of relying on the
    # library default.
    optimizer_params={'lr': LEARNING_RATE},
    output_path=str(model_output_path),
    show_progress_bar=True
)

logging.info(f"Model saved to {model_output_path}")

In [9]:
class _EarlyStop(Exception):
    """Raised by the fit callback to abort a trial's training loop."""


def _make_early_stopping_callback(patience):
    """Build a ``callback(score, epoch, steps)`` suitable for ``SentenceTransformer.fit``.

    The callback remembers the best score seen so far and raises ``_EarlyStop``
    once `patience` consecutive evaluations fail to improve on it.
    """
    tracker = {"best": float("-inf"), "stale": 0}

    def _on_evaluation(score, epoch, steps):
        if score > tracker["best"]:
            tracker["best"] = score
            tracker["stale"] = 0
            return
        tracker["stale"] += 1
        if tracker["stale"] >= patience:
            raise _EarlyStop(
                f"no improvement in {patience} evaluations (epoch {epoch}, step {steps})"
            )

    return _on_evaluation


def _extract_accuracy(metrics):
    """Pull an accuracy value out of whatever the evaluator returned.

    A dict result is searched for keys ending in ``accuracy`` (this skips
    ``*_accuracy_threshold``) and the largest value is used; a bare number is
    used as-is. Falls back to 0.0, with a warning, when no accuracy is present.
    """
    if isinstance(metrics, dict):
        scores = [float(value) for key, value in metrics.items() if key.endswith("accuracy")]
        if scores:
            return max(scores)
        logging.warning(f"No accuracy metric found in evaluator output: {sorted(metrics)}")
        return 0.0
    return float(metrics)


def _release_device_memory():
    """Run a garbage collection and flush the accelerator cache, if there is one."""
    import gc

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif (
        hasattr(torch.backends, 'mps')
        and torch.backends.mps.is_available()
        and hasattr(torch, 'mps')
    ):
        torch.mps.empty_cache()


def objective(trial):
    """Optuna objective: fine-tune BASE_MODEL with sampled hyperparameters.

    Args:
        trial: Optuna trial used to sample batch size, warmup steps,
            evaluation interval, weight decay, learning rate and
            early-stopping patience.

    Returns:
        Accuracy of the fine-tuned model on the development set
        (0.0 if the evaluator reports no accuracy metric).
    """
    device = get_device()
    logging.info(f"Using device: {device}")

    # Load datasets
    train_dataset, dev_dataset, _ = load_data()

    # Suggest hyperparameters via optuna.
    # The batch size is pinned to a single value; widen the list
    # (e.g. [8, 16, 32]) to tune it as well.
    batch_size = trial.suggest_categorical("batch_size", [16])
    warmup_steps = trial.suggest_int("warmup_steps", 50, 200, step=50)
    evaluation_steps = trial.suggest_int("evaluation_steps", 500, 2000, step=500)
    weight_decay = trial.suggest_float("weight_decay", 1e-7, 1e-4, log=True)
    lr = trial.suggest_float("lr", 1e-6, 1e-3, log=True)
    early_stopping_patience = trial.suggest_int("early_stopping_patience", 3, 5)

    # Each trial writes to its own directory so trials never overwrite each other.
    trial_output_path = SAVE_DIR / "optuna" / f"trial_{trial.number}"

    model = SentenceTransformer(BASE_MODEL)
    try:
        model.to(device)

        # Prepare dataloader and loss function
        train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
        train_loss = losses.ContrastiveLoss(model)

        # Setup evaluator for development data
        evaluator = BinaryClassificationEvaluator.from_input_examples(
            dev_dataset.examples,
            name='evidence-detection-dev'
        )

        logging.info("Starting training...")
        try:
            model.fit(
                train_objectives=[(train_dataloader, train_loss)],
                evaluator=evaluator,
                epochs=10,  # Upper bound; the early-stopping callback halts sooner
                evaluation_steps=evaluation_steps,
                warmup_steps=warmup_steps,
                output_path=str(trial_output_path),
                show_progress_bar=False,
                # fit() builds its own parameter groups from this argument, so a
                # weight_decay placed in optimizer_params would be overridden.
                weight_decay=weight_decay,
                optimizer_params={'lr': lr},
                # fit() expects a plain callable(score, epoch, steps); raising
                # from it is the only way to stop the loop early.
                callback=_make_early_stopping_callback(early_stopping_patience),
            )
        except _EarlyStop as stop:
            logging.info(f"Early stopping: {stop}")

        # Evaluate the trained model on the dev set
        accuracy = _extract_accuracy(evaluator(model))
        logging.info(f"Trial finished with accuracy: {accuracy}")

        # Return accuracy as the objective value
        return accuracy
    finally:
        # Drop every reference to the model before flushing caches so the next
        # trial does not start with this trial's weights still on the device.
        model = train_loss = train_dataloader = None
        _release_device_memory()

In [10]:

print("\nHYPERPARAMETER TUNING")
print("=====================")
print(f"Running {N_TRIALS} trials...")

# Create a study (the name only labels the in-memory study in Optuna's logs)
study = optuna.create_study(direction='maximize', study_name='sentence_transformer_tuning')

# Run optimization
try:
    # gc_after_trial=True makes Optuna run a garbage collection after every
    # trial, which helps when each trial allocates a full model.
    study.optimize(objective, n_trials=N_TRIALS, gc_after_trial=True)
except KeyboardInterrupt:
    print("Hyperparameter tuning interrupted.")

# Report the outcome. study.best_value / best_params raise when no trial has
# completed, so check for completed trials first.
completed_trials = [
    t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE
]
if completed_trials:
    print(f"Best accuracy: {study.best_value:.4f}")
    print(f"Best hyperparameters: {study.best_params}")
else:
    print("No trial completed successfully.")

[I 2025-03-13 17:29:33,183] A new study created in memory with name: rgcn_tuning
2025-03-13 17:29:33 - Using device: mps
2025-03-13 17:29:33 - Loading datasets...
2025-03-13 17:29:33 - Training data shape: (21508, 3)
2025-03-13 17:29:33 - Development data shape: (5926, 3)



HYPERPARAMETER TUNING
Running 10 trials...


2025-03-13 17:29:33 - Use pytorch device_name: mps
2025-03-13 17:29:33 - Load pretrained SentenceTransformer: sentence-transformers/gtr-t5-base
[W 2025-03-13 17:29:35,854] Trial 0 failed with parameters: {'batch_size': 4, 'warmup_steps': 100, 'evaluation_steps': 500, 'weight_decay': 3.453149453019887e-06, 'lr': 7.814165395609636e-06, 'early_stopping_patience': 3} because of the following error: RuntimeError('MPS backend out of memory (MPS allocated: 18.11 GB, other allocations: 12.88 MB, max allowed: 18.13 GB). Tried to allocate 94.12 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).').
Traceback (most recent call last):
  File "/Users/harvey/School/Year 3/NLU/comp34812/.venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/j5/14j5156n70d__sf0709mj2qr0000gn/T/ipykernel_83578/45052119

RuntimeError: MPS backend out of memory (MPS allocated: 18.11 GB, other allocations: 12.88 MB, max allowed: 18.13 GB). Tried to allocate 94.12 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [None]:
# Optional: reload the fine-tuned model from disk (e.g. to evaluate it on the dev set).
# The directory is rebuilt from SAVE_DIR and BASE_MODEL — the same location the
# baseline training cell passes to model.fit(output_path=...) — and converted to
# a string before being handed to SentenceTransformer.
saved_model_dir = SAVE_DIR / f"{BASE_MODEL.split('/')[-1]}.pt"
model = SentenceTransformer(str(saved_model_dir))  # Load the saved model

In [None]:
import os
import subprocess
from IPython.display import FileLink, display

def download_file(path, download_file_name, working_dir='/kaggle/working/'):
    """Zip `path` and display a download link for the archive.

    Args:
        path: File or directory to archive (added recursively).
        download_file_name: Archive name without the ``.zip`` extension.
        working_dir: Directory the archive is written to. The process also
            changes into it so the relative link resolves. Defaults to the
            Kaggle working directory.

    Returns:
        None. If the ``zip`` command is missing or fails, a diagnostic is
        printed and no link is displayed.
    """
    os.chdir(working_dir)
    zip_name = os.path.join(working_dir, f"{download_file_name}.zip")

    # Pass the arguments as a list (no shell) so paths containing spaces or
    # shell metacharacters are handled literally.
    try:
        result = subprocess.run(
            ["zip", "-r", zip_name, path],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError as exc:
        # Without a shell, a missing `zip` binary raises instead of returning 127.
        print("Unable to run zip command!")
        print(exc)
        return

    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f'{download_file_name}.zip'))

# Kaggle-only step: zip the model directory and show a download link for out.zip.
# NOTE(review): this hard-coded path differs from SAVE_DIR used by the training
# cells — confirm it points at the directory that was actually written.
download_file('/kaggle/working/models/evidence_detection_model', 'out')
