In [10]:
import logging
import pandas as pd
from pathlib import Path
import re
import json
import torch

# Hyperparameter tuning
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

# Text processing
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# SVM
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    f1_score,
    matthews_corrcoef
)
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler

# Set up logging
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO)

In [11]:
# Notebook-wide configuration: input CSV locations, an output directory, and the
# size of the hyperparameter search.
DATA_DIR = Path('./data')
TRAIN_PATH = DATA_DIR.joinpath('train.csv')
DEV_PATH = DATA_DIR.joinpath('dev.csv')

# Created eagerly (including parents); a no-op when the directory already exists.
# NOTE(review): presumably intended for saved models, judging by the name — nothing
# in the visible cells writes to it yet.
MODEL_SAVE_PATH = Path('./models')
MODEL_SAVE_PATH.mkdir(parents=True, exist_ok=True)

# How many Optuna trials the tuning cell runs.
N_TRIALS = 25

In [12]:
def get_device() -> torch.device:
    """Pick the torch device to report: CUDA if available, else Apple MPS, else CPU.

    Returns:
        torch.device: 'cuda', 'mps' or 'cpu', checked in that order of preference.
    """
    if torch.cuda.is_available():
        backend = 'cuda'
    elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        # The hasattr guard covers torch builds that do not expose an MPS backend.
        backend = 'mps'
    else:
        backend = 'cpu'
    return torch.device(backend)


get_device()

device(type='cpu')

In [13]:
def prepare_data(train_df, dev_df):
    """Prepare data for SVM training.

    Builds the combined "<Claim> [SEP] <Evidence>" text feature used by the
    TF-IDF / hand-crafted feature pipeline and extracts the label arrays.

    The input frames are NOT modified: each returned frame is a copy with an
    added (or replaced) 'text' column, so the function can be called repeatedly
    on the same raw frames with the same result.

    Args:
        train_df: DataFrame with 'Claim', 'Evidence' and 'label' columns.
        dev_df: DataFrame with the same columns as ``train_df``.

    Returns:
        tuple: ``(train_df, dev_df, train_labels, dev_labels)`` where the frames
        carry the new 'text' column and the labels are the ``.values`` arrays of
        the respective 'label' columns.
    """
    def _with_text(df):
        # Missing cells become empty strings; otherwise NaN would propagate into
        # 'text' and break the string operations applied to it downstream.
        claim = df['Claim'].fillna('').astype(str)
        evidence = df['Evidence'].fillna('').astype(str)
        # assign() returns a new frame, leaving the caller's DataFrame untouched.
        return df.assign(text=claim + " [SEP] " + evidence)

    train_df = _with_text(train_df)
    dev_df = _with_text(dev_df)

    # Extract labels
    train_labels = train_df['label'].values
    dev_labels = dev_df['label'].values

    return train_df, dev_df, train_labels, dev_labels

In [14]:
class TextPreprocessor:
    """Stateless text normaliser exposing fit / transform / fit_transform.

    Each document is lower-cased, stripped of every character that is neither a
    word character nor whitespace, tokenised with NLTK's ``word_tokenize`` and
    lemmatised token by token with a ``WordNetLemmatizer``.
    """

    def __init__(self):
        self.lemmatizer = WordNetLemmatizer()

    def fit(self, X, y=None):
        """Nothing is learned from the data; returns ``self``."""
        return self

    def preprocess(self, text):
        """Normalise one document and return it as a space-joined string of lemmas."""
        lowered = text.lower()
        stripped = re.sub(r'[^\w\s]', '', lowered)  # remove special chars
        lemmas = map(self.lemmatizer.lemmatize, word_tokenize(stripped))
        return ' '.join(lemmas)

    def transform(self, X):
        """Apply :meth:`preprocess` to every document in ``X``; returns a list."""
        return list(map(self.preprocess, X))

    def fit_transform(self, X, y=None):
        """Same result as :meth:`transform`; ``y`` is ignored."""
        return self.transform(X)

In [15]:
class FeatureExtractor:
    """Hand-crafted length/overlap features for "<claim>[SEP]<evidence>" strings.

    Stateless: ``fit`` learns nothing, and ``transform`` / ``fit_transform``
    return the same thing for the same input.

    Produced columns (in this order):
        text_length     -- character count of the whole string
        claim_length    -- character count of the part before the separator
        evidence_length -- character count of the part after the separator
        word_overlap    -- number of distinct whitespace-separated tokens that
                           occur in both parts
        claim_words     -- whitespace-separated token count of the claim part
        evidence_words  -- whitespace-separated token count of the evidence part
    """

    _SEPARATOR = "[SEP]"
    _COLUMNS = (
        'text_length',
        'claim_length',
        'evidence_length',
        'word_overlap',
        'claim_words',
        'evidence_words',
    )

    def fit(self, X, y=None):
        """No-op; returns ``self``."""
        return self

    def transform(self, X):
        """Compute one feature row per input string.

        Args:
            X: iterable of strings.

        Returns:
            pandas.DataFrame with one row per element of ``X`` and the six
            columns listed in the class docstring (also for empty input).
        """
        rows = []

        for text in X:
            # partition() splits on the FIRST separator only and never raises:
            # a string without the marker is treated as claim-only (empty
            # evidence), and any further markers stay inside the evidence part.
            # With exactly one marker the result matches text.split("[SEP]").
            claim, _, evidence = text.partition(self._SEPARATOR)
            claim_tokens = claim.split()
            evidence_tokens = evidence.split()

            rows.append({
                'text_length': len(text),
                'claim_length': len(claim),
                'evidence_length': len(evidence),
                'word_overlap': len(set(claim_tokens) & set(evidence_tokens)),
                'claim_words': len(claim_tokens),
                'evidence_words': len(evidence_tokens)
            })

        # Explicit columns keep the output width fixed even when X is empty.
        return pd.DataFrame(rows, columns=list(self._COLUMNS))

    def fit_transform(self, X, y=None):
        """Same as :meth:`transform`; ``y`` is ignored."""
        return self.transform(X)

In [None]:
# Legacy module-level counter. It stays defined so existing references to the name
# keep resolving, but objective() no longer increments it: result files are numbered
# from `trial.number` instead, because an unsynchronised `+= 1` on a global is racy
# when study.optimize() runs several trials at once (n_jobs > 1).
trial_number = 0

def objective(trial):
    """Optuna objective function for hyperparameter optimization.

    For one trial this loads the train/dev CSVs, samples SVM and TF-IDF
    hyperparameters, fits a TF-IDF + hand-crafted-feature -> scaler -> SVC
    pipeline on the training text, scores it on the dev set and writes the
    sampled parameters together with the dev metrics to ``svm_<n>.json`` in the
    current working directory (``n`` is the 1-based trial number).

    Args:
        trial: the ``optuna`` trial used to sample hyperparameters.

    Returns:
        float: accuracy on the dev set (the value the study maximises).
    """
    # Optuna gives every trial in a study a unique, consecutive 0-based number;
    # +1 keeps the 1-based file naming.
    run_id = trial.number + 1

    # Load data
    logging.info("Loading datasets...")
    train_df = pd.read_csv(TRAIN_PATH)
    dev_df = pd.read_csv(DEV_PATH)

    logging.info(f"Training data shape: {train_df.shape}")
    logging.info(f"Development data shape: {dev_df.shape}")

    # Prepare data
    train_df, dev_df, train_labels, dev_labels = prepare_data(train_df, dev_df)

    # Suggest hyperparameters
    C = trial.suggest_float("C", 0.01, 100.0, log=True)
    kernel = trial.suggest_categorical("kernel", ["linear", "rbf", "poly", "sigmoid"])
    # gamma is only sampled for the kernels listed here; "linear" keeps the default.
    gamma = trial.suggest_categorical("gamma", ["scale", "auto"]) if kernel in ["rbf", "poly", "sigmoid"] else "scale"

    # degree is only sampled for the polynomial kernel; otherwise keep SVC's default of 3.
    degree = trial.suggest_int("degree", 2, 5) if kernel == "poly" else 3

    # TF-IDF vectorizer parameters
    max_features = trial.suggest_categorical("max_features", [5000, 10000, 15000, 20000])
    min_df = trial.suggest_categorical("min_df", [1, 2, 3, 4, 5])
    max_df = trial.suggest_categorical("max_df", [0.5, 0.6, 0.7, 0.8, 0.9])
    # NOTE(review): Optuna documents categorical choices as None/bool/int/float/str;
    # tuples appear to work with an in-memory study but may trigger a warning —
    # consider encoding these as strings if persistent storage is ever used.
    ngram_range = trial.suggest_categorical("ngram_range", [(1, 1), (1, 2), (1, 3)])

    # Create pipeline
    pipeline = Pipeline([
        ('features', FeatureUnion([
            ('text_features', Pipeline([
                ('tfidf', TfidfVectorizer(
                    max_features=max_features,
                    min_df=min_df,
                    max_df=max_df,
                    ngram_range=ngram_range,
                    stop_words='english',
                    analyzer='word',
                    token_pattern=r'\w+',
                    sublinear_tf=True
                ))
            ])),
            ('custom_features', FeatureExtractor())
        ])),
        ('scaler', StandardScaler(with_mean=False)),  # TF-IDF matrices are sparse
        # probability=True is intentionally NOT set: only predict() is used below,
        # and enabling probability estimates makes fit() run an extra internal
        # cross-validation without changing what predict() returns.
        ('svm', SVC(
            C=C,
            kernel=kernel,
            gamma=gamma,
            degree=degree
        ))
    ])

    # Train model
    logging.info(f"Training SVM with hyperparameters: C={C}, kernel={kernel}, gamma={gamma}")
    pipeline.fit(train_df['text'], train_labels)

    # Evaluate on dev set
    dev_preds = pipeline.predict(dev_df['text'])
    accuracy = accuracy_score(dev_labels, dev_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(dev_labels, dev_preds, average='binary')

    logging.info(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    # Merge the sampled parameters with the metrics (dicts cannot be added with `+`)
    # and build the record BEFORE opening the file so a failure cannot leave an
    # empty JSON file behind. float() turns NumPy scalars into plain Python floats.
    results = {
        **trial.params,
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1),
    }
    with open(f'svm_{run_id}.json', 'w') as f:
        json.dump(results, f)

    return accuracy

In [17]:
if __name__ == "__main__":
    print("\nHYPERPARAMETER TUNING")
    print("=====================")
    print(f"Running {N_TRIALS} trials...")

    # The detected torch device is logged for information only; nothing below
    # passes it to scikit-learn, whose SVM runs on the CPU.
    device = get_device()
    logging.info(f"Using device: {device} (Note: scikit-learn SVM implementation will utilize CPU)")

    # Create a study with TPE sampler and MedianPruner
    sampler = TPESampler(seed=42)
    # NOTE(review): a pruner can only act on intermediate values reported through
    # trial.report(); objective() in this notebook never reports any, so this
    # pruner is effectively inert — confirm whether it is still wanted.
    pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=5, interval_steps=2)

    study = optuna.create_study(
        direction='maximize',  # Maximize accuracy
        sampler=sampler,
        pruner=pruner,
        study_name='svm_evidence_detection'
    )

    try:
        # NOTE(review): with n_jobs > 1 trials run concurrently, so the fixed
        # sampler seed presumably no longer guarantees a reproducible trial sequence.
        study.optimize(objective, n_trials=N_TRIALS, n_jobs=6)
    except KeyboardInterrupt:
        print("Hyperparameter tuning interrupted.")

    # study.best_trial raises ValueError when no trial has completed (e.g. the run
    # was interrupted during the first trials); report that instead of crashing.
    try:
        trial = study.best_trial
    except ValueError:
        trial = None
        print("\nNo completed trials - nothing to report.")
    else:
        print("\nBest trial:")
        print(f"  Value (Accuracy): {trial.value}")
        print("  Params:")
        for key, value in trial.params.items():
            print(f"    {key}: {value}")

2025-03-26 18:15:42 - Using device: cpu (Note: scikit-learn SVM implementation will utilize CPU)
[I 2025-03-26 18:15:42,305] A new study created in memory with name: svm_evidence_detection
2025-03-26 18:15:42 - Loading datasets...
2025-03-26 18:15:42 - Loading datasets...
2025-03-26 18:15:42 - Loading datasets...
2025-03-26 18:15:42 - Loading datasets...
2025-03-26 18:15:42 - Loading datasets...
2025-03-26 18:15:42 - Loading datasets...



HYPERPARAMETER TUNING
Running 25 trials...


2025-03-26 18:15:42 - Training data shape: (21508, 3)
2025-03-26 18:15:42 - Training data shape: (21508, 3)
2025-03-26 18:15:42 - Training data shape: (21508, 3)
2025-03-26 18:15:42 - Development data shape: (5926, 3)
2025-03-26 18:15:42 - Development data shape: (5926, 3)
2025-03-26 18:15:42 - Training data shape: (21508, 3)
2025-03-26 18:15:42 - Training data shape: (21508, 3)
2025-03-26 18:15:42 - Development data shape: (5926, 3)
2025-03-26 18:15:42 - Training data shape: (21508, 3)
2025-03-26 18:15:42 - Training SVM with hyperparameters: C=98.07659333226346, kernel=rbf, gamma=auto
2025-03-26 18:15:42 - Training SVM with hyperparameters: C=6.756447544726421, kernel=poly, gamma=auto
2025-03-26 18:15:42 - Development data shape: (5926, 3)
2025-03-26 18:15:42 - Development data shape: (5926, 3)
2025-03-26 18:15:42 - Development data shape: (5926, 3)
2025-03-26 18:15:42 - Training SVM with hyperparameters: C=20.180756811464974, kernel=linear, gamma=scale
2025-03-26 18:15:43 - Training 

In [None]:
# Hyperparameters copied by hand from a tuning run.
params = {
    'C': 1.0952526897000217, 
    'kernel': 'rbf', 
    'gamma': 'auto', 
    'max_features': 5000, 
    'min_df': 3, 
    'ngram_range': (1, 3)
}

logging.info("Loading datasets...")
train_df = pd.read_csv(TRAIN_PATH)
dev_df = pd.read_csv(DEV_PATH)

logging.info(f"Training data shape: {train_df.shape}")
logging.info(f"Development data shape: {dev_df.shape}")

# Prepare data
train_df, dev_df, train_labels, dev_labels = prepare_data(train_df, dev_df)

# Build and fit the model from `params` so that `pipeline` exists at notebook scope
# after a fresh Restart & Run All (the evaluation cell calls pipeline.predict).
# The layout mirrors the pipeline built inside objective().
# TODO(review): 'max_df' was not recorded in `params`; this falls back to
# TfidfVectorizer's default of 1.0 — confirm the value used for the recorded run.
pipeline = Pipeline([
    ('features', FeatureUnion([
        ('text_features', Pipeline([
            ('tfidf', TfidfVectorizer(
                max_features=params['max_features'],
                min_df=params['min_df'],
                max_df=params.get('max_df', 1.0),
                ngram_range=params['ngram_range'],
                stop_words='english',
                analyzer='word',
                token_pattern=r'\w+',
                sublinear_tf=True
            ))
        ])),
        ('custom_features', FeatureExtractor())
    ])),
    ('scaler', StandardScaler(with_mean=False)),  # TF-IDF matrices are sparse
    # Only predict() is used downstream, so probability estimates are left off.
    ('svm', SVC(
        C=params['C'],
        kernel=params['kernel'],
        gamma=params['gamma'],
        degree=params.get('degree', 3)
    ))
])

logging.info(f"Training SVM with hyperparameters: C={params['C']}, kernel={params['kernel']}, gamma={params['gamma']}")
pipeline.fit(train_df['text'], train_labels)

2025-03-26 17:38:58 - Loading datasets...
2025-03-26 17:38:58 - Training data shape: (21508, 3)
2025-03-26 17:38:58 - Development data shape: (5926, 3)
2025-03-26 17:38:58 - Training SVM with hyperparameters: C=1.0952526897000217, kernel=rbf, gamma=auto
2025-03-26 17:52:34 - Accuracy: 0.8046, Precision: 0.7304, Recall: 0.4659, F1: 0.5689


In [8]:
def calculate_all_metrics(y_true, y_pred):
    """
    Compute accuracy, macro- and weighted-averaged precision/recall/F1, and MCC.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned with ``y_true``.

    Returns:
        dict: metric name -> value, with keys in this order:
        'Accuracy', 'Macro-P', 'Macro-R', 'Macro-F1',
        'W Macro-P', 'W Macro-R', 'W Macro-F1', 'MCC'.
        The 'W Macro-*' entries hold the weighted-average scores.
    """
    # (sklearn averaging mode, result keys for precision / recall / F1)
    averaged_groups = (
        ('macro', ('Macro-P', 'Macro-R', 'Macro-F1')),
        ('weighted', ('W Macro-P', 'W Macro-R', 'W Macro-F1')),
    )

    report = {'Accuracy': accuracy_score(y_true, y_pred)}

    for averaging, keys in averaged_groups:
        scores = precision_recall_fscore_support(y_true, y_pred, average=averaging)
        # The fourth element (support) is not reported.
        report.update(zip(keys, scores[:3]))

    # Matthews Correlation Coefficient
    report['MCC'] = matthews_corrcoef(y_true, y_pred)

    return report

In [None]:
# Rebuild the dev split's 'text' column and labels; the training-side outputs of
# prepare_data are not needed in this cell.
_, dev_df, _, dev_labels = prepare_data(train_df, dev_df)

# Score the dev set with the fitted `pipeline` expected in the notebook namespace,
# and persist the raw predictions next to the notebook.
trial_preds = pipeline.predict(dev_df['text'])
predictions = pd.DataFrame({'prediction': trial_preds})
predictions.to_csv('predictions.csv', index=False)

metrics = calculate_all_metrics(dev_labels, trial_preds)

# Print metrics in tabular format: a header row followed by one row per metric,
# framed by horizontal rules.
rule = "-" * 80
print("\nEvaluation Metrics:")
print(rule)
print(f"{'Metric':<12} {'Value':<8}")
print(rule)
for metric, value in metrics.items():
    print(f"{metric:<12} {value:.4f}")
print(rule)

# Log the headline metrics on a single line.
logging.info(f"Trial Metrics: Accuracy={metrics['Accuracy']:.4f}, Macro-F1={metrics['Macro-F1']:.4f}, W Macro-F1={metrics['W Macro-F1']:.4f}, MCC={metrics['MCC']:.4f}")

2025-03-26 17:52:49 - Trial Metrics: Accuracy=0.8046, Macro-F1=0.7213, W Macro-F1=0.7893, MCC=0.4695



Evaluation Metrics:
--------------------------------------------------------------------------------
Metric       Value   
--------------------------------------------------------------------------------
Accuracy     0.8046
Macro-P      0.7754
Macro-R      0.7000
Macro-F1     0.7213
W Macro-P    0.7956
W Macro-R    0.8046
W Macro-F1   0.7893
MCC          0.4695
--------------------------------------------------------------------------------
