In [1]:
import pickle
import numpy as np
import pandas as pd
import re
import os

# Seed constant for the notebook.
# NOTE(review): no cell visible here reads `seed`; the hyperparameter search is
# given the separate `seeds` list instead — confirm whether this is still needed.
seed = 2023

In [2]:
import torch

# Select the compute device as a string.
# The notebook was pinned to GPU index 2; keep that choice when such a GPU
# exists, otherwise fall back to the default GPU and finally to the CPU so the
# notebook still runs on machines with fewer (or no) GPUs.
preferred_gpu_index = 2
if torch.cuda.is_available() and torch.cuda.device_count() > preferred_gpu_index:
    device = f"cuda:{preferred_gpu_index}"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    histories_baseline_hyperparameter_search,
)

In [4]:
# Directory that receives the result files written by this notebook.
output_dir = "stance_output"
# exist_ok=True makes the creation idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(output_dir, exist_ok=True)

## Stance Classification

In [6]:
# Run the helper script load_sbert-embeddings.py.
# NOTE(review): `df`, `sbert_embeddings`, `y_data` and `output_dim` are used by
# later cells but never assigned in the visible notebook, so they are presumably
# defined by this script — confirm.
%run load_sbert-embeddings.py

In [7]:
# Preview the first rows of `df` to sanity-check the loaded data.
df.head()

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,1,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,3,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


In [8]:
# Show the dimensions of the embedding array
# (presumably one row per row of `df` — TODO confirm).
sbert_embeddings.shape

(5568, 384)

## Baseline: Average the history and use an FFN

In [10]:
# Search grid handed to the hyperparameter search.
hidden_dim_sizes = [[width, width] for width in (64, 128, 256, 512)]
dropout_rates = [0.1, 0.2]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [1, 12, 123]

# Training-loop settings (patience is presumably an early-stopping window — confirm).
num_epochs = 100
patience = 5
validation_metric = "f1"

# Loss selection; gamma is passed to the search alongside the "focal" loss.
loss = "focal"
gamma = 2
# Index labels of the rows of `df` belonging to each partition, in the order
# (train, dev, test), selected through the "set" column.
split_indices = tuple(
    df[df["set"] == split_name].index for split_name in ("train", "dev", "test")
)

In [None]:
# Run the hyperparameter search for the FFN baseline on the SBERT embeddings
# (signatures disabled, fixed train/dev/test split, no k-fold), using the grid
# and training settings defined in the cells above.
# Only the first two returned values are kept. The remaining two are bound to
# explicit throwaway names rather than `_` / `__`, because assigning to those
# names in IPython overrides its output-history variables.
(
    ffn_mean_history,
    best_ffn_mean_history,
    _discarded_third,
    _discarded_fourth,
) = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=df,
    id_column="timeline_id",
    label_column="label",
    embeddings=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=False,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    # Left unset: the explicit `split_indices` below are passed instead
    # (earlier alternative: torch.tensor(df['timeline_id'].astype(int))).
    split_ids=None,
    split_indices=split_indices,
    k_fold=False,
    patience=patience,
    validation_metric=validation_metric,
    # Path handed to the search for its results file (under `output_dir`).
    results_output=f"{output_dir}/ffn_mean_history_focal_{gamma}.csv",
    verbose=False,
)

In [None]:
# Display the second value returned by the hyperparameter search
# (presumably the results for the best configuration — confirm).
best_ffn_mean_history

In [None]:
# Mean of the "f1" column (presumably averaged over the seeds — confirm).
best_ffn_mean_history["f1"].mean()

In [None]:
# Mean of the "precision" column (presumably averaged over the seeds — confirm).
best_ffn_mean_history["precision"].mean()

In [None]:
# Mean of the "recall" column (presumably averaged over the seeds — confirm).
best_ffn_mean_history["recall"].mean()

In [None]:
# Stack the "f1_scores" entries into one array and average along axis 0,
# i.e. element-wise across entries (presumably per-class F1 — confirm).
np.stack(best_ffn_mean_history["f1_scores"]).mean(axis=0)

In [None]:
# Stack the "precision_scores" entries into one array and average along axis 0,
# i.e. element-wise across entries (presumably per-class precision — confirm).
np.stack(best_ffn_mean_history["precision_scores"]).mean(axis=0)

In [None]:
# Stack the "recall_scores" entries into one array and average along axis 0,
# i.e. element-wise across entries (presumably per-class recall — confirm).
np.stack(best_ffn_mean_history["recall_scores"]).mean(axis=0)