In [None]:
import pickle
import numpy as np
import pandas as pd
import re
import os

# NOTE(review): `seed` is not referenced by any cell visible in this section
# (the hyperparameter search is given its own `seeds` list) — confirm it is
# still needed before relying on it for reproducibility.
seed = 2023

In [None]:
import torch

# Choose the torch device: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Echo the selected device as the cell output.
device

In [None]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    ffn_hyperparameter_search,
)

In [None]:
# Directory under which the results CSV path for the FFN search is built.
output_dir = "rumours_output"
# `exist_ok=True` makes creation idempotent in a single call: re-running the
# cell is safe and there is no gap between an existence check and the mkdir.
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [None]:
# Execute load_sbert-embeddings.py via IPython's %run magic.
# NOTE(review): later cells use `df_rumours`, `sbert_embeddings`, `y_data`,
# `output_dim` and `split_ids`, none of which are assigned in the visible
# cells — presumably this script defines them; confirm against the script.
%run load_sbert-embeddings.py

In [None]:
# Quick look at the start of `df_rumours` to sanity-check what was loaded.
df_rumours.head()

In [None]:
# Shape of the embeddings that are passed to the search as `x_data`.
# presumably (n_samples, embedding_dim) — TODO confirm
sbert_embeddings.shape

## Baseline: FFN

In [None]:
# Search space and training settings handed to the FFN hyperparameter search.
num_epochs = 100

# Each candidate is a pair of equal hidden sizes.
hidden_dim_sizes = [[width, width] for width in (64, 128, 256, 512)]
dropout_rates = [0.1]
learning_rates = [1e-3, 5e-4, 1e-4]
seeds = [1, 12, 123]

# Loss configuration (`gamma` is also embedded in the results file name).
loss, gamma = "focal", 2

# Metric and patience values forwarded to the search.
validation_metric, patience = "f1", 3

In [None]:
# Keyword arguments forwarded unchanged to `ffn_hyperparameter_search`.
# Key order matches the original literal so iteration order is unchanged.
kwargs = dict(
    num_epochs=num_epochs,
    x_data=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    split_ids=split_ids,
    k_fold=True,
    patience=patience,
    validation_metric=validation_metric,
    verbose=False,
)

In [None]:
# Run the FFN hyperparameter search with the shared settings in `kwargs`; the
# results path sits under `output_dir` and is tagged with the `gamma` value.
# Only the first two return values are used by the cells below. The remaining
# ones are collected into `_unused` instead of being bound to `_` / `__`:
# IPython reserves those names for its output history and stops updating
# them once user code assigns to them.
ffn_current, best_ffn_current, *_unused = ffn_hyperparameter_search(
    results_output=f"{output_dir}/ffn_current_focal_{gamma}.csv",
    **kwargs,
)

In [None]:
# Display the first value returned by the search.
# NOTE(review): presumably the results for every configuration tried — confirm.
ffn_current

In [None]:
# Display the second value returned by the search.
# NOTE(review): presumably the results for the selected best configuration — confirm.
best_ffn_current

In [None]:
# Mean of the `f1` entries in `best_ffn_current`.
# NOTE(review): presumably an average over the seeds/folds that were run — confirm.
best_ffn_current["f1"].mean()

In [None]:
# Mean of the `precision` entries in `best_ffn_current`.
# NOTE(review): presumably an average over the seeds/folds that were run — confirm.
best_ffn_current["precision"].mean()

In [None]:
# Mean of the `recall` entries in `best_ffn_current`.
# NOTE(review): presumably an average over the seeds/folds that were run — confirm.
best_ffn_current["recall"].mean()

In [None]:
# Stack the `f1_scores` entries into one array and average along axis 0
# (i.e. across entries), keeping the remaining axes.
# presumably each entry holds per-class F1 scores — TODO confirm
np.stack(best_ffn_current["f1_scores"]).mean(axis=0)

In [None]:
# Stack the `precision_scores` entries into one array and average along
# axis 0 (i.e. across entries), keeping the remaining axes.
# presumably each entry holds per-class precision scores — TODO confirm
np.stack(best_ffn_current["precision_scores"]).mean(axis=0)

In [None]:
# Stack the `recall_scores` entries into one array and average along
# axis 0 (i.e. across entries), keeping the remaining axes.
# presumably each entry holds per-class recall scores — TODO confirm
np.stack(best_ffn_current["recall_scores"]).mean(axis=0)