In [None]:
import numpy as np
import pickle
import os

# NOTE(review): this global seed is not referenced by any other cell visible in
# this notebook (the search below is driven by the `seeds` list instead) —
# confirm whether it is still needed.
seed = 2023

In [None]:
import torch

# Select the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Echo the chosen device as the cell output.
device

In [None]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    histories_baseline_hyperparameter_search,
)

In [None]:
# Directory that receives the results CSV written by the search further down.
output_dir = "client_talk_type_output"
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(output_dir, exist_ok=True)

## AnnoMI

In [None]:
# Execute the AnnoMI loading script inside this kernel. Later cells use names
# that are not defined anywhere in this notebook (anno_mi, y_data_client,
# output_dim_client, client_index, client_transcript_id); presumably they are
# created by this script — TODO confirm.
%run ../load_anno_mi.py

In [None]:
# Preview the first rows of `anno_mi` (presumably a pandas DataFrame, since it
# is later passed to the search as `df` — TODO confirm).
anno_mi.head()

In [None]:
# Load the pre-computed SBERT embeddings from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load this file if it was produced by a trusted source.
with open("../anno_mi_sbert.pkl", "rb") as f:
    sbert_embeddings = pickle.load(f)

# Show the shape of the loaded embeddings as the cell output.
sbert_embeddings.shape

# Baseline: Averaging history and use FFN

Here, we average the full history of a path and concatenate it with the current embedding (the total number of features that are passed into the FFN is `2 * sbert_embeddings.shape[1]`, i.e. twice the embedding dimension).

Here, we run a hyperparameter search for the FFN, using the same parameters as the standard FFN baseline on the sentence embeddings. We try out several variations of the network: two hidden layers of equal size (64, 128, 256 or 512), each combined with every learning rate and seed listed below.

In [None]:
# --- Training schedule and model selection ---
num_epochs = 100
patience = 3
validation_metric = "f1"

# --- Search grid ---
# Each candidate network has two hidden layers of equal width.
hidden_dim_sizes = [[width, width] for width in (64, 128, 256, 512)]
dropout_rates = [0.1]
learning_rates = [0.001, 0.0005, 0.0001]
seeds = [1, 12, 123]

# --- Loss ---
# gamma is also embedded in the results file name further down, so it is kept
# as an int to leave that name unchanged.
loss = "focal"
gamma = 2

In [None]:
# Keyword arguments forwarded (via **kwargs) to the hyperparameter-search call.
# Key order is kept exactly as before.
kwargs = dict(
    num_epochs=num_epochs,
    # data: frame, column names, embeddings and labels
    df=anno_mi,
    id_column="transcript_id",
    label_column="client_talk_type",
    embeddings=sbert_embeddings,
    y_data=y_data_client,
    output_dim=output_dim_client,
    # search grid
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    # loss configuration and compute device
    loss=loss,
    gamma=gamma,
    device=device,
    # subset / split information for the client utterances
    path_indices=client_index,
    split_ids=client_transcript_id,
    k_fold=True,
    # model selection and logging
    patience=patience,
    validation_metric=validation_metric,
    verbose=False,
)

In [None]:
# Run the search with use_signatures=False. results_output is presumably the
# path the results table is written to — TODO confirm against the function.
search_outputs = histories_baseline_hyperparameter_search(
    use_signatures=False,
    results_output=f"{output_dir}/ffn_mean_history_focal_{gamma}_kfold.csv",
    **kwargs,
)

# The call yields four values; only the first two are used by later cells,
# the remaining two are bound to throwaway names.
ffn_mean_history_kfold, best_ffn_mean_history_kfold, _, __ = search_outputs

In [None]:
# Display the first value returned by the search (the full results table).
ffn_mean_history_kfold

In [None]:
# Average the numeric columns over all rows that share the same
# (hidden_dim, dropout_rate, learning_rate) combination.
# numeric_only=True is passed explicitly because pandas >= 2.0 no longer drops
# non-numeric columns silently and raises a TypeError instead; the sibling
# frame holds array-valued columns such as "f1_scores", so this frame
# presumably does too — TODO confirm.
ffn_mean_history_kfold.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean(
    numeric_only=True
)

In [None]:
# Display the second value returned by the search (presumably the results for
# the selected best configuration — TODO confirm).
best_ffn_mean_history_kfold

In [None]:
# Mean of the "f1" column over all rows of the best-results frame.
best_ffn_mean_history_kfold["f1"].mean()

In [None]:
# Mean of the "precision" column over all rows of the best-results frame.
best_ffn_mean_history_kfold["precision"].mean()

In [None]:
# Mean of the "recall" column over all rows of the best-results frame.
best_ffn_mean_history_kfold["recall"].mean()

In [None]:
# Stack the entries of the "f1_scores" column into one array and average along
# the first axis, i.e. across the rows of the frame (entries are presumably
# per-class scores — TODO confirm).
np.mean(np.stack(best_ffn_mean_history_kfold["f1_scores"]), axis=0)

In [None]:
# Stack the entries of the "precision_scores" column into one array and average
# along the first axis, i.e. across the rows of the frame (entries are
# presumably per-class scores — TODO confirm).
np.mean(np.stack(best_ffn_mean_history_kfold["precision_scores"]), axis=0)

In [None]:
# Stack the entries of the "recall_scores" column into one array and average
# along the first axis, i.e. across the rows of the frame (entries are
# presumably per-class scores — TODO confirm).
np.mean(np.stack(best_ffn_mean_history_kfold["recall_scores"]), axis=0)