In [1]:
import os
import pickle
import re

import numpy as np
import pandas as pd

# Global seed constant.
# NOTE(review): no later cell shown here reads `seed` (the search uses the `seeds` list) —
# confirm whether anything outside this notebook view depends on it.
seed = 2023

In [2]:
import torch

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    ffn_hyperparameter_search,
)

In [4]:
# Directory that receives the result files written by the search below
# (see the `results_output` path built from `output_dir`).
output_dir = "talklife_moc_output"
# exist_ok=True keeps the cell idempotent on re-run and removes the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(output_dir, exist_ok=True)

Talklife MoC

In [5]:
# Execute the TalkLife MoC loader script via the %run magic.
# NOTE(review): later cells use `df`, `y_data` and `output_dim`, none of which are defined
# in a visible cell — presumably they come from this script; confirm.
%run load_talklifemoc.py

In [6]:
# Execute the SBERT embeddings loader script via the %run magic.
# NOTE(review): `sbert_embeddings` is used below without a visible definition —
# presumably it is created by this script; confirm.
%run load_sbert-embeddings.py

In [7]:
# Display the shape of the loaded embeddings as a quick sanity check
# (the recorded output for this cell is torch.Size([18604, 384])).
sbert_embeddings.shape

torch.Size([18604, 384])

Baseline: FFN

In [8]:
# --- training settings forwarded to the FFN hyperparameter search ---
num_epochs = 100
patience = 5
validation_metric = "f1"

# --- loss configuration ---
loss = "focal"
gamma = 2

# --- candidate values explored by the search ---
# two equal-width hidden layers per candidate
hidden_dim_sizes = [[width, width] for width in (64, 128, 256, 512)]
dropout_rates = [0.1, 0.2]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [1, 12, 123]

In [9]:
# Build one (train, dev, test) triple of index objects per fold column of `df`.
# A fold column is any column whose name contains 'fold'; its values label each row
# as 'train', 'dev' or 'test'.
fold_col_names = [col_name for col_name in df.columns if 'fold' in col_name]
fold_list = tuple(
    tuple(
        df[df[fold_name] == split_label].index
        for split_label in ('train', 'dev', 'test')
    )
    for fold_name in fold_col_names
)

In [10]:
# Run the FFN hyperparameter search on the SBERT embeddings using the pre-computed
# k-fold splits in `fold_list`; results are also written to a CSV under `output_dir`.
# Only the first two return values are used by later cells. The remaining two are bound
# to named throwaways instead of `_` / `__`, because IPython uses those names for its
# output history and assigning to them stops that history from updating.
ffn_current, best_ffn_current, _unused_third, _unused_fourth = ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=sbert_embeddings,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    output_dim=output_dim,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    split_ids=None,
    split_indices=fold_list,
    k_fold=True,
    patience=patience,
    validation_metric=validation_metric,
    results_output= f"{output_dir}/ffn_current_focal_{gamma}.csv",
    verbose=False
)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

KeyboardInterrupt: 

: 

In [None]:
# Display the results table for the best configuration returned by the search.
# NOTE(review): the search cell above ended in KeyboardInterrupt and this cell has no
# execution count, so the recorded table may come from an earlier run — it lists
# dropout_rate 0.5, which is not in the `dropout_rates` grid defined above. Re-run to confirm.
best_ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,valid_recall_scores,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size
0,,0.806923,0.53252,"[0.8877564102564103, 0.4314827828531272, 0.278...",0.528515,"[0.8937145069695406, 0.40878828229027964, 0.28...",0.537493,"[0.8818772287315334, 0.4568452380952381, 0.273...",,0.798716,...,"[0.8726038932091733, 0.47889485801995396, 0.24...","(64, 64)",0.5,0.0001,1,focal,2,True,5,64
0,,0.804451,0.530992,"[0.8861360718870347, 0.4303015910710045, 0.276...",0.524176,"[0.8932453416149069, 0.41275626423690204, 0.26...",0.538625,"[0.8791390728476821, 0.4494047619047619, 0.287...",,0.799508,...,"[0.8732973401357175, 0.4716039907904835, 0.268...","(64, 64)",0.5,0.0001,12,focal,2,True,5,64
0,,0.812943,0.531022,"[0.8918798523609521, 0.4297560975609756, 0.271...",0.533497,"[0.8913126430933604, 0.4227447216890595, 0.286...",0.529123,"[0.8924477840040754, 0.43700396825396826, 0.25...",,0.806511,...,"[0.885828916736837, 0.4508825786646201, 0.2424...","(64, 64)",0.5,0.0001,123,focal,2,True,5,64


In [None]:
# Mean of the "f1" column over the rows of the best-configuration table.
f1_column = best_ffn_current["f1"]
f1_column.mean()

0.531511120545299

In [None]:
# Mean of the "precision" column over the rows of the best-configuration table.
precision_column = best_ffn_current["precision"]
precision_column.mean()

0.5287290682197932

In [None]:
# Mean of the "recall" column over the rows of the best-configuration table.
recall_column = best_ffn_current["recall"]
recall_column.mean()

0.5350802865249041

In [None]:
# Stack the per-row "f1_scores" lists into a 2-D array and average down the rows,
# giving one mean per score position (presumably one per class — confirm).
f1_score_matrix = np.stack(best_ffn_current["f1_scores"])
np.mean(f1_score_matrix, axis=0)

array([0.88859078, 0.43051349, 0.27542909])

In [None]:
# Stack the per-row "precision_scores" lists into a 2-D array and average down the rows,
# giving one mean per score position (presumably one per class — confirm).
precision_score_matrix = np.stack(best_ffn_current["precision_scores"])
np.mean(precision_score_matrix, axis=0)

array([0.8927575 , 0.41476309, 0.27866662])

In [None]:
# Stack the per-row "recall_scores" lists into a 2-D array and average down the rows,
# giving one mean per score position (presumably one per class — confirm).
recall_score_matrix = np.stack(best_ffn_current["recall_scores"])
np.mean(recall_score_matrix, axis=0)

array([0.88448803, 0.44775132, 0.27300151])