In [1]:
import pickle
import numpy as np
import pandas as pd
import re
import os

# NOTE(review): `seed` is not referenced by any visible cell; the hyperparameter
# search below is driven by its own `seeds` list — confirm this is still needed.
seed = 2023

In [2]:
import torch

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    ffn_hyperparameter_search,
)

In [4]:
# Directory that receives the CSV results written by the search below.
output_dir = "rumours_output"
# exist_ok=True makes creation idempotent and avoids the check-then-create race
# of testing os.path.isdir() before calling os.makedirs().
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [5]:
# Executes load_sbert-embeddings.py through IPython's %run magic, which makes
# the variables the script defines available in this notebook's namespace.
# NOTE(review): df_rumours, sbert_embeddings, y_data and output_dim are used by
# later cells but are not defined in any visible cell — presumably they come
# from this script; confirm.
%run load_sbert-embeddings.py

In [6]:
# Preview the first rows of df_rumours. The frame is not defined in the visible
# cells — presumably created by the %run script; confirm.
df_rumours.head()

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


In [7]:
# Sanity-check the embedding matrix dimensions before using it as x_data.
# NOTE(review): presumably one row per row of df_rumours — confirm.
sbert_embeddings.shape

(5568, 384)

## Baseline: FFN

In [8]:
# --- Settings for the FFN baseline hyperparameter search ---

# Training control values passed to the search.
num_epochs = 100
patience = 5
validation_metric = "f1"

# Loss configuration handed to the search as `loss` / `gamma`.
loss = "focal"
gamma = 2

# Grid values; each hidden_dim_sizes entry is a pair of equal sizes.
hidden_dim_sizes = [[width, width] for width in (64, 128, 256)]
dropout_rates = [0.5, 0.1]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [1, 12, 123]

# Index labels of the df_rumours rows for each value of its 'set' column,
# ordered (train, dev, test).
split_indices = tuple(
    df_rumours[df_rumours["set"] == subset].index
    for subset in ("train", "dev", "test")
)

In [9]:
# Run the FFN hyperparameter search on the SBERT embeddings with the predefined
# train/dev/test indices (k_fold=False); the results CSV path is placed under
# output_dir and includes the gamma value in its file name.
# NOTE(review): y_data and output_dim are not defined in the visible cells —
# presumably provided by the %run script; confirm.
# The last two return values are not used in the visible cells. They are bound
# to explicit throwaway names instead of `_` / `__`, because those names are
# IPython's output-history shortcuts and assigning to them shadows them.
(
    ffn_current,
    best_ffn_current,
    _unused_third,
    _unused_fourth,
) = ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=sbert_embeddings,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    output_dim=output_dim,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    # Alternative left by the author:
    # torch.tensor(df_rumours['timeline_id'].astype(int))
    split_ids=None,
    split_indices=split_indices,
    k_fold=False,
    patience=patience,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_current_focal_{gamma}.csv",
    verbose=False,
)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in rumours_output/ffn_current_focal_2.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/ffn_current_focal_2_best_model.csv


In [10]:
# Display the first frame returned by the search; the rows shown differ by
# hyperparameter setting and seed.
ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size,model_id
0,0.266496,0.510963,0.496127,"[0.5825874694873882, 0.4096662830840046]",0.504428,"[0.5264705882352941, 0.4823848238482385]",0.504047,"[0.6520947176684881, 0.356]",0.238653,0.597865,...,"(64, 64)",0.5,0.001,1,focal,2,False,,64,0
0,0.295249,0.49571,0.486025,"[0.5565800502933781, 0.41546961325966847]",0.489863,"[0.515527950310559, 0.4641975308641975]",0.490368,"[0.604735883424408, 0.376]",0.250996,0.597865,...,"(64, 64)",0.5,0.001,12,focal,2,False,,64,0
0,0.26298,0.518589,0.501541,"[0.5937248592115849, 0.4093567251461988]",0.512329,"[0.531700288184438, 0.49295774647887325]",0.511066,"[0.6721311475409836, 0.35]",0.236219,0.601424,...,"(64, 64)",0.5,0.001,123,focal,2,False,,64,0
0,0.266704,0.51001,0.486818,"[0.5959119496855345, 0.37772397094431]",0.501366,"[0.5242047026279392, 0.4785276073619632]",0.501173,"[0.6903460837887068, 0.312]",0.232962,0.608541,...,"(64, 64)",0.5,0.0001,1,focal,2,False,,64,1
0,0.264475,0.510963,0.488889,"[0.5951065509076557, 0.38267148014440433]",0.502716,"[0.5250696378830083, 0.48036253776435045]",0.502352,"[0.6867030965391621, 0.318]",0.233606,0.629893,...,"(64, 64)",0.5,0.0001,12,focal,2,False,,64,1
0,0.265659,0.509056,0.489364,"[0.5896414342629482, 0.38908659549228947]",0.501107,"[0.5240793201133145, 0.478134110787172]",0.500976,"[0.6739526411657559, 0.328]",0.241017,0.6121,...,"(64, 64)",0.5,0.0001,123,focal,2,False,,64,1
0,0.264648,0.517636,0.502152,"[0.5899513776337115, 0.41435185185185186]",0.511573,"[0.5313868613138686, 0.49175824175824173]",0.510512,"[0.663023679417122, 0.358]",0.237069,0.601424,...,"(64, 64)",0.5,0.0005,1,focal,2,False,,64,2
0,0.270322,0.509056,0.482413,"[0.5998445998445998, 0.3649815043156597]",0.49946,"[0.5230352303523035, 0.4758842443729904]",0.499548,"[0.7030965391621129, 0.296]",0.234145,0.615658,...,"(64, 64)",0.5,0.0005,12,focal,2,False,,64,2
0,0.267894,0.518589,0.50854,"[0.5788156797331109, 0.43826473859844267]",0.51379,"[0.5338461538461539, 0.49373433583959897]",0.513029,"[0.6320582877959927, 0.394]",0.239344,0.619217,...,"(64, 64)",0.5,0.0005,123,focal,2,False,,64,2
0,0.266701,0.511916,0.499534,"[0.5782537067545305, 0.42081447963800905]",0.506097,"[0.5278195488721804, 0.484375]",0.505672,"[0.639344262295082, 0.372]",0.242532,0.615658,...,"(64, 64)",0.1,0.001,1,focal,2,False,,64,3


In [11]:
# Display the second frame returned by the search; the rows shown share one
# hyperparameter setting and differ only by seed.
best_ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,valid_recall_scores,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size
0,0.253243,0.517636,0.516824,"[0.5366300366300366, 0.49701789264413515]",0.516833,"[0.5395948434622467, 0.49407114624505927]",0.516849,"[0.5336976320582878, 0.5]",0.237407,0.615658,...,"[0.6368715083798883, 0.5784313725490197]","(128, 128)",0.1,0.001,1,focal,2,False,,64
0,0.265313,0.508103,0.49435,"[0.5777414075286417, 0.4109589041095891]",0.50162,"[0.524517087667162, 0.4787234042553192]",0.501494,"[0.6429872495446266, 0.36]",0.230795,0.637011,...,"[0.7206703910614525, 0.49019607843137253]","(128, 128)",0.1,0.001,12,focal,2,False,,64
0,0.264338,0.530982,0.514549,"[0.6038647342995169, 0.42523364485981313]",0.526181,"[0.5411255411255411, 0.5112359550561798]",0.52353,"[0.6830601092896175, 0.364]",0.243811,0.622776,...,"[0.7374301675977654, 0.4215686274509804]","(128, 128)",0.1,0.001,123,focal,2,False,,64


In [12]:
# Average the `f1` column over the rows of best_ffn_current (one row per seed
# in the frame shown above).
# NOTE(review): presumably a test-set metric, since the frame also carries
# valid_-prefixed columns — confirm.
best_ffn_current["f1"].mean()

0.5085744366786221

In [13]:
# Average the `precision` column over the rows of best_ffn_current (one row per
# seed in the frame shown above).
best_ffn_current["precision"].mean()

0.514877996301918

In [14]:
# Average the `recall` column over the rows of best_ffn_current (one row per
# seed in the frame shown above).
best_ffn_current["recall"].mean()

0.5139574984820886

In [15]:
# Stack the per-row `f1_scores` lists into one array and average across rows,
# yielding one value per list position (presumably per class — confirm).
np.stack(best_ffn_current["f1_scores"]).mean(axis=0)

array([0.57274539, 0.44440348])

In [16]:
# Stack the per-row `precision_scores` lists into one array and average across
# rows, yielding one value per list position (presumably per class — confirm).
np.stack(best_ffn_current["precision_scores"]).mean(axis=0)

array([0.53507916, 0.49467684])

In [17]:
# Stack the per-row `recall_scores` lists into one array and average across
# rows, yielding one value per list position (presumably per class — confirm).
np.stack(best_ffn_current["recall_scores"]).mean(axis=0)

array([0.619915, 0.408   ])