In [1]:
import pickle
import numpy as np
import pandas as pd
import re
import os

seed = 2023

In [2]:
import torch

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    ffn_hyperparameter_search,
)

In [4]:
# Directory that receives the CSV results written by this notebook.
output_dir = "rumours_output"
# exist_ok=True makes the cell idempotent and avoids the check-then-create
# race of testing os.path.isdir() before calling os.makedirs().
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [5]:
# Run the embedding-loading script from this notebook.
# NOTE(review): later cells use df_rumours, sbert_embeddings, y_data, output_dim
# and split_ids, none of which are assigned in the visible cells — presumably
# this script defines them; confirm.
%run load_sbert_embeddings.py

In [6]:
# Preview the first five rows of the rumours dataframe.
df_rumours.head(n=5)

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


In [7]:
# Shape of the embeddings that are passed as `x_data` to the FFN baseline search.
sbert_embeddings.shape

(5568, 384)

## Baseline: FFN

In [8]:
# Settings for the FFN baseline hyperparameter search.
num_epochs = 100

# Grid values: each hidden_dim_sizes entry is a pair of equal sizes.
hidden_dim_sizes = [[size, size] for size in (64, 128, 256, 512)]
dropout_rates = [0.1]
learning_rates = [0.001, 0.0005, 0.0001]
seeds = [1, 12, 123]

# Loss configuration and model-selection settings.
loss, gamma = "focal", 2
validation_metric, patience = "f1", 3

In [9]:
# Keyword arguments for the FFN hyperparameter search, collected in one place
# so they can be unpacked with ** into the search call.
kwargs = dict(
    num_epochs=num_epochs,
    x_data=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    split_ids=split_ids,
    k_fold=True,
    patience=patience,
    validation_metric=validation_metric,
    verbose=False,
)

In [10]:
# Run the FFN hyperparameter search with the shared settings in `kwargs`,
# passing the CSV path for the results as `results_output`.
#
# The call is unpacked into four values, of which only the first two are used.
# The remaining two are bound to explicit throwaway names instead of `_` / `__`:
# IPython uses those names for its output history, and assigning to them stops
# them from tracking the most recent cell outputs.
#
# NOTE(review): the output recorded for this cell reports a filename without
# the `_kfold` suffix, so it looks like it predates the current
# `results_output` value — re-run the cell to refresh it.
ffn_current, best_ffn_current, _unused_1, _unused_2 = ffn_hyperparameter_search(
    results_output=f"{output_dir}/ffn_current_focal_{gamma}_kfold.csv",
    **kwargs,
)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in rumours_output/ffn_current_focal_2.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/ffn_current_focal_2_best_model.csv


In [11]:
ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size,model_id
0,,0.619758,0.579875,"[0.7093188942718723, 0.4504310344827586]",0.580342,"[0.705099150141643, 0.4555858310626703]",0.579491,"[0.7135894495412844, 0.44539158231220033]",,0.646583,...,"(64, 64)",0.1,0.001,1,focal,2,True,5,64,0
0,,0.61342,0.575318,"[0.7025243832472748, 0.4481106971793507]",0.575281,"[0.7029276693455798, 0.44763423710792133]",0.575355,"[0.7021215596330275, 0.4485881726158764]",,0.646179,...,"(64, 64)",0.1,0.001,12,focal,2,True,5,64,0
0,,0.616589,0.579772,"[0.704156479217604, 0.4553878739740535]",0.579563,"[0.7064935064935065, 0.45263157894736844]",0.580006,"[0.7018348623853211, 0.4581779435269046]",,0.646853,...,"(64, 64)",0.1,0.001,123,focal,2,True,5,64,0
0,,0.610252,0.582552,"[0.6900844819919965, 0.4750188300276174]",0.581761,"[0.7143295489413931, 0.4491927825261159]",0.585713,"[0.6674311926605505, 0.5039957378795951]",,0.641461,...,"(64, 64)",0.1,0.0005,1,focal,2,True,5,64,1
0,,0.619199,0.578126,"[0.709759909077994, 0.44649146572744514]",0.578854,"[0.7034638130104196, 0.45424476295479604]",0.577584,"[0.7161697247706422, 0.43899840170484816]",,0.647527,...,"(64, 64)",0.1,0.0005,12,focal,2,True,5,64,1
0,,0.611929,0.58422,"[0.6915555555555556, 0.47688442211055276]",0.583386,"[0.7155119558553035, 0.4512601046124584]",0.587373,"[0.669151376146789, 0.5055940330314331]",,0.641865,...,"(64, 64)",0.1,0.0005,123,focal,2,True,5,64,1
0,,0.617148,0.579516,"[0.7053084648493544, 0.45372340425531915]",0.579458,"[0.7059161401493395, 0.4530005310674456]",0.579575,"[0.7047018348623854, 0.45444858817261585]",,0.646044,...,"(64, 64)",0.1,0.0001,1,focal,2,True,5,64,2
0,,0.621062,0.584474,"[0.7077763403765991, 0.4611714815796448]",0.584288,"[0.7097146151628712, 0.4588607594936709]",0.584677,"[0.705848623853211, 0.46350559403303143]",,0.649953,...,"(64, 64)",0.1,0.0001,12,focal,2,True,5,64,2
0,,0.627213,0.592542,"[0.7113997113997114, 0.4736842105263158]",0.592086,"[0.7161533991865194, 0.46801872074882994]",0.593099,"[0.7067087155963303, 0.47948854555141185]",,0.647527,...,"(64, 64)",0.1,0.0001,123,focal,2,True,5,64,2
0,,0.61137,0.578605,"[0.6961084390030607, 0.4611010597053502]",0.577883,"[0.7079750963533946, 0.44779116465863456]",0.57993,"[0.6846330275229358, 0.47522642514651037]",,0.641057,...,"(128, 128)",0.1,0.001,1,focal,2,True,5,64,3


In [12]:
best_ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,valid_recall_scores,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size
0,,0.614166,0.578879,"[0.7007805724197744, 0.45697796432319]",0.578417,"[0.7067055393586006, 0.4501291989664083]",0.579496,"[0.694954128440367, 0.4640383590836441]",,0.647257,...,"[0.7113918536266609, 0.5431400282885431]","(512, 512)",0.1,0.0001,1,focal,2,True,5,64
0,,0.619758,0.582074,"[0.7075688073394495, 0.4565796483750666]",0.582074,"[0.7075688073394495, 0.4565796483750666]",0.582074,"[0.7075688073394495, 0.4565796483750666]",,0.650492,...,"[0.7238074493574385, 0.5314710042432814]","(512, 512)",0.1,0.0001,12,focal,2,True,5,64
0,,0.616403,0.580726,"[0.703030303030303, 0.45842105263157895]",0.580333,"[0.7077280650784428, 0.452938117524701]",0.581216,"[0.698394495412844, 0.4640383590836441]",,0.649144,...,"[0.7122631235025049, 0.5466760961810466]","(512, 512)",0.1,0.0001,123,focal,2,True,5,64


In [13]:
# Mean of the "f1" column over the rows of best_ffn_current.
best_ffn_current.loc[:, "f1"].mean()

0.5805597246865605

In [14]:
# Mean of the "precision" column over the rows of best_ffn_current.
best_ffn_current.loc[:, "precision"].mean()

0.5802748961071115

In [15]:
# Mean of the "recall" column over the rows of best_ffn_current.
best_ffn_current.loc[:, "recall"].mean()

0.5809289662891691

In [16]:
# Stack the per-row "f1_scores" entries and average them element-wise across rows.
np.mean(np.stack(best_ffn_current["f1_scores"]), axis=0)

array([0.70379323, 0.45732622])

In [17]:
# Stack the per-row "precision_scores" entries and average them element-wise across rows.
np.mean(np.stack(best_ffn_current["precision_scores"]), axis=0)

array([0.70733414, 0.45321565])

In [18]:
# Stack the per-row "recall_scores" entries and average them element-wise across rows.
np.mean(np.stack(best_ffn_current["recall_scores"]), axis=0)

array([0.70030581, 0.46155212])