In [1]:
import pickle
import numpy as np
import pandas as pd
import re
import os

# NOTE(review): `seed` is not referenced by any cell visible in this notebook;
# the hyperparameter search further down takes its own `seeds` list instead.
# Confirm whether this value is still needed.
seed = 2023

In [2]:
import torch

# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    ffn_hyperparameter_search,
)

In [4]:
# Directory that receives the CSV results written by the search below.
output_dir = "rumours_output"
# `exist_ok=True` makes creation idempotent and avoids the race between a
# separate existence check and the create call.
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [5]:
# Execute the loader script. Later cells use `df_rumours`, `sbert_embeddings`,
# `y_data` and `output_dim` without defining them in any cell visible here, so
# they are presumably created by this script — TODO confirm against
# load_sbert-embeddings.py.
%run load_sbert-embeddings.py

In [6]:
# Preview the first five rows of the rumours DataFrame.
df_rumours.head(n=5)

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


In [7]:
# Dimensions of the embedding matrix. Each row is presumably the SBERT
# embedding of one row of df_rumours — TODO confirm the row alignment.
sbert_embeddings.shape

(5568, 384)

## Baseline: FFN

In [10]:
# Settings for the FFN baseline hyperparameter search.
num_epochs = 100

# Grid of candidate values: hidden-layer sizes, dropout rates, learning rates.
hidden_dim_sizes = [[64, 64], [128, 128], [256, 256]]
dropout_rates = [0.5, 0.1]
learning_rates = [1e-3, 1e-4, 5e-4]

# Random seeds handed to the search; the results table has one row per
# (configuration, seed) pair.
seeds = [1, 12, 123]

# Loss selection; `gamma` is presumably the focal-loss focusing parameter —
# TODO confirm against ffn_hyperparameter_search.
loss = "focal"
gamma = 2

# Metric name and patience passed to the search (presumably used for model
# selection / early stopping — TODO confirm).
validation_metric = "f1"
patience = 5

# One integer timeline id per row, passed to the search as `split_ids`.
# Convert through NumPy so the tensor is built from the column's values
# directly, rather than by indexing the Series element by element (which is
# slow and depends on the Series index labels).
split_ids = torch.tensor(df_rumours["timeline_id"].astype(int).to_numpy())

In [11]:
# Run the FFN hyperparameter search on the SBERT embeddings with k-fold
# evaluation. The call returns four values, of which only the first two (the
# full results table and the best-model results table) are used below.
#
# The two discarded values get explicit throwaway names instead of `_` / `__`:
# assigning to those names in IPython shadows its output-history shortcuts and
# stops them from being updated for the rest of the session.
(
    ffn_current,
    best_ffn_current,
    _unused_third,
    _unused_fourth,
) = ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=sbert_embeddings,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    output_dim=output_dim,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    split_ids=split_ids,
    k_fold=True,
    patience=patience,
    validation_metric=validation_metric,
    # Results CSV path; the run log shows a companion "*_best_model.csv" is
    # written next to it.
    results_output=f"{output_dir}/ffn_current_focal_{gamma}.csv",
    verbose=False,
)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in rumours_output/ffn_current_focal_2.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/ffn_current_focal_2_best_model.csv


In [12]:
# Full search results. In the displayed rows each (configuration, seed) run is
# one row, and `model_id` is shared by the runs of one configuration.
ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size,model_id
0,,0.612861,0.571105,"[0.7049296775110101, 0.437279869953942]",0.571775,"[0.6986764291748803, 0.4448732083792723]",0.570619,"[0.711295871559633, 0.4299413958444326]",,0.651705,...,"(64, 64)",0.5,0.001,1,focal,2,True,5,64,0
0,,0.610997,0.562636,"[0.7080710588893552, 0.41720189891091874]",0.564861,"[0.6913411636164982, 0.43838028169014087]",0.561803,"[0.7256307339449541, 0.3979754928076718]",,0.656558,...,"(64, 64)",0.5,0.001,12,focal,2,True,5,64,0
0,,0.611743,0.574358,"[0.7005032350826743, 0.4482119205298013]",0.574179,"[0.7026247476204212, 0.4457323498419389]",0.574557,"[0.698394495412844, 0.4507192328183271]",,0.649818,...,"(64, 64)",0.5,0.001,123,focal,2,True,5,64,0
0,,0.616775,0.580331,"[0.7040023034840196, 0.4566596194503171]",0.580062,"[0.7070561017929439, 0.45306764551651807]",0.580642,"[0.7009747706422018, 0.46030900372935535]",,0.649144,...,"(64, 64)",0.5,0.0001,1,focal,2,True,5,64,1
0,,0.616216,0.582337,"[0.7012911649499491, 0.4633828511858223]",0.581705,"[0.7098384728340675, 0.45357142857142857]",0.583288,"[0.692947247706422, 0.47362812999467235]",,0.649279,...,"(64, 64)",0.5,0.0001,12,focal,2,True,5,64,1
0,,0.619385,0.586687,"[0.7029386092522548, 0.47043568464730295]",0.585919,"[0.713526284701713, 0.45831227892875187]",0.587939,"[0.6926605504587156, 0.4832179009057006]",,0.645505,...,"(64, 64)",0.5,0.0001,123,focal,2,True,5,64,1
0,,0.610065,0.568617,"[0.7023335230506546, 0.43490005402485143]",0.569136,"[0.6971751412429379, 0.4410958904109589]",0.568222,"[0.7075688073394495, 0.42887586574320724]",,0.646583,...,"(64, 64)",0.5,0.0005,1,focal,2,True,5,64,2
0,,0.620503,0.5793,"[0.7109596819988644, 0.44763971785132933]",0.580108,"[0.7041619797525309, 0.4560530679933665]",0.578711,"[0.7178899082568807, 0.43953116675546083]",,0.654401,...,"(64, 64)",0.5,0.0005,12,focal,2,True,5,64,2
0,,0.619758,0.578582,"[0.7103095711445613, 0.44685466377440347]",0.579356,"[0.7037141249296567, 0.45499723909442297]",0.578014,"[0.7170298165137615, 0.43899840170484816]",,0.648201,...,"(64, 64)",0.5,0.0005,123,focal,2,True,5,64,2
0,,0.619758,0.579875,"[0.7093188942718723, 0.4504310344827586]",0.580342,"[0.705099150141643, 0.4555858310626703]",0.579491,"[0.7135894495412844, 0.44539158231220033]",,0.646583,...,"(64, 64)",0.1,0.001,1,focal,2,True,5,64,3


In [13]:
# Results for the configuration the search reported as best, one row per seed.
# NOTE(review): presumably selected on the validation metric — confirm.
best_ffn_current

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,valid_recall_scores,hidden_dim,dropout_rate,learning_rate,seed,loss_function,gamma,k_fold,n_splits,batch_size
0,,0.616775,0.582897,"[0.7017696547722657, 0.4640250260688217]",0.582265,"[0.7102172636523781, 0.4543134252169474]",0.583841,"[0.6935206422018348, 0.474160895045285]",,0.646987,...,"[0.7092136789370508, 0.545968882602546]","(256, 256)",0.5,0.0001,1,focal,2,True,5,64
0,,0.616403,0.582104,"[0.7018255578093305, 0.46238244514106586]",0.581523,"[0.7094317516110135, 0.4536135315222963]",0.582939,"[0.6943807339449541, 0.4714970697922216]",,0.650088,...,"[0.7111740361576998, 0.5509193776520509]","(256, 256)",0.5,0.0001,12,focal,2,True,5,64
0,,0.619012,0.581768,"[0.7065747918461097, 0.4569606801275239]",0.58167,"[0.7075905692926969, 0.4557498675145734]",0.58187,"[0.7055619266055045, 0.4581779435269046]",,0.649683,...,"[0.7225005445436724, 0.5314710042432814]","(256, 256)",0.5,0.0001,123,focal,2,True,5,64


In [14]:
# Overall F1 of the best configuration, averaged over its rows (one per seed).
best_ffn_current["f1"].mean()

0.5822563592941862

In [15]:
# Overall precision of the best configuration, averaged over its rows (one per seed).
best_ffn_current["precision"].mean()

0.5818194014683176

In [16]:
# Overall recall of the best configuration, averaged over its rows (one per seed).
best_ffn_current["recall"].mean()

0.5828832018527841

In [17]:
# Stack the per-row F1 score vectors of the best configuration and average
# them element-wise across rows (presumably one entry per class — confirm).
np.mean(np.stack(best_ffn_current["f1_scores"]), axis=0)

array([0.70339   , 0.46112272])

In [18]:
# Stack the per-row precision score vectors of the best configuration and
# average them element-wise across rows (presumably one entry per class — confirm).
np.mean(np.stack(best_ffn_current["precision_scores"]), axis=0)

array([0.70907986, 0.45455894])

In [19]:
# Stack the per-row recall score vectors of the best configuration and average
# them element-wise across rows (presumably one entry per class — confirm).
np.mean(np.stack(best_ffn_current["recall_scores"]), axis=0)

array([0.6978211, 0.4679453])