In [1]:
import pickle
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import re

# NOTE(review): `seed` is not referenced by any other cell visible in this notebook
# (the hyperparameter searches receive the separate `seeds` list) — confirm it is still needed.
seed = 2023

In [2]:
import torch

# Select the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Echo the choice so it is recorded in the cell output.
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.swnu_network_functions import (
    swnu_network_hyperparameter_search, obtain_SWNUNetwork_input
)

In [4]:
# Directory that receives the CSV files written by the hyperparameter searches
# (it is interpolated into every `results_output` path).
output_dir = "rumours_output"
# exist_ok=True makes the call idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [5]:
# Execute the loader script and pull the variables it defines into this kernel.
# NOTE(review): later cells use `df_rumours`, `sbert_embeddings`, `y_data` and `output_dim`
# without defining them — presumably this script creates all four; confirm.
%run load_sbert-embeddings.py

In [6]:
# Preview the first rows; later cells rely on the `timeline_id`, `label` and `set` columns shown here.
df_rumours.head()

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


In [7]:
# One-off construction of the SWNU network input, used here only to inspect the
# shape of the resulting path tensor before launching the searches.
x_data = obtain_SWNUNetwork_input(
    # data
    df=df_rumours,
    embeddings=sbert_embeddings,
    id_column="timeline_id",
    label_column="label",
    # dimensionality reduction
    method="gaussian_random_projection",
    dimension=30,
    # path construction
    k=5,
    features="time_encoding",
    standardise_method=None,
    include_features_in_path=False,
)

# The recorded run shows torch.Size([5568, 5, 30]), matching k=5 and dimension=30 above.
x_data["x_data"]["path"].shape

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


torch.Size([5568, 5, 30])

## SWNU Network

In [8]:
# Time-derived features forwarded to every hyperparameter search below.
features = [
    "time_encoding",
    "timeline_index",
]
# NOTE(review): presumably one standardisation entry per feature, in the same order — confirm.
standardise_method = [
    "z_score",
    None,
]
# Forwarded unchanged as `include_features_in_path=` to each search call.
include_features_in_path = True

In [9]:
# --- search grid (each list is swept by the hyperparameter searches below) ---
dimensions = [15]
# (hidden-dim sizes, signature depth) pairs, as the variable name indicates.
swnu_hidden_dim_sizes_and_sig_depths = [
    ([12], 3),
    ([10], 4),
]
# Two equally wide hidden layers for each candidate width.
ffn_hidden_dim_sizes = [[width, width] for width in (64, 128, 256, 512)]
dropout_rates = [0.5, 0.1]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [1, 12, 123]
# Passed as `BiLSTM=` to the searches that are not explicitly unidirectional.
bidirectional = True

# --- training settings shared by every run ---
num_epochs = 100
patience = 5
loss, gamma = "focal", 2
validation_metric = "f1"
# Row indices of the predefined train / dev / test partitions, in that order,
# taken from the `set` column of the rumours frame.
split_indices = tuple(
    df_rumours[df_rumours["set"] == split_name].index
    for split_name in ("train", "dev", "test")
)

In [10]:
# NOTE(review): this whole cell is commented out and executes nothing. Its values differ
# from the active configuration cells above; consider removing it or moving it to the
# project history if it is no longer needed.
# #dimensionality reduction
# embedding_dim = sbert_embeddings.shape[1]
# dim_reduce_method = ["umap"] #["gaussian_random_projection", "umap"]
# dimensions = [20]#[50,30,15]
# #time features
# features = "time_encoding"
# standardise_method = "standardise"
# include_features_in_path = False
# #SWNU block
# augmentation_tp = "Conv1d"
# hidden_dim_aug = None
# comb_m = "concatenation"
# log_sig = True
# conv_output_channels = [10] #[20, 10, 5]
# log_signature_dimensions_and_sig_depths = [(8, 3)]#[(30, 2), (10, 3), (6, 4)]
# bidirectional = False
# #ffn
# hidden_dim_sizes = [64]#[32,64]
# dropout_rates = [0.2]#[0.5, 0.2, 0.1]
# #overall training
# num_epochs = 100
# batch=64
# patience = 4
# learning_rates = [1e-4] #[1e-3, 1e-4, 5e-4]
# seeds = [0, 1, 12, 123, 1234]
# loss = "focal"
# gamma = 2
# validation_metric = "f1"
# split_indices = (df_rumours[df_rumours['set']=='train'].index,
#                  df_rumours[df_rumours['set']=='dev'].index,
#                  df_rumours[df_rumours['set']=='test'].index)

In [11]:
# History length: passed as `history_lengths=[size]` and embedded in each results filename.
size = 35

## UMAP

In [12]:
# Hyperparameter search: UMAP-reduced embeddings, BiLSTM taken from `bidirectional`.
# Only the first two returned values are kept; the other two are bound to `_` / `__`.
swnu_network_umap, best_swnu_network_umap, _, __ = swnu_network_hyperparameter_search(
    # --- data ---
    df=df_rumours,
    embeddings=sbert_embeddings,
    id_column="timeline_id",
    label_column="label",
    y_data=y_data,
    output_dim=output_dim,
    # --- fixed train / dev / test split (no k-fold) ---
    split_indices=split_indices,
    split_ids=None,  # torch.tensor(df_rumours['timeline_id'].astype(int)),
    k_fold=False,
    # --- path construction ---
    history_lengths=[size],
    dim_reduce_methods=["umap"],
    dimensions=dimensions,
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    log_signature=True,
    # --- model grid ---
    swnu_hidden_dim_sizes_and_sig_depths=swnu_hidden_dim_sizes_and_sig_depths,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    BiLSTM=bidirectional,
    # --- optimisation ---
    num_epochs=num_epochs,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    patience=patience,
    validation_metric=validation_metric,
    device=device,
    # --- output ---
    results_output=f"{output_dir}/swnu_network_umap_focal_{gamma}_{size}.csv",
    verbose=False,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: umap
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_umap_focal_2_35.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_umap_focal_2_35_best_model.csv


In [13]:
# Average every numeric column over the seeds for each hyperparameter combination.
# numeric_only=True drops the non-numeric columns explicitly; the bare .mean() relied on
# deprecated silent dropping (it emitted a warning here and raises in pandas >= 2.0).
swnu_network_umap.groupby(
    [
        "dimensions",
        "swnu_hidden_dim",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  swnu_network_umap.groupby(["dimensions",


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,loss,accuracy,f1,precision,recall,valid_loss,valid_accuracy,valid_f1,valid_precision,valid_recall,...,include_features_in_path,embedding_dim,num_features,log_signature,seed,BiLSTM,gamma,k_fold,batch_size,model_id
dimensions,swnu_hidden_dim,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
15,"(10,)","(64, 64)",0.1,0.0001,0.501318,0.621227,0.617775,0.631792,0.625039,0.221111,0.690391,0.65001,0.663893,0.65155,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,28.0
15,"(10,)","(64, 64)",0.1,0.0005,0.343708,0.621544,0.616494,0.638194,0.627426,0.233441,0.667853,0.642543,0.647167,0.645808,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,29.0
15,"(10,)","(64, 64)",0.1,0.001,0.309452,0.632348,0.62483,0.654893,0.638104,0.218463,0.683274,0.663414,0.661997,0.66705,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,27.0
15,"(10,)","(64, 64)",0.5,0.0001,0.400168,0.59517,0.585826,0.613313,0.601723,0.233787,0.642942,0.603019,0.621279,0.602357,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,25.0
15,"(10,)","(64, 64)",0.5,0.0005,0.363186,0.628853,0.62337,0.643795,0.633277,0.205282,0.683274,0.637378,0.65957,0.634717,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,26.0
15,"(10,)","(64, 64)",0.5,0.001,0.339524,0.632348,0.626405,0.646973,0.636349,0.232067,0.660735,0.630667,0.639123,0.633193,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,24.0
15,"(10,)","(128, 128)",0.1,0.0001,0.527819,0.618367,0.615961,0.625654,0.620849,0.271455,0.67497,0.634912,0.645017,0.632417,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,34.0
15,"(10,)","(128, 128)",0.1,0.0005,0.436833,0.603432,0.599224,0.616004,0.607295,0.260213,0.690391,0.651953,0.661197,0.649441,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,35.0
15,"(10,)","(128, 128)",0.1,0.001,0.342665,0.630442,0.625348,0.649401,0.636848,0.263992,0.639383,0.610329,0.614575,0.616433,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,33.0
15,"(10,)","(128, 128)",0.5,0.0001,0.342434,0.61932,0.611733,0.637054,0.625747,0.226806,0.62159,0.563586,0.580501,0.565916,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,31.0


In [14]:
# Show the best-model results frame (the recorded output has one row per seed: 1, 12, 123).
best_swnu_network_umap

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,BiLSTM,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,0.333948,0.633937,0.627395,"[0.578021978021978, 0.6767676767676768]",0.656417,"[0.7285318559556787, 0.5843023255813954]",0.641526,"[0.4790528233151184, 0.804]",0.206417,0.661922,...,1,True,focal,2,False,,Conv1d,,concatenation,64
0,0.383187,0.608198,0.607233,"[0.5877632898696089, 0.6267029972752044]",0.614031,"[0.6540178571428571, 0.5740432612312812]",0.611849,"[0.5336976320582878, 0.69]",0.186065,0.772242,...,12,True,focal,2,False,,Conv1d,,concatenation,64
0,0.346017,0.609152,0.609023,"[0.6019417475728156, 0.6161048689138577]",0.611858,"[0.6444906444906445, 0.579225352112676]",0.611332,"[0.5646630236794171, 0.658]",0.209281,0.69395,...,123,True,focal,2,False,,Conv1d,,concatenation,64


0.6369639772772482

In [15]:
# Mean of the `f1` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_umap["f1"].mean()

0.6145504264035235

In [16]:
# Mean of the `precision` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_umap["precision"].mean()

0.6274352160857555

In [17]:
# Mean of the `recall` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_umap["recall"].mean()

0.6215689131754706

In [18]:
# `f1_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_umap["f1_scores"]), axis=0)

array([0.58924234, 0.63985851])

In [19]:
# `precision_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_umap["precision_scores"]), axis=0)

array([0.67568012, 0.57919031])

In [20]:
# `recall_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_umap["recall_scores"]), axis=0)

array([0.52580449, 0.71733333])

## UMAP: unidirectional LSTM

In [12]:
# Hyperparameter search: UMAP-reduced embeddings with BiLSTM=False (unidirectional LSTM).
# Only the first two returned values are kept; the other two are bound to `_` / `__`.
swnu_network_umap_uni, best_swnu_network_umap_uni, _, __ = swnu_network_hyperparameter_search(
    # --- data ---
    df=df_rumours,
    embeddings=sbert_embeddings,
    id_column="timeline_id",
    label_column="label",
    y_data=y_data,
    output_dim=output_dim,
    # --- fixed train / dev / test split (no k-fold) ---
    split_indices=split_indices,
    split_ids=None,  # torch.tensor(df_rumours['timeline_id'].astype(int)),
    k_fold=False,
    # --- path construction ---
    history_lengths=[size],
    dim_reduce_methods=["umap"],
    dimensions=dimensions,
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    log_signature=True,
    # --- model grid ---
    swnu_hidden_dim_sizes_and_sig_depths=swnu_hidden_dim_sizes_and_sig_depths,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    BiLSTM=False,
    # --- optimisation ---
    num_epochs=num_epochs,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    patience=patience,
    validation_metric=validation_metric,
    device=device,
    # --- output ---
    results_output=f"{output_dir}/swnu_network_umap_focal_{gamma}_{size}_uni.csv",
    verbose=False,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: umap
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_umap_focal_2_35_uni.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_umap_focal_2_35_uni_best_model.csv


In [13]:
# Average every numeric column over the seeds for each hyperparameter combination.
# numeric_only=True drops the non-numeric columns explicitly; the bare .mean() relied on
# deprecated silent dropping (it emitted a warning here and raises in pandas >= 2.0).
swnu_network_umap_uni.groupby(
    [
        "dimensions",
        "swnu_hidden_dim",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  swnu_network_umap_uni.groupby(["dimensions",


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,loss,accuracy,f1,precision,recall,valid_loss,valid_accuracy,valid_f1,valid_precision,valid_recall,...,include_features_in_path,embedding_dim,num_features,log_signature,seed,BiLSTM,gamma,k_fold,batch_size,model_id
dimensions,swnu_hidden_dim,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
15,"(10,)","(64, 64)",0.1,0.0001,0.360039,0.585955,0.578512,0.599388,0.588962,0.228262,0.702254,0.65792,0.697563,0.660158,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,28.0
15,"(10,)","(64, 64)",0.1,0.0005,0.3507,0.62504,0.615264,0.653378,0.633591,0.204655,0.70344,0.677159,0.678869,0.676553,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,29.0
15,"(10,)","(64, 64)",0.1,0.001,0.327866,0.62504,0.622178,0.634257,0.628593,0.218793,0.650059,0.583897,0.6146,0.586857,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,27.0
15,"(10,)","(64, 64)",0.5,0.0001,0.341996,0.590404,0.578894,0.61389,0.597883,0.242103,0.672598,0.651604,0.651387,0.65375,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,25.0
15,"(10,)","(64, 64)",0.5,0.0005,0.349566,0.625358,0.61884,0.646415,0.631634,0.24341,0.658363,0.631157,0.63202,0.632033,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,26.0
15,"(10,)","(64, 64)",0.5,0.001,0.320726,0.642834,0.63833,0.659787,0.646754,0.229235,0.661922,0.601764,0.642803,0.61374,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,24.0
15,"(10,)","(128, 128)",0.1,0.0001,0.358317,0.612965,0.611169,0.621108,0.616819,0.222732,0.654804,0.596517,0.617335,0.600422,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,34.0
15,"(10,)","(128, 128)",0.1,0.0005,0.339834,0.628535,0.619443,0.656361,0.636128,0.224488,0.663108,0.631883,0.633445,0.632243,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,35.0
15,"(10,)","(128, 128)",0.1,0.001,0.303645,0.636161,0.628067,0.659461,0.641777,0.230831,0.66548,0.64186,0.641032,0.645352,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,33.0
15,"(10,)","(128, 128)",0.5,0.0001,0.315919,0.59803,0.594507,0.606477,0.601271,0.233131,0.638197,0.569016,0.598158,0.572626,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,31.0


In [14]:
# Show the best-model results frame (the recorded output has one row per seed: 1, 12, 123).
best_swnu_network_umap_uni

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,BiLSTM,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,0.41317,0.605338,0.591806,"[0.5174825174825175, 0.6661290322580644]",0.638277,"[0.7184466019417476, 0.5581081081081081]",0.615186,"[0.40437158469945356, 0.826]",0.230895,0.633452,...,1,False,focal,2,False,,Conv1d,,concatenation,64
0,0.571896,0.611058,0.610208,"[0.5920000000000001, 0.628415300546448]",0.616621,"[0.656319290465632, 0.5769230769230769]",0.614581,"[0.5391621129326047, 0.69]",0.235289,0.768683,...,12,False,focal,2,False,,Conv1d,,concatenation,64
0,0.650293,0.551001,0.545443,"[0.5957081545064378, 0.49517684887459806]",0.548399,"[0.5633116883116883, 0.5334872979214781]",0.547029,"[0.6320582877959927, 0.462]",0.194062,0.761566,...,123,False,focal,2,False,,Conv1d,,concatenation,64


In [15]:
# Mean of the `f1` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_umap_uni["f1"].mean()

0.5824853089446776

In [16]:
# Mean of the `precision` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_umap_uni["precision"].mean()

0.6010993439452885

In [17]:
# Mean of the `recall` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_umap_uni["recall"].mean()

0.5922653309046751

In [18]:
# `f1_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_umap_uni["f1_scores"]), axis=0)

array([0.56839689, 0.59657373])

In [19]:
# `precision_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_umap_uni["precision_scores"]), axis=0)

array([0.64602586, 0.55617283])

In [20]:
# `recall_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_umap_uni["recall_scores"]), axis=0)

array([0.52519733, 0.65933333])

## GRP (Gaussian random projection)

In [21]:
# Hyperparameter search: Gaussian-random-projection-reduced embeddings, BiLSTM taken
# from `bidirectional`. Only the first two returned values are kept; the other two are
# bound to `_` / `__`.
swnu_network_grp, best_swnu_network_grp, _, __ = swnu_network_hyperparameter_search(
    # --- data ---
    df=df_rumours,
    embeddings=sbert_embeddings,
    id_column="timeline_id",
    label_column="label",
    y_data=y_data,
    output_dim=output_dim,
    # --- fixed train / dev / test split (no k-fold) ---
    split_indices=split_indices,
    split_ids=None,  # torch.tensor(df_rumours['timeline_id'].astype(int)),
    k_fold=False,
    # --- path construction ---
    history_lengths=[size],
    dim_reduce_methods=["gaussian_random_projection"],
    dimensions=dimensions,
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    log_signature=True,
    # --- model grid ---
    swnu_hidden_dim_sizes_and_sig_depths=swnu_hidden_dim_sizes_and_sig_depths,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    BiLSTM=bidirectional,
    # --- optimisation ---
    num_epochs=num_epochs,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    patience=patience,
    validation_metric=validation_metric,
    device=device,
    # --- output ---
    results_output=f"{output_dir}/swnu_network_grp_focal_{gamma}_{size}.csv",
    verbose=False,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_grp_focal_2_35.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_grp_focal_2_35_best_model.csv


In [22]:
# Average every numeric column over the seeds for each hyperparameter combination.
# numeric_only=True drops the non-numeric columns explicitly; the bare .mean() relied on
# deprecated silent dropping (it emitted a warning here and raises in pandas >= 2.0).
swnu_network_grp.groupby(
    [
        "dimensions",
        "swnu_hidden_dim",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  swnu_network_grp.groupby(["dimensions",


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,loss,accuracy,f1,precision,recall,valid_loss,valid_accuracy,valid_f1,valid_precision,valid_recall,...,include_features_in_path,embedding_dim,num_features,log_signature,seed,BiLSTM,gamma,k_fold,batch_size,model_id
dimensions,swnu_hidden_dim,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
15,"(10,)","(64, 64)",0.1,0.0001,0.297851,0.628535,0.61885,0.65878,0.635949,0.222692,0.696323,0.665457,0.679948,0.668857,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,28.0
15,"(10,)","(64, 64)",0.1,0.0005,0.422844,0.573244,0.571826,0.573015,0.572534,0.218956,0.728351,0.693916,0.713597,0.687671,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,29.0
15,"(10,)","(64, 64)",0.1,0.001,0.453006,0.588179,0.58779,0.589248,0.589004,0.200969,0.715302,0.683531,0.690963,0.679538,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,27.0
15,"(10,)","(64, 64)",0.5,0.0001,0.321159,0.611058,0.61013,0.61344,0.61235,0.228281,0.677343,0.622313,0.646753,0.620924,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,25.0
15,"(10,)","(64, 64)",0.5,0.0005,0.347587,0.601843,0.599555,0.600807,0.600124,0.198709,0.730724,0.681986,0.716218,0.674773,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,26.0
15,"(10,)","(64, 64)",0.5,0.001,0.371043,0.598983,0.597605,0.59949,0.598672,0.203284,0.709371,0.652367,0.693096,0.649578,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,24.0
15,"(10,)","(128, 128)",0.1,0.0001,0.361233,0.612011,0.605134,0.631296,0.615343,0.2188,0.717675,0.680255,0.716374,0.674371,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,34.0
15,"(10,)","(128, 128)",0.1,0.0005,0.418845,0.590086,0.589391,0.592692,0.591778,0.194674,0.735469,0.710194,0.726336,0.706613,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,35.0
15,"(10,)","(128, 128)",0.1,0.001,0.441688,0.60375,0.601402,0.604775,0.603434,0.21044,0.727165,0.683988,0.708555,0.6769,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,33.0
15,"(10,)","(128, 128)",0.5,0.0001,0.356776,0.619956,0.612586,0.642312,0.625134,0.198909,0.754448,0.722759,0.759362,0.717996,...,1.0,384.0,2.0,1.0,45.333333,1.0,2.0,0.0,64.0,31.0


In [23]:
# Show the best-model results frame (the recorded output has one row per seed: 1, 12, 123).
best_swnu_network_grp

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,BiLSTM,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,0.381269,0.635844,0.635804,"[0.6319845857418113, 0.6396226415094339]",0.638057,"[0.6707566462167689, 0.6053571428571428]",0.637725,"[0.5974499089253188, 0.678]",0.179601,0.80427,...,1,True,focal,2,False,,Conv1d,,concatenation,64
0,0.254553,0.627264,0.605198,"[0.511860174781523, 0.6985350809560523]",0.690937,"[0.8134920634920635, 0.5683814303638645]",0.639703,"[0.37340619307832423, 0.906]",0.243652,0.690391,...,12,True,focal,2,False,,Conv1d,,concatenation,64
0,0.434506,0.596759,0.596757,"[0.5975261655566129, 0.5959885386819483]",0.597941,"[0.6254980079681275, 0.5703839122486288]",0.597974,"[0.5719489981785064, 0.624]",0.173473,0.768683,...,123,True,focal,2,False,,Conv1d,,concatenation,64


0.6118406155835983

In [24]:
# Mean of the `f1` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_grp["f1"].mean()

0.6125861978712303

In [25]:
# Mean of the `precision` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_grp["precision"].mean()

0.642311533857766

In [26]:
# Mean of the `recall` column over the rows of the best-model frame (one row per seed in the recorded run).
best_swnu_network_grp["recall"].mean()

0.6251341833636915

In [27]:
# `f1_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_grp["f1_scores"]), axis=0)

array([0.58045698, 0.64471542])

In [28]:
# `precision_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_grp["precision_scores"]), axis=0)

array([0.70324891, 0.58137416])

In [29]:
# `recall_scores` holds a two-element list per row (presumably one score per class);
# stack the rows and average element-wise across them.
np.mean(np.stack(best_swnu_network_grp["recall_scores"]), axis=0)

array([0.51426837, 0.736     ])

## GRP: unidirectional LSTM

In [30]:
# Hyperparameter search: Gaussian-random-projection-reduced embeddings with BiLSTM=False
# (unidirectional LSTM). Only the first two returned values are kept; the other two are
# bound to `_` / `__`.
swnu_network_grp_uni, best_swnu_network_grp_uni, _, __ = swnu_network_hyperparameter_search(
    # --- data ---
    df=df_rumours,
    embeddings=sbert_embeddings,
    id_column="timeline_id",
    label_column="label",
    y_data=y_data,
    output_dim=output_dim,
    # --- fixed train / dev / test split (no k-fold) ---
    split_indices=split_indices,
    split_ids=None,  # torch.tensor(df_rumours['timeline_id'].astype(int)),
    k_fold=False,
    # --- path construction ---
    history_lengths=[size],
    dim_reduce_methods=["gaussian_random_projection"],
    dimensions=dimensions,
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    log_signature=True,
    # --- model grid ---
    swnu_hidden_dim_sizes_and_sig_depths=swnu_hidden_dim_sizes_and_sig_depths,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    BiLSTM=False,
    # --- optimisation ---
    num_epochs=num_epochs,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    patience=patience,
    validation_metric=validation_metric,
    device=device,
    # --- output ---
    results_output=f"{output_dir}/swnu_network_grp_focal_{gamma}_{size}_uni.csv",
    verbose=False,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_grp_focal_2_35_uni.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/swnu_network_grp_focal_2_35_uni_best_model.csv


In [31]:
# Average the metrics for each hyperparameter configuration over the seeds
# that were run (the averaged `seed` column in the output reflects this).
# `numeric_only=True` is passed explicitly: the results frame appears to also
# carry non-numeric columns (e.g. list-valued per-class scores), and relying on
# the implicit default emits a pandas FutureWarning on 1.5 and raises a
# TypeError from pandas 2.0 onwards.
swnu_network_grp_uni.groupby(
    [
        "dimensions",
        "swnu_hidden_dim",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  swnu_network_grp_uni.groupby(["dimensions",


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,loss,accuracy,f1,precision,recall,valid_loss,valid_accuracy,valid_f1,valid_precision,valid_recall,...,include_features_in_path,embedding_dim,num_features,log_signature,seed,BiLSTM,gamma,k_fold,batch_size,model_id
dimensions,swnu_hidden_dim,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
15,"(10,)","(64, 64)",0.1,0.0001,0.335073,0.609787,0.605815,0.623489,0.614825,0.206288,0.716489,0.671554,0.702845,0.664302,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,28.0
15,"(10,)","(64, 64)",0.1,0.0005,0.388761,0.604703,0.604012,0.606674,0.605951,0.217998,0.715302,0.676387,0.703576,0.672509,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,29.0
15,"(10,)","(64, 64)",0.1,0.001,0.518921,0.623133,0.622361,0.624393,0.623737,0.28494,0.729537,0.689276,0.735836,0.683682,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,27.0
15,"(10,)","(64, 64)",0.5,0.0001,0.344333,0.598348,0.589263,0.620831,0.604372,0.215194,0.714116,0.668758,0.710335,0.666658,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,25.0
15,"(10,)","(64, 64)",0.5,0.0005,0.367809,0.609469,0.606847,0.619223,0.613926,0.191984,0.753262,0.708661,0.742913,0.705116,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,26.0
15,"(10,)","(64, 64)",0.5,0.001,0.391098,0.589133,0.588054,0.588769,0.588487,0.25208,0.702254,0.664607,0.686474,0.659455,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,24.0
15,"(10,)","(128, 128)",0.1,0.0001,0.330173,0.622815,0.621417,0.629193,0.625843,0.208501,0.716489,0.660645,0.71323,0.653759,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,34.0
15,"(10,)","(128, 128)",0.1,0.0005,0.339445,0.617731,0.617446,0.618057,0.61816,0.220798,0.711744,0.674915,0.692995,0.669013,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,35.0
15,"(10,)","(128, 128)",0.1,0.001,0.378575,0.616142,0.615371,0.617714,0.616969,0.210546,0.718861,0.683216,0.694512,0.678817,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,33.0
15,"(10,)","(128, 128)",0.5,0.0001,0.398534,0.59803,0.591742,0.613791,0.604217,0.207117,0.715302,0.681601,0.695014,0.676726,...,1.0,384.0,2.0,1.0,45.333333,0.0,2.0,0.0,64.0,31.0


In [32]:
# Show the results of the best model found by the search: one row per seed,
# including overall metrics and the per-class `*_scores` lists.
# NOTE(review): presumably this frame is the second return value of the
# hyperparameter search call above — confirm against that cell.
best_swnu_network_grp_uni

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,BiLSTM,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,0.321927,0.617731,0.617229,"[0.6310947562097515, 0.6033630069238378]",0.617208,"[0.637546468401487, 0.5968688845401174]",0.617386,"[0.6247723132969034, 0.61]",0.184549,0.775801,...,1,False,focal,2,False,,Conv1d,,concatenation,64
0,0.447548,0.606292,0.604428,"[0.5772773797338792, 0.631578947368421]",0.614463,"[0.6588785046728972, 0.5700483091787439]",0.610831,"[0.5136612021857924, 0.708]",0.236997,0.72242,...,12,False,focal,2,False,,Conv1d,,concatenation,64
0,0.586376,0.589133,0.585807,"[0.5486910994764398, 0.6229221347331583]",0.599487,"[0.645320197044335, 0.5536547433903577]",0.594616,"[0.4772313296903461, 0.712]",0.200912,0.72242,...,123,False,focal,2,False,,Conv1d,,concatenation,64


In [33]:
# Overall F1 of the best model, averaged over its rows (one per seed).
best_swnu_network_grp_uni.loc[:, "f1"].mean()

0.6024878874075813

In [34]:
# Overall precision of the best model, averaged over its rows (one per seed).
best_swnu_network_grp_uni.loc[:, "precision"].mean()

0.6103861845379898

In [35]:
# Overall recall of the best model, averaged over its rows (one per seed).
best_swnu_network_grp_uni.loc[:, "recall"].mean()

0.6076108075288403

In [36]:
# Per-class F1: stack the per-row score lists into a 2-D array and average
# down the rows, leaving one value per class.
np.mean(np.stack(best_swnu_network_grp_uni["f1_scores"]), axis=0)

array([0.58568775, 0.61928803])

In [37]:
# Per-class precision: stack the per-row score lists into a 2-D array and
# average down the rows, leaving one value per class.
np.mean(np.stack(best_swnu_network_grp_uni["precision_scores"]), axis=0)

array([0.64724839, 0.57352398])

In [38]:
# Per-class recall: stack the per-row score lists into a 2-D array and
# average down the rows, leaving one value per class.
np.mean(np.stack(best_swnu_network_grp_uni["recall_scores"]), axis=0)

array([0.53855495, 0.67666667])