In [1]:
import pickle
import numpy as np
import pandas as pd
import re
import os

# Seed passed as `seed=` to obtain_signatures_history further down; the
# hyperparameter searches use the separate `seeds` list instead.
seed = 2023

In [2]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    histories_baseline_hyperparameter_search,
    obtain_signatures_history,
)

In [3]:
# Directory that receives the CSV files written via `results_output=` in the
# hyperparameter-search cells.
output_dir = "rumours_output"
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate isdir() test.
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [4]:
# Execute the embedding-loading script so that its variables land in this
# notebook's namespace.
# Later cells use `df_rumours`, `sbert_embeddings`, `y_data` and `output_dim`
# without defining them anywhere in this notebook, so they are presumably
# created by this script — TODO confirm against load_sbert-embeddings.py.
%run load_sbert-embeddings.py

In [5]:
# Preview the first rows; the columns used by later cells are `label`,
# `timeline_id` and `set`.
df_rumours.head()

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


In [6]:
# Shape of the embedding matrix that is passed as `embeddings=` in the cells below.
sbert_embeddings.shape

(5568, 384)

In [7]:
# Build log-signature features for a single configuration (Gaussian random
# projection to 10 dimensions, signature depth 3). The next cell inspects the
# shape of the result.
# `obtain_signatures_history` is imported together with the other
# nlpsig_networks helper in the import cell near the top of the notebook.
x_data = obtain_signatures_history(
    method="gaussian_random_projection",
    dimension=10,
    sig_depth=3,
    log_signature=True,
    df=df_rumours,
    id_column="timeline_id",
    label_column="label",
    embeddings=sbert_embeddings,
    seed=seed,
    path_indices=None,
    concatenate_current=False,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

In [8]:
# Check the feature width produced for (dimension=10, sig_depth=3); it can be
# compared with the log-signature channel counts computed a few cells below.
x_data.shape

torch.Size([5568, 385])

# Baseline: FFN using signatures

In [9]:
# Settings shared by the UMAP and GRP hyperparameter searches below.

# Training length, validation metric and patience.
num_epochs = 100
validation_metric = "f1"
patience = 5

# Loss function and its gamma parameter.
loss = "focal"
gamma = 2

# Grid of values explored by the search.
hidden_dim_sizes = [[width, width] for width in (64, 128, 256, 512)]
dropout_rates = [0.5, 0.1]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [1, 12, 123]

# Row indices for the train / dev / test partitions recorded in the `set`
# column, in that order.
split_indices = tuple(
    df_rumours.loc[df_rumours["set"] == split_name].index
    for split_name in ("train", "dev", "test")
)

In [10]:
# (reduced dimension, signature depth) pairs handed to the searches via
# `dimension_and_sig_depths=`.
log_signature_dimensions_and_sig_depths = [
    (28, 2),
    (10, 3),
    (6, 4),
]

In [11]:
import signatory

# Log-signature channel count for each (dimension, depth) pair in the grid.
[
    signatory.logsignature_channels(in_channels, sig_depth)
    for in_channels, sig_depth in log_signature_dimensions_and_sig_depths
]

[406, 385, 406]

In [12]:
# NOTE: this whole cell is commented out and has no effect. The live settings
# are the ones assigned in the cells above; several values here differ from
# them (e.g. dropout 0.2 instead of 0.5, five seeds instead of three).
# NOTE(review): candidate for removal once it is no longer needed for reference.
# dim_reduce_methods = ["umap", "gaussian_random_projection"]
# num_epochs = 100
# batch=64
# log_sig = True
# log_signature_dimensions_and_sig_depths = [(28, 2), (10, 3)]#[(28, 2), (10, 3), (6, 4)]
# hidden_dim_sizes = [[64,64],[128,128], [256,256]]#[[64,64], [128,128], [256,256]]
# dropout_rates =  [0.2, 0.1]
# learning_rates = [1e-3, 1e-4, 5e-4]
# seeds = [0, 1, 12, 123, 1234]
# loss = "focal"
# gamma = 2
# patience=5
# validation_metric = "f1"
# split_indices = (df_rumours[df_rumours['set']=='train'].index,
#                  df_rumours[df_rumours['set']=='dev'].index,
#                  df_rumours[df_rumours['set']=='test'].index)

## UMAP

In [13]:
# Hyperparameter search for the FFN baseline on log-signature features, using
# UMAP as the only dimension-reduction method.
#
# `y_data` and `output_dim` are not defined in any cell of this notebook;
# presumably they come from load_sbert-embeddings.py — TODO confirm.
#
# The last two return values are not used afterwards. They are bound to
# explicit throwaway names instead of `_` / `__`, because IPython uses those
# names for its output history and rebinding them interferes with that.
#
# NOTE(review): the results file name ends in "_kfold" although k_fold=False;
# the name is left unchanged so previously written result files still match.
(
    ffn_logsignature_umap,
    best_ffn_logsignature_umap,
    _umap_unused_1,
    _umap_unused_2,
) = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=df_rumours,
    id_column="timeline_id",
    label_column="label",
    embeddings=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=True,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    log_signature=True,
    dim_reduce_methods=["umap"],
    dimension_and_sig_depths=log_signature_dimensions_and_sig_depths,
    split_ids=None,  # torch.tensor(df_rumours['timeline_id'].astype(int)),
    split_indices=split_indices,
    k_fold=False,
    patience=patience,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_logsignature_umap_focal_{gamma}_kfold.csv",
    verbose=False,
)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in rumours_output/ffn_logsignature_umap_focal_2_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/ffn_logsignature_umap_focal_2_kfold_best_model.csv


In [14]:
# First return value of the UMAP search: the full results table.
ffn_logsignature_umap

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,gamma,k_fold,n_splits,batch_size,model_id,input_dim,dimension,sig_depth,method,log_signature
0,0.285992,0.471878,0.460399,"[0.5391014975041597, 0.3816964285714286]",0.463995,"[0.49617151607963245, 0.4318181818181818]",0.466082,"[0.5901639344262295, 0.342]",0.242133,0.647687,...,2,False,,64,0.00,790,28,2,umap,True
0,0.276866,0.457579,0.441867,"[0.5355102040816326, 0.34822451317296677]",0.446357,"[0.48520710059171596, 0.4075067024128686]",0.450725,"[0.5974499089253188, 0.304]",0.256959,0.612100,...,2,False,,64,0.00,790,28,2,umap,True
0,0.286557,0.450906,0.441658,"[0.5135135135135134, 0.3698030634573304]",0.443476,"[0.47874015748031495, 0.4082125603864734]",0.445867,"[0.5537340619307832, 0.338]",0.247360,0.594306,...,2,False,,64,0.00,790,28,2,umap,True
0,0.277643,0.505243,0.499943,"[0.5514261019878999, 0.44845908607863977]",0.501565,"[0.524671052631579, 0.47845804988662133]",0.501528,"[0.581056466302368, 0.422]",0.247274,0.576512,...,2,False,,64,0.10,790,28,2,umap,True
0,0.260591,0.494757,0.477055,"[0.573268921095008, 0.3808411214953271]",0.485787,"[0.5137085137085137, 0.45786516853932585]",0.487226,"[0.6484517304189436, 0.326]",0.241095,0.622776,...,2,False,,64,0.10,790,28,2,umap,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.852361,0.576740,0.576320,"[0.589648798521257, 0.562992125984252]",0.576381,"[0.5984990619136961, 0.5542635658914729]",0.576528,"[0.581056466302368, 0.572]",0.361143,0.537367,...,2,False,,64,2.22,790,6,4,umap,True
0,0.504039,0.585319,0.580363,"[0.6259673258813414, 0.5347593582887701]",0.583773,"[0.5928338762214984, 0.5747126436781609]",0.581512,"[0.663023679417122, 0.5]",0.521041,0.530249,...,2,False,,64,2.22,790,6,4,umap,True
0,0.390058,0.612011,0.587847,"[0.6876438986953185, 0.4880503144654088]",0.625896,"[0.5941644562334217, 0.6576271186440678]",0.602015,"[0.8160291438979964, 0.388]",0.386084,0.633452,...,2,False,,64,2.23,790,6,4,umap,True
0,0.457021,0.609152,0.603881,"[0.6495726495726496, 0.5581896551724138]",0.608528,"[0.6119162640901772, 0.6051401869158879]",0.605084,"[0.692167577413479, 0.518]",0.442493,0.644128,...,2,False,,64,2.23,790,6,4,umap,True


In [15]:
# Second return value of the UMAP search: results for the selected best
# configuration (presumably one row per entry in `seeds` — confirm).
best_ffn_logsignature_umap

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,loss_function,gamma,k_fold,n_splits,batch_size,input_dim,dimension,sig_depth,method,log_signature
0,0.289668,0.529075,0.525181,"[0.4821802935010482, 0.5681818181818182]",0.53628,"[0.5679012345679012, 0.5046583850931677]",0.534472,"[0.41894353369763204, 0.65]",0.258047,0.604982,...,focal,2,False,,64,769,10,3,umap,True
0,0.337377,0.551954,0.5517,"[0.5623836126629422, 0.541015625]",0.551932,"[0.5752380952380952, 0.5286259541984732]",0.552046,"[0.5500910746812386, 0.554]",0.241171,0.672598,...,focal,2,False,,64,769,10,3,umap,True
0,0.340627,0.558627,0.551956,"[0.6066270178419712, 0.49728555917481004]",0.556207,"[0.5684713375796179, 0.5439429928741093]",0.554137,"[0.6502732240437158, 0.458]",0.359628,0.594306,...,focal,2,False,,64,769,10,3,umap,True


In [17]:
# Average of the `f1` column over the best-model rows (UMAP).
best_ffn_logsignature_umap["f1"].mean()

0.542945654393765

In [18]:
# Average of the `precision` column over the best-model rows (UMAP).
best_ffn_logsignature_umap["precision"].mean()

0.5481396665918942

In [19]:
# Average of the `recall` column over the best-model rows (UMAP).
best_ffn_logsignature_umap["recall"].mean()

0.5468846387370978

In [20]:
# Each `f1_scores` cell holds a list of scores (presumably one per class —
# confirm). Stack the rows into a 2-D array and average down the rows to get
# one mean per list position.
np.stack(best_ffn_logsignature_umap["f1_scores"]).mean(axis=0)

array([0.55039697, 0.53549433])

In [21]:
# Same row-wise stacking as for `f1_scores`, applied to `precision_scores`:
# one mean per list position (presumably per class — confirm).
np.stack(best_ffn_logsignature_umap["precision_scores"]).mean(axis=0)

array([0.57053689, 0.52574244])

In [22]:
# Same row-wise stacking as for `f1_scores`, applied to `recall_scores`:
# one mean per list position (presumably per class — confirm).
np.stack(best_ffn_logsignature_umap["recall_scores"]).mean(axis=0)

array([0.53976928, 0.554     ])

## GRP (Gaussian random projection)

In [23]:
# Hyperparameter search for the FFN baseline on log-signature features, using
# Gaussian random projection as the only dimension-reduction method.
#
# `y_data` and `output_dim` are not defined in any cell of this notebook;
# presumably they come from load_sbert-embeddings.py — TODO confirm.
#
# The last two return values are not used afterwards. They are bound to
# explicit throwaway names instead of `_` / `__`, because IPython uses those
# names for its output history and rebinding them interferes with that.
#
# NOTE(review): the results file name ends in "_kfold" although k_fold=False;
# the name is left unchanged so previously written result files still match.
(
    ffn_logsignature_grp,
    best_ffn_logsignature_grp,
    _grp_unused_1,
    _grp_unused_2,
) = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=df_rumours,
    id_column="timeline_id",
    label_column="label",
    embeddings=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=True,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    log_signature=True,
    dim_reduce_methods=["gaussian_random_projection"],
    dimension_and_sig_depths=log_signature_dimensions_and_sig_depths,
    split_ids=None,  # torch.tensor(df_rumours['timeline_id'].astype(int)),
    split_indices=split_indices,
    k_fold=False,
    patience=patience,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_logsignature_grp_focal_{gamma}_kfold.csv",
    verbose=False,
)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in rumours_output/ffn_logsignature_grp_focal_2_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/ffn_logsignature_grp_focal_2_kfold_best_model.csv


In [24]:
# First return value of the GRP search: the full results table.
ffn_logsignature_grp

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,gamma,k_fold,n_splits,batch_size,model_id,input_dim,dimension,sig_depth,method,log_signature
0,0.950020,0.597712,0.595964,"[0.6225402504472273, 0.5693877551020409]",0.596425,"[0.6115992970123023, 0.58125]",0.595940,"[0.6338797814207651, 0.558]",0.473022,0.608541,...,2,False,,64,0.00,790,28,2,gaussian_random_projection,True
0,0.429582,0.587226,0.583128,"[0.6244579358196011, 0.5417989417989418]",0.585654,"[0.5960264900662252, 0.5752808988764045]",0.583869,"[0.6557377049180327, 0.512]",0.297929,0.572954,...,2,False,,64,0.00,790,28,2,gaussian_random_projection,True
0,0.336393,0.581506,0.577668,"[0.6179286335944301, 0.5374077976817704]",0.579798,"[0.5916666666666667, 0.5679287305122495]",0.578315,"[0.6466302367941712, 0.51]",0.276014,0.569395,...,2,False,,64,0.00,790,28,2,gaussian_random_projection,True
0,0.318603,0.571973,0.562293,"[0.6273858921161826, 0.49720044792833157]",0.570553,"[0.5762195121951219, 0.5648854961832062]",0.566262,"[0.6885245901639344, 0.444]",0.263374,0.572954,...,2,False,,64,0.10,790,28,2,gaussian_random_projection,True
0,0.319115,0.560534,0.548392,"[0.6224406224406224, 0.47434435575826683]",0.558600,"[0.5654761904761905, 0.5517241379310345]",0.554084,"[0.692167577413479, 0.416]",0.260728,0.572954,...,2,False,,64,0.10,790,28,2,gaussian_random_projection,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.368884,0.597712,0.596603,"[0.5754527162977867, 0.6177536231884059]",0.603633,"[0.6426966292134831, 0.5645695364238411]",0.601474,"[0.5209471766848816, 0.682]",0.248410,0.661922,...,2,False,,64,2.22,790,6,4,gaussian_random_projection,True
0,0.291343,0.597712,0.595178,"[0.6272084805653709, 0.5631469979296067]",0.596305,"[0.6089193825042881, 0.5836909871244635]",0.595315,"[0.6466302367941712, 0.544]",0.227605,0.683274,...,2,False,,64,2.22,790,6,4,gaussian_random_projection,True
0,0.280386,0.626311,0.624780,"[0.6008146639511203, 0.6487455197132616]",0.634478,"[0.6812933025404158, 0.5876623376623377]",0.630670,"[0.5373406193078324, 0.724]",0.248542,0.654804,...,2,False,,64,2.23,790,6,4,gaussian_random_projection,True
0,0.418444,0.579600,0.578564,"[0.5576730190571715, 0.5994550408719346]",0.584810,"[0.6205357142857143, 0.5490848585690515]",0.583188,"[0.5063752276867031, 0.66]",0.267180,0.629893,...,2,False,,64,2.23,790,6,4,gaussian_random_projection,True


In [25]:
# Second return value of the GRP search: results for the selected best
# configuration (presumably one row per entry in `seeds` — confirm).
best_ffn_logsignature_grp

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,loss_function,gamma,k_fold,n_splits,batch_size,input_dim,dimension,sig_depth,method,log_signature
0,0.262463,0.621544,0.621407,"[0.6286248830682881, 0.6141885325558796]",0.621754,"[0.6461538461538462, 0.5973534971644613]",0.622011,"[0.6120218579234973, 0.632]",0.230735,0.708185,...,focal,2,False,,64,790,6,4,gaussian_random_projection,True
0,1.173876,0.5653,0.56492,"[0.5520628683693517, 0.5777777777777778]",0.568539,"[0.5991471215351812, 0.5379310344827586]",0.56792,"[0.51183970856102, 0.624]",0.63085,0.658363,...,focal,2,False,,64,790,6,4,gaussian_random_projection,True
0,0.273432,0.612965,0.612313,"[0.6282051282051282, 0.5964214711729622]",0.612281,"[0.6316758747697975, 0.5928853754940712]",0.612386,"[0.6247723132969034, 0.6]",0.228428,0.676157,...,focal,2,False,,64,790,6,4,gaussian_random_projection,True


In [27]:
# Average of the `f1` column over the best-model rows (GRP).
best_ffn_logsignature_grp["f1"].mean()

0.5995467768582313

In [28]:
# Average of the `precision` column over the best-model rows (GRP).
best_ffn_logsignature_grp["precision"].mean()

0.6008577916000193

In [29]:
# Average of the `recall` column over the best-model rows (GRP).
best_ffn_logsignature_grp["recall"].mean()

0.6007723132969034

In [30]:
# Each `f1_scores` cell holds a list of scores (presumably one per class —
# confirm). Stack the rows into a 2-D array and average down the rows to get
# one mean per list position.
np.stack(best_ffn_logsignature_grp["f1_scores"]).mean(axis=0)

array([0.60296429, 0.59612926])

In [31]:
# Same row-wise stacking as for `f1_scores`, applied to `precision_scores`:
# one mean per list position (presumably per class — confirm).
np.stack(best_ffn_logsignature_grp["precision_scores"]).mean(axis=0)

array([0.62565895, 0.57605664])

In [32]:
# Same row-wise stacking as for `f1_scores`, applied to `recall_scores`:
# one mean per list position (presumably per class — confirm).
np.stack(best_ffn_logsignature_grp["recall_scores"]).mean(axis=0)

array([0.58287796, 0.61866667])