In [1]:
import numpy as np
import pickle
import os

# NOTE(review): `seed` is not referenced by any other visible cell (the
# hyperparameter search is given the `seeds` list instead) — confirm whether
# this value is still needed.
seed = 2023

In [2]:
import torch

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Echo the selected device as the cell output.
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.seqsignet_attention_encoder_functions import (
    seqsignet_attention_encoder_hyperparameter_search,
)

In [4]:
# Directory that receives the CSV files written by the hyperparameter searches.
output_dir = "rumours_output"
# exist_ok=True makes the call a no-op when the directory is already there,
# so no separate existence check (and no check-then-create race) is needed.
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [5]:
# Execute the helper script load_sbert-embeddings.py via IPython's %run magic.
# NOTE(review): df_rumours, sbert_embeddings, y_data and output_dim are used by
# later cells but are never assigned in the visible cells — presumably this
# script defines them; confirm.
%run load_sbert-embeddings.py

In [6]:
# Preview the first five rows of the rumours dataframe.
df_rumours.head(n=5)

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


# SeqSigNet with Attention Network

In [7]:
# Both inclusion flags are switched on for this run.
include_features_in_path = include_features_in_input = True

# Extra feature names handed to the search, with one standardisation entry
# per feature (same order; None for the second).
features = ["time_encoding", "timeline_index"]
standardise_method = ["z_score", None]

In [8]:
# Integer timeline id of every row, as a tensor (passed to the search as `split_ids`).
# The Series is converted to a NumPy array first so torch reads the values
# positionally; handing the Series itself to torch.tensor makes the result
# depend on the Series index (it breaks when the index is not 0..N-1).
split_ids = torch.tensor(df_rumours["timeline_id"].astype(int).to_numpy())

In [9]:
# --- training settings ---
num_epochs = 100
patience = 5
validation_metric = "f1"

# --- loss settings ---
loss = "focal"
gamma = 2

# --- search grid ---
dimensions = [15]  # commented-out alternative: [50, 15]
# each swmhau entry is (output_channels, sig_depth, num_heads)
swmhau_parameters = [
    (12, 3, 10),
    (8, 4, 6),
    # (8, 4, 12),  # commented-out alternative
]
num_layers = [1]
ffn_hidden_dim_sizes = [
    [256, 256],
    [512, 512],
]
dropout_rates = [0.1, 0.2]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [1, 12, 123]

In [10]:
# Keyword arguments shared by every hyperparameter-search call in this
# notebook; each search cell supplies its own shift / window_size / n,
# dimension-reduction method and results path on top of these.
kwargs = dict(
    num_epochs=num_epochs,
    df=df_rumours,
    id_column="timeline_id",
    label_column="label",
    embeddings=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    dimensions=dimensions,
    log_signature=True,
    pooling="signature",
    transformer_encoder_layers=1,
    swmhau_parameters=swmhau_parameters,
    num_layers=num_layers,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    include_features_in_input=include_features_in_input,
    split_ids=split_ids,
    k_fold=True,
    patience=patience,
    validation_metric=validation_metric,
    verbose=False,
)

# history_length=11

In [11]:
# Window settings for this section; the search log below reports
# "history length = 11" for these values.
shift, window_size, n = 3, 5, 3

## Random Projections

In [12]:
# Run the search with Gaussian random projection as the only
# dimension-reduction method. The first two returned items (full results
# frame and best-model frame) are kept; the last two are discarded.
grp_results_csv = f"{output_dir}/seqsignet_attention_encoder_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv"
(
    seqsignet_attention_encoder_grp_11,
    best_seqsignet_attention_encoder_grp_11,
    _,
    __,
) = seqsignet_attention_encoder_hyperparameter_search(
    dim_reduce_methods=["gaussian_random_projection"],
    shift=shift,
    window_size=window_size,
    n=n,
    results_output=grp_results_csv,
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_3_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_3_kfold_best_model.csv


In [13]:
# Average every numeric column over the rows that share one hyperparameter
# combination.
# numeric_only=True is passed explicitly: relying on the default emits a
# FutureWarning on pandas 1.5+ and raises on pandas 2.x when the frame holds
# non-numeric columns (e.g. list- or string-valued ones).
seqsignet_attention_encoder_grp_11.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_encoder_grp_11.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,transformer_encoder_layers,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.683442,0.652011,0.65329,0.652501,0.715146,0.695099,0.698123,0.694386,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.698664,0.668237,0.668548,0.66798,0.71236,0.689387,0.694212,0.6865,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.702827,0.667858,0.671407,0.665399,0.715505,0.69084,0.697972,0.687298,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.688723,0.606914,0.656136,0.606651,0.698252,0.64046,0.693458,0.639298,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.69388,0.662904,0.663534,0.662907,0.704632,0.681465,0.685871,0.679169,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.704629,0.667058,0.67295,0.663627,0.714247,0.686369,0.6972,0.682005,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.686735,0.649685,0.655134,0.648963,0.712046,0.687894,0.695239,0.685657,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.696676,0.658761,0.664445,0.656363,0.713034,0.686143,0.695496,0.682179,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.694501,0.666916,0.665787,0.668347,0.709395,0.690083,0.691482,0.688992,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.694191,0.631659,0.660262,0.627712,0.701847,0.65627,0.689696,0.652454,11.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [14]:
# Display the best-model results frame returned by the history_length=11 search.
best_seqsignet_attention_encoder_grp_11

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.671202,0.658492,"[0.724375, 0.5926096997690532]",0.659525,"[0.7960164835164835, 0.5230330207908683]",0.674051,"[0.6645642201834863, 0.6835375599360682]",,0.756301,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.675116,0.662279,"[0.7281235376696303, 0.5964343598055106]",0.662966,"[0.7984946972288745, 0.5274365274365275]",0.677676,"[0.669151376146789, 0.6862013851891315]",,0.766141,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.671761,0.658111,"[0.7264253534255087, 0.5897973445143256]",0.658409,"[0.7928111224143778, 0.5240066225165563]",0.672389,"[0.6702981651376146, 0.6744805540756527]",,0.76668,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [15]:
# Show only the hyperparameter columns of the best-model frame.
best_seqsignet_attention_encoder_grp_11.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(512, 512)",0.1,0.0001
0,15,12,3,10,1,"(512, 512)",0.1,0.0001
0,15,12,3,10,1,"(512, 512)",0.1,0.0001


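Mean `f1` of the best model, depending on where the extra features are included:

- in path only: 0.658
- in input only: 0.6430702845314854
- both (the configuration run here): 0.6596275491973381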

In [16]:
# Mean of the `f1` column over the rows of the best-model frame (history_length=11).
best_seqsignet_attention_encoder_grp_11.loc[:, "f1"].mean()

0.6596275491973381

In [17]:
# Mean of the `precision` column over the rows of the best-model frame (history_length=11).
best_seqsignet_attention_encoder_grp_11.loc[:, "precision"].mean()

0.6602997456506147

In [18]:
# Mean of the `recall` column over the rows of the best-model frame (history_length=11).
best_seqsignet_attention_encoder_grp_11.loc[:, "recall"].mean()

0.6747055434447904

In [19]:
# Stack the per-row `f1_scores` lists into a 2-D array and average down the
# rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_11["f1_scores"]), axis=0)

array([0.72630796, 0.59294713])

In [20]:
# Stack the per-row `precision_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_11["precision_scores"]), axis=0)

array([0.7957741 , 0.52482539])

In [21]:
# Stack the per-row `recall_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_11["recall_scores"]), axis=0)

array([0.66800459, 0.6814065 ])

# history_length=20

In [22]:
# Window settings for this section; the search log below reports
# "history length = 20" for these values.
shift, window_size, n = 3, 5, 6

## Random Projections

In [23]:
# Run the search with Gaussian random projection as the only
# dimension-reduction method. The first two returned items (full results
# frame and best-model frame) are kept; the last two are discarded.
grp_results_csv = f"{output_dir}/seqsignet_attention_encoder_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv"
(
    seqsignet_attention_encoder_grp_20,
    best_seqsignet_attention_encoder_grp_20,
    _,
    __,
) = seqsignet_attention_encoder_hyperparameter_search(
    dim_reduce_methods=["gaussian_random_projection"],
    shift=shift,
    window_size=window_size,
    n=n,
    results_output=grp_results_csv,
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_6_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_6_kfold_best_model.csv


In [24]:
# Average every numeric column over the rows that share one hyperparameter
# combination.
# numeric_only=True is passed explicitly: relying on the default emits a
# FutureWarning on pandas 1.5+ and raises on pandas 2.x when the frame holds
# non-numeric columns (e.g. list- or string-valued ones).
seqsignet_attention_encoder_grp_20.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_encoder_grp_20.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,transformer_encoder_layers,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.674806,0.626625,0.639399,0.625599,0.721975,0.690498,0.706507,0.688679,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.694129,0.665069,0.664785,0.665928,0.70607,0.685172,0.687609,0.683613,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.700217,0.667265,0.670584,0.666468,0.711012,0.687494,0.694408,0.684981,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.685989,0.586221,0.657558,0.594911,0.688592,0.618016,0.69093,0.622825,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.700777,0.657054,0.668239,0.652954,0.708137,0.675044,0.692442,0.671025,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.699472,0.668886,0.66959,0.66852,0.709485,0.687738,0.691524,0.685602,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.668282,0.646247,0.646285,0.653063,0.799748,0.79017,0.787667,0.796262,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.695744,0.667666,0.666759,0.668769,0.711956,0.691815,0.693974,0.690202,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.691892,0.662226,0.661877,0.66269,0.706699,0.686639,0.688387,0.685275,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.698354,0.646478,0.664606,0.641617,0.703958,0.667403,0.688053,0.662987,20.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [25]:
# Display the best-model results frame returned by the history_length=20 search.
best_seqsignet_attention_encoder_grp_20

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.660578,0.64264,"[0.7227044312471448, 0.5625750660581311]",0.641477,"[0.7707047742773627, 0.5122484689413823]",0.6521,"[0.6803325688073395, 0.623867874267448]",,0.845667,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.684436,0.665452,"[0.7451452656932109, 0.5857597259603622]",0.663049,"[0.7844690966719493, 0.5416289592760181]",0.673648,"[0.7095756880733946, 0.6377197655833777]",,0.846206,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.669525,0.652121,"[0.7299314546839301, 0.5743097238895558]",0.650704,"[0.7786805329866753, 0.5227272727272727]",0.662057,"[0.6869266055045872, 0.637187000532765]",,0.836636,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [26]:
# Show only the hyperparameter columns of the best-model frame.
best_seqsignet_attention_encoder_grp_20.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(256, 256)",0.1,0.0001
0,15,12,3,10,1,"(256, 256)",0.1,0.0001
0,15,12,3,10,1,"(256, 256)",0.1,0.0001


In [27]:
# Mean of the `f1` column over the rows of the best-model frame (history_length=20).
best_seqsignet_attention_encoder_grp_20.loc[:, "f1"].mean()

0.6534042779220558

In [28]:
# Mean of the `precision` column over the rows of the best-model frame (history_length=20).
best_seqsignet_attention_encoder_grp_20.loc[:, "precision"].mean()

0.6517431841467768

In [29]:
# Mean of the `recall` column over the rows of the best-model frame (history_length=20).
best_seqsignet_attention_encoder_grp_20.loc[:, "recall"].mean()

0.6626015837948187

In [30]:
# Stack the per-row `f1_scores` lists into a 2-D array and average down the
# rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_20["f1_scores"]), axis=0)

array([0.73259372, 0.57421484])

In [31]:
# Stack the per-row `precision_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_20["precision_scores"]), axis=0)

array([0.77795147, 0.5255349 ])

In [32]:
# Stack the per-row `recall_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_20["recall_scores"]), axis=0)

array([0.69227829, 0.63292488])

# history_length=35

In [33]:
# Window settings for this section; the search log below reports
# "history length = 35" for these values.
shift, window_size, n = 3, 5, 11

## Random Projections

In [34]:
# Run the search with Gaussian random projection as the only
# dimension-reduction method. The first two returned items (full results
# frame and best-model frame) are kept; the last two are discarded.
grp_results_csv = f"{output_dir}/seqsignet_attention_encoder_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv"
(
    seqsignet_attention_encoder_grp_35,
    best_seqsignet_attention_encoder_grp_35,
    _,
    __,
) = seqsignet_attention_encoder_hyperparameter_search(
    dim_reduce_methods=["gaussian_random_projection"],
    shift=shift,
    window_size=window_size,
    n=n,
    results_output=grp_results_csv,
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 11: history length = 35
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 11: history length = 35
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_11_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_11_kfold_best_model.csv


In [35]:
# Average every numeric column over the rows that share one hyperparameter
# combination.
# numeric_only=True is passed explicitly: relying on the default emits a
# FutureWarning on pandas 1.5+ and raises on pandas 2.x when the frame holds
# non-numeric columns (e.g. list- or string-valued ones).
seqsignet_attention_encoder_grp_35.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_encoder_grp_35.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,transformer_encoder_layers,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.66996,0.624108,0.631656,0.621052,0.787977,0.7681,0.779927,0.762987,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.699845,0.669399,0.67047,0.669503,0.714562,0.692969,0.697328,0.690768,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.693321,0.666014,0.664777,0.667685,0.70625,0.687673,0.688501,0.68713,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.686238,0.63559,0.649914,0.631684,0.713124,0.678518,0.697352,0.674308,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.69649,0.663949,0.665318,0.662822,0.711417,0.689078,0.693212,0.686507,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.689655,0.660947,0.660161,0.661995,0.709889,0.690762,0.692162,0.689822,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.680957,0.652143,0.65154,0.653665,0.790268,0.778663,0.777899,0.779798,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.697173,0.665723,0.666884,0.665316,0.710788,0.689489,0.69296,0.687651,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.697297,0.664845,0.666217,0.66373,0.708811,0.686506,0.690355,0.684062,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.689345,0.655501,0.658262,0.654989,0.756391,0.739795,0.742789,0.738462,35.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [36]:
# Display the best-model results frame returned by the history_length=35 search.
best_seqsignet_attention_encoder_grp_35

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.665797,0.640161,"[0.7362071502133294, 0.5441139079583016]",0.638274,"[0.7561196736174071, 0.5204280155642024]",0.643688,"[0.7173165137614679, 0.5700586041555674]",,0.869255,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.703448,0.675674,"[0.7705839942321557, 0.580764163372859]",0.674862,"[0.7751668117203365, 0.5745568300312826]",0.676581,"[0.7660550458715596, 0.5871070857751731]",,0.883677,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.686673,0.655832,"[0.7588581265241716, 0.5528065974993349]",0.655738,"[0.7594028136663795, 0.5520722635494155]",0.655929,"[0.7583142201834863, 0.5535428875865743]",,0.871681,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [37]:
# Show only the hyperparameter columns of the best-model frame.
best_seqsignet_attention_encoder_grp_35.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(256, 256)",0.1,0.0001
0,15,12,3,10,1,"(256, 256)",0.1,0.0001
0,15,12,3,10,1,"(256, 256)",0.1,0.0001


In [38]:
# Mean of the `f1` column over the rows of the best-model frame (history_length=35).
best_seqsignet_attention_encoder_grp_35.loc[:, "f1"].mean()

0.6572223233000253

In [39]:
# Mean of the `precision` column over the rows of the best-model frame (history_length=35).
best_seqsignet_attention_encoder_grp_35.loc[:, "precision"].mean()

0.6562910680248373

In [40]:
# Mean of the `recall` column over the rows of the best-model frame (history_length=35).
best_seqsignet_attention_encoder_grp_35.loc[:, "recall"].mean()

0.6587323928889715

In [41]:
# Stack the per-row `f1_scores` lists into a 2-D array and average down the
# rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_35["f1_scores"]), axis=0)

array([0.75521642, 0.55922822])

In [42]:
# Stack the per-row `precision_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_35["precision_scores"]), axis=0)

array([0.7635631 , 0.54901904])

In [43]:
# Stack the per-row `recall_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_35["recall_scores"]), axis=0)

array([0.74722859, 0.57023619])

# history_length=80

In [11]:
# Window settings for this section; the search log below reports
# "history length = 80" for these values.
shift, window_size, n = 3, 5, 26

## UMAP

## Random Projections

In [12]:
# Run the search with Gaussian random projection as the only
# dimension-reduction method. The first two returned items (full results
# frame and best-model frame) are kept; the last two are discarded.
grp_results_csv = f"{output_dir}/seqsignet_attention_encoder_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv"
(
    seqsignet_attention_encoder_grp_80,
    best_seqsignet_attention_encoder_grp_80,
    _,
    __,
) = seqsignet_attention_encoder_hyperparameter_search(
    dim_reduce_methods=["gaussian_random_projection"],
    shift=shift,
    window_size=window_size,
    n=n,
    results_output=grp_results_csv,
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 26: history length = 80
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 26: history length = 80
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_26_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_encoder_grp_focal_2_3_5_26_kfold_best_model.csv


In [13]:
# Average every numeric column over the rows that share one hyperparameter
# combination.
# numeric_only=True is passed explicitly: relying on the default emits a
# FutureWarning on pandas 1.5+ and raises on pandas 2.x when the frame holds
# non-numeric columns (e.g. list- or string-valued ones).
seqsignet_attention_encoder_grp_80.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_encoder_grp_80.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,transformer_encoder_layers,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.677664,0.647965,0.647119,0.648959,0.880667,0.873257,0.87414,0.872484,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.704442,0.668731,0.673252,0.66615,0.713214,0.687695,0.695868,0.684225,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.700093,0.671142,0.670825,0.671622,0.710518,0.689384,0.692439,0.687388,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.685306,0.653579,0.655138,0.654262,0.748618,0.730535,0.733092,0.729398,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.700342,0.666742,0.669293,0.665046,0.711192,0.687801,0.693127,0.68499,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.699658,0.667484,0.669101,0.666489,0.714517,0.689875,0.69722,0.6865,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.681081,0.654769,0.65301,0.657493,0.771308,0.75901,0.75819,0.76011,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.699223,0.668661,0.669178,0.668369,0.710833,0.689149,0.692632,0.686895,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.698291,0.666933,0.668242,0.666545,0.710428,0.688139,0.692566,0.685912,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.68804,0.658219,0.65872,0.659481,0.748708,0.734556,0.734281,0.735083,80.0,3.0,...,384.0,2.0,1.0,1.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [14]:
# Display the best-model results frame returned by the history_length=80 search.
best_seqsignet_attention_encoder_grp_80

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.682013,0.652768,"[0.7535394394683618, 0.5519957983193278]",0.651871,"[0.7594641817122889, 0.5442775763852926]",0.653821,"[0.7477064220183486, 0.5599360681939265]",,0.884081,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.680149,0.650401,"[0.7523809523809523, 0.548421052631579]",0.649635,"[0.7574084834398606, 0.5418616744669786]",0.65128,"[0.7474197247706422, 0.5551411827384124]",,0.885429,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.670829,0.640725,"[0.7447239086441169, 0.5367261280167891]",0.639851,"[0.7510204081632653, 0.5286821705426357]",0.641775,"[0.7385321100917431, 0.5450186467767715]",,0.87249,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [15]:
# Show only the hyperparameter columns of the best-model frame.
best_seqsignet_attention_encoder_grp_80.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,8,4,6,1,"(256, 256)",0.1,0.0001
0,15,8,4,6,1,"(256, 256)",0.1,0.0001
0,15,8,4,6,1,"(256, 256)",0.1,0.0001


In [16]:
# Mean of the `f1` column over the rows of the best-model frame (history_length=80).
best_seqsignet_attention_encoder_grp_80.loc[:, "f1"].mean()

0.6479645465768544

In [17]:
# Mean of the `precision` column over the rows of the best-model frame (history_length=80).
best_seqsignet_attention_encoder_grp_80.loc[:, "precision"].mean()

0.6471190824517202

In [18]:
# Mean of the `recall` column over the rows of the best-model frame (history_length=80).
best_seqsignet_attention_encoder_grp_80.loc[:, "recall"].mean()

0.6489590257649741

In [19]:
# Stack the per-row `f1_scores` lists into a 2-D array and average down the
# rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_80["f1_scores"]), axis=0)

array([0.75021477, 0.54571433])

In [20]:
# Stack the per-row `precision_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_80["precision_scores"]), axis=0)

array([0.75596436, 0.53827381])

In [21]:
# Stack the per-row `recall_scores` lists into a 2-D array and average down
# the rows, giving one mean per list position (presumably one per label class).
np.mean(np.stack(best_seqsignet_attention_encoder_grp_80["recall_scores"]), axis=0)

array([0.74455275, 0.5533653 ])