In [1]:
import numpy as np
import pickle
import os

# NOTE(review): `seed` is not referenced by any later cell visible in this
# notebook; the searches below are driven by the `seeds` list instead.
# Confirm whether it is still needed.
seed = 2023

In [2]:
import torch

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.seqsignet_attention_bilstm_functions import (
    seqsignet_attention_bilstm_hyperparameter_search,
)

In [4]:
# Directory that receives the CSV results written by every search below.
output_dir = "rumours_output"
# `exist_ok=True` makes this idempotent and avoids the check-then-create race
# of testing `os.path.isdir` first.
os.makedirs(output_dir, exist_ok=True)

## Rumours

In [5]:
# Execute the loader script and import the names it defines into the notebook.
# Later cells use `df_rumours`, `sbert_embeddings`, `y_data`, `output_dim` and
# `split_ids` without defining them, so they presumably come from this script
# - TODO confirm.
%run load_sbert_embeddings.py

In [6]:
# Preview the first five rows of the rumours dataframe.
df_rumours.head(n=5)

Unnamed: 0,id,label,datetime,text,timeline_id,set
0,5.249902e+17,0,2014-10-22 18:26:23,Police have clarified that there were two shoo...,0,train
1,5.249906e+17,0,2014-10-22 18:27:58,"@CTVNews you guys ""confirmed"" there were 3 sho...",0,train
2,5.249908e+17,1,2014-10-22 18:28:46,@CTVNews get it right. http://t.co/GHYxMuzPG9,0,train
3,5.249927e+17,1,2014-10-22 18:36:29,RT @CTVNews Police have clarified that there w...,0,train
4,5.250038e+17,1,2014-10-22 19:20:41,@CTVNews @ctvsaskatoon so what happened at Rid...,0,train


# SeqSigNet with Attention Network

In [7]:
# Extra time-derived features handed to the search, with one standardisation
# entry per feature (presumably matched by position - TODO confirm).
features = ["time_encoding", "timeline_index"]
standardise_method = ["z_score", None]
# Use the features both in the path and in the network input.
include_features_in_path = include_features_in_input = True

In [8]:
# --- training budget / model selection ---
num_epochs = 100
patience = 5  # presumably the early-stopping patience - TODO confirm
validation_metric = "f1"

# --- loss ---
loss = "focal"
gamma = 2  # passed alongside the focal loss

# --- search grid (each list is one axis of the search) ---
dimensions = [15]  # sizes of the dimension-reduced embeddings
# swmhau settings, one tuple per candidate: (output_channels, sig_depth, num_heads)
swmhau_parameters = [(12, 3, 10), (8, 4, 6)]
num_layers = [1]
lstm_hidden_dim_sizes = [384]
ffn_hidden_dim_sizes = [[256] * 2, [512] * 2]
dropout_rates = [0.1, 0.2]
learning_rates = [0.001, 0.0001, 0.0005]
seeds = [1, 12, 123]

In [9]:
# Keyword arguments shared by every hyperparameter search in this notebook;
# only shift / window_size / n, the dimension-reduction method and the output
# path are supplied per call.
kwargs = dict(
    # data
    num_epochs=num_epochs,
    df=df_rumours,
    id_column="timeline_id",
    label_column="label",
    embeddings=sbert_embeddings,
    y_data=y_data,
    output_dim=output_dim,
    # model / search grid
    dimensions=dimensions,
    log_signature=True,
    pooling="signature",
    swmhau_parameters=swmhau_parameters,
    num_layers=num_layers,
    lstm_hidden_dim_sizes=lstm_hidden_dim_sizes,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    # loss and hardware
    loss=loss,
    gamma=gamma,
    device=device,
    # additional features
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    include_features_in_input=include_features_in_input,
    # evaluation protocol
    split_ids=split_ids,
    k_fold=True,
    patience=patience,
    validation_metric=validation_metric,
    verbose=False,
)

# history_length=11

In [10]:
# Window settings for this section. The search logs a history length of 11 for
# them, which matches window_size + shift * (n - 1).
shift, window_size, n = 3, 5, 3

## Random Projections

In [11]:
# Run the hyperparameter search for the current shift / window_size / n using
# Gaussian random projection as the only dimension-reduction method.
# The call returns four values: the first two (all results, best-model results)
# are kept, the last two are discarded. Per the stored run log, both frames are
# also written to CSV under `output_dir`, the second with a `_best_model` suffix.
# NOTE(review): `_` and `__` are also IPython's output-history names; binding
# them here shadows that history in the session.
(
    seqsignet_attention_bilstm_grp_11,
    best_seqsignet_attention_bilstm_grp_11,
    _,
    __,
) = seqsignet_attention_bilstm_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_bilstm_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_3_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_3_kfold_best_model.csv


In [12]:
# Average the numeric columns over all rows that share one hyperparameter
# configuration (the rows of a group differ by seed).
# `numeric_only=True` is explicit because the results frame also carries
# non-numeric columns (per-class score lists, `loss_function`, ...): relying on
# the default emits a FutureWarning on pandas 1.5 and raises TypeError on
# pandas >= 2.0.
seqsignet_attention_bilstm_grp_11.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_bilstm_grp_11.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,lstm_hidden_dim,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.668903,0.654774,0.655422,0.668756,0.765737,0.758172,0.756006,0.767422,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.686424,0.668101,0.66849,0.678335,0.704497,0.692615,0.692798,0.698275,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.679652,0.663236,0.663093,0.67489,0.695646,0.684564,0.68438,0.690874,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.684871,0.670266,0.669459,0.683456,0.771667,0.763925,0.761238,0.772328,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.654116,0.647928,0.661143,0.676126,0.675293,0.672483,0.684643,0.693892,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.678969,0.661261,0.659723,0.671042,0.697983,0.686728,0.685159,0.692128,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.678347,0.66346,0.662743,0.676265,0.766455,0.759304,0.756841,0.769022,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.680211,0.666013,0.667216,0.680651,0.69322,0.684518,0.685298,0.69378,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.673936,0.659427,0.66415,0.675456,0.68666,0.677362,0.681641,0.687914,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.682075,0.666866,0.665967,0.679255,0.767264,0.758861,0.75632,0.766461,11.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [13]:
# Results of the selected (best) configuration; the stored output has one row
# per seed (1, 12, 123).
best_seqsignet_attention_bilstm_grp_11

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.688537,0.671317,"[0.7465493705445169, 0.5960841189267585]",0.669084,"[0.7925925925925926, 0.5455752212389381]",0.681231,"[0.7055619266055045, 0.6568993074054342]",,0.772746,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.696552,0.678867,"[0.7542270531400966, 0.6035070628348758]",0.676205,"[0.7965561224489796, 0.5558546433378196]",0.688133,"[0.7161697247706422, 0.6600958977091103]",,0.783394,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.679031,0.661799,"[0.7381386861313868, 0.5854597977852672]",0.659991,"[0.7859455958549223, 0.5340360122968819]",0.671828,"[0.6958142201834863, 0.6478423015450187]",,0.780294,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [14]:
# Show which hyperparameter configuration was selected as best.
best_seqsignet_attention_bilstm_grp_11.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(512, 512)",0.1,0.0001
0,15,12,3,10,1,"(512, 512)",0.1,0.0001
0,15,12,3,10,1,"(512, 512)",0.1,0.0001


In [15]:
# Mean of the `f1` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_11.loc[:, "f1"].mean()

0.6706610148938169

In [16]:
# Mean of the `precision` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_11.loc[:, "precision"].mean()

0.668426697961689

In [17]:
# Mean of the `recall` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_11.loc[:, "recall"].mean()

0.6803972297031994

In [18]:
# Stack the per-row `f1_scores` lists into a 2-D array and average over rows,
# giving one mean F1 per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_11["f1_scores"]), axis=0)

array([0.74630504, 0.59501699])

In [19]:
# Stack the per-row `precision_scores` lists and average over rows, giving one
# mean precision per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_11["precision_scores"]), axis=0)

array([0.7916981 , 0.54515529])

In [20]:
# Stack the per-row `recall_scores` lists and average over rows, giving one
# mean recall per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_11["recall_scores"]), axis=0)

array([0.70584862, 0.65494584])

# history_length=20

In [21]:
# Window settings for this section. The search logs a history length of 20 for
# them, which matches window_size + shift * (n - 1).
shift, window_size, n = 3, 5, 6

## Random Projections

In [22]:
# Run the hyperparameter search for the current shift / window_size / n using
# Gaussian random projection as the only dimension-reduction method.
# The call returns four values: the first two (all results, best-model results)
# are kept, the last two are discarded. Per the stored run log, both frames are
# also written to CSV under `output_dir`, the second with a `_best_model` suffix.
# NOTE(review): `_` and `__` are also IPython's output-history names; binding
# them here shadows that history in the session.
(
    seqsignet_attention_bilstm_grp_20,
    best_seqsignet_attention_bilstm_grp_20,
    _,
    __,
) = seqsignet_attention_bilstm_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_bilstm_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_6_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_6_kfold_best_model.csv


In [23]:
# Average the numeric columns over all rows that share one hyperparameter
# configuration (the rows of a group differ by seed).
# `numeric_only=True` is explicit because the results frame also carries
# non-numeric columns (per-class score lists, `loss_function`, ...): relying on
# the default emits a FutureWarning on pandas 1.5 and raises TypeError on
# pandas >= 2.0.
seqsignet_attention_bilstm_grp_20.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_bilstm_grp_20.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,lstm_hidden_dim,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.672818,0.652105,0.650136,0.659463,0.843375,0.837526,0.833285,0.846109,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.669214,0.658801,0.663037,0.678345,0.68711,0.681492,0.684541,0.695111,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.687108,0.666062,0.663597,0.67238,0.704003,0.690624,0.689104,0.69387,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.672942,0.656486,0.655709,0.667597,0.843375,0.837918,0.833662,0.847625,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.643243,0.637078,0.660096,0.670635,0.666622,0.66289,0.684,0.688176,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.692762,0.668263,0.666142,0.671766,0.708496,0.691781,0.691401,0.692249,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.67319,0.654822,0.65318,0.664097,0.845038,0.838612,0.834623,0.84537,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.679714,0.664522,0.665439,0.67789,0.693085,0.684404,0.685449,0.693761,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.678409,0.661407,0.660123,0.671925,0.697848,0.687356,0.686262,0.693785,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.678596,0.659738,0.657714,0.66809,0.846161,0.839998,0.835917,0.847409,20.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [24]:
# Results of the selected (best) configuration; the stored output has one row
# per seed (1, 12, 123).
best_seqsignet_attention_bilstm_grp_20

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.68891,0.670195,"[0.7487580912238446, 0.591632003914852]",0.667666,"[0.7882725832012678, 0.5470588235294118]",0.678565,"[0.7130160550458715, 0.6441129461907299]",,0.847014,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.691519,0.67411,"[0.7494322482967449, 0.5987878787878788]",0.671705,"[0.7940327237728585, 0.5493772241992882]",0.68377,"[0.7095756880733946, 0.6579648375066596]",,0.855641,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.678844,0.656511,"[0.744096242388237, 0.5689266950212659]",0.654139,"[0.7719568567026194, 0.5363207547169812]",0.661965,"[0.7181766055045872, 0.6057538625466169]",,0.853619,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [25]:
# Show which hyperparameter configuration was selected as best.
best_seqsignet_attention_bilstm_grp_20.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(256, 256)",0.2,0.0001
0,15,12,3,10,1,"(256, 256)",0.2,0.0001
0,15,12,3,10,1,"(256, 256)",0.2,0.0001


In [26]:
# Mean of the `f1` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_20.loc[:, "f1"].mean()

0.6669388599388039

In [27]:
# Mean of the `precision` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_20.loc[:, "precision"].mean()

0.6645031610204045

In [28]:
# Mean of the `recall` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_20.loc[:, "recall"].mean()

0.67476666581131

In [29]:
# Stack the per-row `f1_scores` lists into a 2-D array and average over rows,
# giving one mean F1 per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_20["f1_scores"]), axis=0)

array([0.74742886, 0.58644886])

In [30]:
# Stack the per-row `precision_scores` lists and average over rows, giving one
# mean precision per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_20["precision_scores"]), axis=0)

array([0.78475405, 0.54425227])

In [31]:
# Stack the per-row `recall_scores` lists and average over rows, giving one
# mean recall per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_20["recall_scores"]), axis=0)

array([0.71358945, 0.63594388])

# history_length=35

In [32]:
# Window settings for this section. The search logs a history length of 35 for
# them, which matches window_size + shift * (n - 1).
shift, window_size, n = 3, 5, 11

## Random Projections

In [33]:
# Run the hyperparameter search for the current shift / window_size / n using
# Gaussian random projection as the only dimension-reduction method.
# The call returns four values: the first two (all results, best-model results)
# are kept, the last two are discarded. Per the stored run log, both frames are
# also written to CSV under `output_dir`, the second with a `_best_model` suffix.
# NOTE(review): `_` and `__` are also IPython's output-history names; binding
# them here shadows that history in the session.
(
    seqsignet_attention_bilstm_grp_35,
    best_seqsignet_attention_bilstm_grp_35,
    _,
    __,
) = seqsignet_attention_bilstm_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_bilstm_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 11: history length = 35
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 11: history length = 35
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_11_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_11_kfold_best_model.csv


In [34]:
# Average the numeric columns over all rows that share one hyperparameter
# configuration (the rows of a group differ by seed).
# `numeric_only=True` is explicit because the results frame also carries
# non-numeric columns (per-class score lists, `loss_function`, ...): relying on
# the default emits a FutureWarning on pandas 1.5 and raises TypeError on
# pandas >= 2.0.
seqsignet_attention_bilstm_grp_35.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_bilstm_grp_35.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,lstm_hidden_dim,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.671264,0.642625,0.641871,0.64494,0.871546,0.864909,0.862451,0.867875,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.67114,0.660063,0.663695,0.67876,0.700274,0.694386,0.696932,0.707762,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.694066,0.667912,0.666546,0.670391,0.710833,0.69178,0.693388,0.690946,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.687543,0.665384,0.662847,0.670787,0.873433,0.867099,0.864328,0.870758,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.653246,0.645027,0.657235,0.670659,0.680954,0.676369,0.686139,0.694642,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.698602,0.673831,0.671886,0.676955,0.713528,0.694764,0.696004,0.693849,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.673936,0.647214,0.645917,0.650029,0.871816,0.865103,0.862939,0.867822,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.672693,0.658687,0.658867,0.672614,0.698297,0.689992,0.689498,0.698991,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.685927,0.666332,0.665222,0.674754,0.697668,0.683837,0.683304,0.68728,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.684374,0.660659,0.658609,0.665029,0.871501,0.865043,0.862248,0.868563,35.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [35]:
# Results of the selected (best) configuration; the stored output has one row
# per seed (1, 12, 123).
best_seqsignet_attention_bilstm_grp_35

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.679404,0.65756,"[0.7440476190476191, 0.571072319201995]",0.655159,"[0.7735148514851485, 0.5368026254102204]",0.66338,"[0.716743119266055, 0.6100159829515184]",,0.880173,...,0.0001,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.690401,0.672448,"[0.7491315511252077, 0.595765393039669]",0.66998,"[0.7915735716565592, 0.5483870967741935]",0.681557,"[0.7110091743119266, 0.6521044219499201]",,0.875724,...,0.0001,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.679963,0.654968,"[0.7478337494492584, 0.5621015047181842]",0.652888,"[0.7666365552544414, 0.5391389432485323]",0.658519,"[0.7299311926605505, 0.5871070857751731]",,0.874377,...,0.0001,123,focal,2,True,5,Conv1d,,concatenation,64


In [36]:
# Show which hyperparameter configuration was selected as best.
best_seqsignet_attention_bilstm_grp_35.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(256, 256)",0.2,0.0001
0,15,12,3,10,1,"(256, 256)",0.2,0.0001
0,15,12,3,10,1,"(256, 256)",0.2,0.0001


In [37]:
# Mean of the `f1` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_35.loc[:, "f1"].mean()

0.6616586894303222

In [38]:
# Mean of the `precision` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_35.loc[:, "precision"].mean()

0.6593422739715159

In [39]:
# Mean of the `recall` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_35.loc[:, "recall"].mean()

0.667818496152524

In [40]:
# Stack the per-row `f1_scores` lists into a 2-D array and average over rows,
# giving one mean F1 per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_35["f1_scores"]), axis=0)

array([0.74700431, 0.57631307])

In [41]:
# Stack the per-row `precision_scores` lists and average over rows, giving one
# mean precision per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_35["precision_scores"]), axis=0)

array([0.77724166, 0.54144289])

In [42]:
# Stack the per-row `recall_scores` lists and average over rows, giving one
# mean recall per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_35["recall_scores"]), axis=0)

array([0.71922783, 0.61640916])

# history_length=80

In [43]:
# Window settings for this section. The search logs a history length of 80 for
# them, which matches window_size + shift * (n - 1).
shift, window_size, n = 3, 5, 26

## Random Projections

In [44]:
# Run the hyperparameter search for the current shift / window_size / n using
# Gaussian random projection as the only dimension-reduction method.
# The call returns four values: the first two (all results, best-model results)
# are kept, the last two are discarded. Per the stored run log, both frames are
# also written to CSV under `output_dir`, the second with a `_best_model` suffix.
# NOTE(review): `_` and `__` are also IPython's output-history names; binding
# them here shadows that history in the session.
(
    seqsignet_attention_bilstm_grp_80,
    best_seqsignet_attention_bilstm_grp_80,
    _,
    __,
) = seqsignet_attention_bilstm_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_bilstm_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 26: history length = 80
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 26: history length = 80
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/5568 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_26_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in rumours_output/seqsignet_attention_bilstm_grp_focal_2_3_5_26_kfold_best_model.csv


In [45]:
# Average the numeric columns over all rows that share one hyperparameter
# configuration (the rows of a group differ by seed).
# `numeric_only=True` is explicit because the results frame also carries
# non-numeric columns (per-class score lists, `loss_function`, ...): relying on
# the default emits a FutureWarning on pandas 1.5 and raises TypeError on
# pandas >= 2.0.
# NOTE(review): in the stored output the valid_* columns at learning_rate=1e-4
# (~0.88) sit far above the corresponding test columns (~0.65); worth
# confirming that validation folds do not share history with the training data.
seqsignet_attention_bilstm_grp_80.groupby(
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ]
).mean(numeric_only=True)

  seqsignet_attention_bilstm_grp_80.groupby(


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,accuracy,f1,precision,recall,valid_accuracy,valid_f1,valid_precision,valid_recall,k,shift,...,embedding_dim,num_features,log_signature,lstm_hidden_dim,seed,gamma,k_fold,n_splits,batch_size,model_id
dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
15,8,4,6,1,"(256, 256)",0.1,0.0001,0.675986,0.64947,0.647754,0.652221,0.891225,0.885021,0.884116,0.886062,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,13.0
15,8,4,6,1,"(256, 256)",0.1,0.0005,0.669463,0.653929,0.654504,0.666684,0.728265,0.720785,0.720528,0.731216,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,14.0
15,8,4,6,1,"(256, 256)",0.1,0.001,0.681392,0.663658,0.66313,0.673931,0.69969,0.688742,0.687973,0.694956,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,12.0
15,8,4,6,1,"(256, 256)",0.2,0.0001,0.683691,0.65354,0.653458,0.65425,0.888619,0.882149,0.88156,0.882779,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,16.0
15,8,4,6,1,"(256, 256)",0.2,0.0005,0.658465,0.651061,0.66206,0.676969,0.676057,0.67201,0.680779,0.690164,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,17.0
15,8,4,6,1,"(256, 256)",0.2,0.001,0.682945,0.66267,0.66022,0.669712,0.704587,0.69087,0.68917,0.693708,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,15.0
15,8,4,6,1,"(512, 512)",0.1,0.0001,0.67754,0.644982,0.645509,0.645336,0.892438,0.886302,0.885396,0.887359,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,19.0
15,8,4,6,1,"(512, 512)",0.1,0.0005,0.67959,0.663248,0.662061,0.674432,0.716224,0.707737,0.706344,0.716305,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,20.0
15,8,4,6,1,"(512, 512)",0.1,0.001,0.679466,0.661424,0.661984,0.672081,0.696006,0.684781,0.684986,0.691142,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,18.0
15,8,4,6,1,"(512, 512)",0.2,0.0001,0.699907,0.673554,0.672297,0.675457,0.888395,0.882179,0.880824,0.883752,80.0,3.0,...,384.0,2.0,1.0,384.0,45.333333,2.0,1.0,5.0,64.0,22.0


In [46]:
# Results of the selected (best) configuration; the stored output has one row
# per seed (1, 12, 123).
best_seqsignet_attention_bilstm_grp_80

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,learning_rate,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size
0,,0.661883,0.638186,"[0.7307806470762839, 0.5455911823647294]",0.636216,"[0.7575384615384615, 0.5148936170212766]",0.643015,"[0.705848623853211, 0.5801811401172083]",,0.893382,...,0.0005,1,focal,2,True,5,Conv1d,,concatenation,64
0,,0.705499,0.683212,"[0.7672362993517974, 0.5991882293252156]",0.680502,"[0.7890909090909091, 0.5719128329297821]",0.687878,"[0.7465596330275229, 0.6291955247735749]",,0.896482,...,0.0005,12,focal,2,True,5,Conv1d,,concatenation,64
0,,0.712582,0.682111,"[0.780529461998292, 0.5836933045356372]",0.683347,"[0.7750141322781232, 0.5916803503010399]",0.681021,"[0.7861238532110092, 0.5759190197123069]",,0.895673,...,0.0005,123,focal,2,True,5,Conv1d,,concatenation,64


In [47]:
# Show which hyperparameter configuration was selected as best.
best_seqsignet_attention_bilstm_grp_80.loc[
    :,
    [
        "dimensions",
        "output_channels",
        "sig_depth",
        "num_heads",
        "num_layers",
        "ffn_hidden_dim",
        "dropout_rate",
        "learning_rate",
    ],
]

Unnamed: 0,dimensions,output_channels,sig_depth,num_heads,num_layers,ffn_hidden_dim,dropout_rate,learning_rate
0,15,12,3,10,1,"(256, 256)",0.1,0.0005
0,15,12,3,10,1,"(256, 256)",0.1,0.0005
0,15,12,3,10,1,"(256, 256)",0.1,0.0005


In [48]:
# Mean of the `f1` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_80.loc[:, "f1"].mean()

0.667836520775326

In [49]:
# Mean of the `precision` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_80.loc[:, "precision"].mean()

0.666688383859932

In [50]:
# Mean of the `recall` column over the best model's rows (one row per seed).
best_seqsignet_attention_bilstm_grp_80.loc[:, "recall"].mean()

0.6706379657824723

In [51]:
# Stack the per-row `f1_scores` lists into a 2-D array and average over rows,
# giving one mean F1 per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_80["f1_scores"]), axis=0)

array([0.75951547, 0.57615757])

In [52]:
# Stack the per-row `precision_scores` lists and average over rows, giving one
# mean precision per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_80["precision_scores"]), axis=0)

array([0.77388117, 0.5594956 ])

In [53]:
# Stack the per-row `recall_scores` lists and average over rows, giving one
# mean recall per entry (presumably one per class label - TODO confirm).
np.mean(np.stack(best_seqsignet_attention_bilstm_grp_80["recall_scores"]), axis=0)

array([0.74617737, 0.59509856])