In [1]:
import pandas as pd
import numpy as np
import torch
import transformers
import pickle
import os

import nlpsig
import nlpsig_networks

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
from nlpsig.classification_utils import split_dataset
from nlpsig_networks.pytorch_utils import training_pytorch, testing_pytorch, set_seed
from nlpsig_networks.ffn import FeedforwardNeuralNetModel
from nlpsig_networks.deepsignet import StackedDeepSigNet
from nlpsig_networks.focal_loss import FocalLoss, ClassBalanced_FocalLoss
from sklearn import metrics

from tqdm.notebook import tqdm

seed = 2023

In [2]:
import signatory

## AnnoMI

In [3]:
anno_mi = pd.read_csv("AnnoMI-full.csv")
anno_mi["datetime"] = pd.to_datetime(anno_mi["timestamp"])
anno_mi = anno_mi.drop(columns=["video_title", "video_url"])
anno_mi.head()

Unnamed: 0,mi_quality,transcript_id,topic,utterance_id,interlocutor,timestamp,utterance_text,annotator_id,therapist_input_exists,therapist_input_subtype,reflection_exists,reflection_subtype,question_exists,question_subtype,main_therapist_behaviour,client_talk_type,datetime
0,high,0,reducing alcohol consumption,0,therapist,00:00:13,Thanks for filling it out. We give this form t...,3,False,,False,,True,open,question,,2023-05-10 00:00:13
1,high,0,reducing alcohol consumption,1,client,00:00:24,Sure.,3,,,,,,,,neutral,2023-05-10 00:00:24
2,high,0,reducing alcohol consumption,2,therapist,00:00:25,"So, let's see. It looks that you put-- You dri...",3,True,information,False,,False,,therapist_input,,2023-05-10 00:00:25
3,high,0,reducing alcohol consumption,3,client,00:00:34,Mm-hmm.,3,,,,,,,,neutral,2023-05-10 00:00:34
4,high,0,reducing alcohol consumption,4,therapist,00:00:34,-and you usually have three to four drinks whe...,3,True,information,False,,False,,therapist_input,,2023-05-10 00:00:34


In [4]:
len(anno_mi)

13551

In [5]:
anno_mi["client_talk_type"].value_counts() / anno_mi["interlocutor"].value_counts()["client"]

neutral    0.627063
change     0.248030
sustain    0.124907
Name: client_talk_type, dtype: float64

In [6]:
anno_mi["interlocutor"].value_counts()

therapist    6826
client       6725
Name: interlocutor, dtype: int64

In [7]:
anno_mi["topic"].value_counts()

reducing alcohol consumption                                                          2326
more exercise / increasing activity                                                   2034
reducing recidivism                                                                   1303
reducing drug use                                                                     1104
diabetes management                                                                    948
smoking cessation                                                                      923
smoking cessation                                                                      541
taking medicine / following medical procedure                                          448
asthma management                                                                      431
avoiding DOI                                                                           394
changing approach to disease                                                           315

In [8]:
len(anno_mi["transcript_id"].unique())

133

## Only considering client for now...

In [9]:
client_index = [isinstance(x, str) for x in anno_mi["client_talk_type"]]
sum(client_index)

6725

In [10]:
y_data = anno_mi["client_talk_type"][client_index]
y_data.shape

(6725,)

In [11]:
y_data[0:20]

1     neutral
3     neutral
5     neutral
7     neutral
9     neutral
11    neutral
13    neutral
15    neutral
17    neutral
19    neutral
21    neutral
23    neutral
25    neutral
27    neutral
29    neutral
31    neutral
33    neutral
35     change
37     change
39     change
Name: client_talk_type, dtype: object

In [12]:
label_to_id = {y_data.unique()[i]: i for i in range(len(y_data.unique()))}
id_to_label = {v: k for k, v in label_to_id.items()}

In [13]:
label_to_id

{'neutral': 0, 'change': 1, 'sustain': 2}

In [14]:
id_to_label

{0: 'neutral', 1: 'change', 2: 'sustain'}

In [15]:
y_data = [label_to_id[x] for x in y_data]
y_data[0:20]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

## Obtaining SBERT Embeddings

We can use the `SentenceEncoder` class within `nlpsig` to obtain sentence embeddings from a model. This class uses the [`sentence-transformer`](https://www.sbert.net/docs/package_reference/SentenceTransformer.html) package and here, we have use the pre-trained `all-mpnet-base-v2` model by passing this name as a string to the class - alternative models can be found [here](https://www.sbert.net/docs/pretrained_models.html).

We can pass these into the constructor of the class to initialise our text encoder as follows:

In [17]:
# sbert_768_embeddings = np.load("anno_mi_sentence_embeddings_768.npy")

In [16]:
# initialise the Text Encoder
sbert_model_768 = "all-mpnet-base-v2"
text_encoder_sbert_768 = nlpsig.SentenceEncoder(df=anno_mi,
                                                feature_name="utterance_text",
                                                model_name=sbert_model_768)
text_encoder_sbert_768.load_pretrained_model()

The class has a `.encode_sentence_transformer()` method which first loads in the model (using the `model_name` and `model_args` attributes) and then obtains an embedding for each sentence. These sentence embeddings are then stored in the `embeddings_sentence` attribute of the object.

In [17]:
text_encoder_sbert_768.obtain_embeddings()
sbert_768_embeddings = text_encoder_sbert_768.sentence_embeddings

[INFO] number of sentences to encode: 13551


Batches:   0%|          | 0/212 [00:00<?, ?it/s]

In [18]:
np.save("anno_mi_sentence_embeddings_768", sbert_768_embeddings)

## SBERT with 384 dimension vectors

In [18]:
# sbert_384_embeddings = np.load("anno_mi_sentence_embeddings_384.npy")

In [20]:
# initialise the Text Encoder
sbert_model_384 = "all-MiniLM-L12-v2"
text_encoder_sbert_384 = nlpsig.SentenceEncoder(df=anno_mi,
                                                feature_name="utterance_text",
                                                model_name=sbert_model_384)
text_encoder_sbert_384.load_pretrained_model()

In [21]:
text_encoder_sbert_384.obtain_embeddings()
sbert_384_embeddings = text_encoder_sbert_384.sentence_embeddings

[INFO] number of sentences to encode: 13551


Batches:   0%|          | 0/212 [00:00<?, ?it/s]

In [22]:
np.save("anno_mi_sentence_embeddings_384", sbert_384_embeddings)

## Pretrained BERT and pooling

In [19]:
# pooled_mean_pretrained = np.load("anno_mi_pretrained_BERT_mean.npy")
# pooled_max_pretrained = np.load("anno_mi_pretrained_BERT_max.npy")
# pooled_sum_pretrained = np.load("anno_mi_pretrained_BERT_sum.npy")
# pooled_cls_pretrained = np.load("anno_mi_pretrained_BERT_cls.npy")

In [24]:
bert_model = "bert-base-uncased"

In [25]:
text_encoder_pretrained_BERT = nlpsig.TextEncoder(df=anno_mi,
                                                  feature_name="utterance_text",
                                                  model_name=bert_model)
text_encoder_pretrained_BERT.load_pretrained_model()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [26]:
text_encoder_pretrained_BERT.tokenize_text(skip_special_tokens=False)

[INFO] Setting return_special_tokens_mask=True
[INFO] Tokenizing the dataset...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Saving the tokenized text for each sentence into `.df['tokens']`...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Creating tokenized dataframe and setting in `.tokenized_df` attribute...
[INFO] Note: 'text_id' is the column name for denoting the corresponding text id


Dataset({
    features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask', 'tokens'],
    num_rows: 13551
})

In [27]:
token_embeddings_pretrained = text_encoder_pretrained_BERT.obtain_embeddings(method="hidden_layer")

  0%|          | 0/136 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [28]:
pooled_mean_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings()
pooled_max_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings(method="max")
pooled_sum_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings(method="sum") 
pooled_cls_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings(method="cls")

  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/13551 [00:00<?, ?it/s]

In [29]:
pooled_mean_pretrained.shape

(13551, 768)

In [30]:
pooled_max_pretrained.shape

(13551, 768)

In [31]:
pooled_sum_pretrained.shape

(13551, 768)

In [32]:
pooled_cls_pretrained.shape

(13551, 768)

In [33]:
np.save("anno_mi_pretrained_BERT_mean", pooled_mean_pretrained)
np.save("anno_mi_pretrained_BERT_max", pooled_max_pretrained)
np.save("anno_mi_pretrained_BERT_sum", pooled_sum_pretrained)
np.save("anno_mi_pretrained_BERT_cls", pooled_cls_pretrained)

## Fine-tuning BERT and pooling

### (Ignoring this part for now while, but will run this on GPU cluster soon...)

In [20]:
# pooled_mean = np.load("anno_mi_BERT_mean.npy")
# pooled_max = np.load("anno_mi_BERT_max.npy")
# pooled_sum = np.load("anno_mi_BERT_sum.npy")
# pooled_cls = np.load("anno_mi_BERT_cls.npy")

In [35]:
from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    DataCollatorWithPadding,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
)

model = AutoModelForMaskedLM.from_pretrained(bert_model)
tokenizer = AutoTokenizer.from_pretrained(bert_model)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [36]:
text_encoder_BERT = nlpsig.TextEncoder(df=anno_mi,
                                       feature_name="utterance_text",
                                       model=model,
                                       tokenizer=tokenizer,
                                       data_collator=data_collator)

In [37]:
text_encoder_BERT.tokenize_text(skip_special_tokens=False)

[INFO] Setting return_special_tokens_mask=True
[INFO] Tokenizing the dataset...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Saving the tokenized text for each sentence into `.df['tokens']`...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Creating tokenized dataframe and setting in `.tokenized_df` attribute...
[INFO] Note: 'text_id' is the column name for denoting the corresponding text id


Dataset({
    features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask'],
    num_rows: 13551
})

### Training the model

In [38]:
# set up data_collator for language modelling (has dynamic padding)
data_collator_for_LM = DataCollatorForLanguageModeling(tokenizer=tokenizer,
                                                       mlm=True,
                                                       mlm_probability=0.15)

In [39]:
text_encoder_BERT.split_dataset(seed=seed)

[INFO] Splitting up dataset into train / validation / test sets, and saving to `.dataset_split`.


DatasetDict({
    train: Dataset({
        features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask'],
        num_rows: 10840
    })
    test: Dataset({
        features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask'],
        num_rows: 1356
    })
    validation: Dataset({
        features: ['mi_qua

In [40]:
type(text_encoder_BERT.dataset_split)

datasets.dataset_dict.DatasetDict

In [41]:
model_name = "bert-base-uncased-anno-mi"
text_encoder_BERT.set_up_training_args(output_dir=model_name,
                                  num_train_epochs=600,
                                  per_device_train_batch_size=128,
                                  disable_tqdm=False,
                                  save_strategy="steps",
                                  save_steps=10000,
                                  seed=seed)

[INFO] Setting up TrainingArguments object and saving to `.training_args`.


TrainingArguments(
_n_gpu=0,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ign

In [42]:
type(text_encoder_BERT.training_args)

transformers.training_args.TrainingArguments

In [43]:
text_encoder_BERT.set_up_trainer(data_collator=data_collator_for_LM)

[INFO] Setting up Trainer object, and saving to `.trainer`.


<transformers.trainer.Trainer at 0x2b04c1e80>

In [44]:
type(text_encoder_BERT.trainer)

transformers.trainer.Trainer

In [45]:
torch.cuda.is_available()

False

In [46]:
torch.cuda.device_count()

0

In [47]:
# set to only report errors to avoid excessing logging
transformers.utils.logging.set_verbosity(40)

In [None]:
text_encoder_BERT.fit_transformer_with_trainer_api()

[INFO] Training model with 109514298 parameters...




Epoch,Training Loss,Validation Loss


In [None]:
text_encoder_BERT.trainer.save_model(model_name)

### Evaluating model on masked language modelling task

In [None]:
text_encoder_BERT.tokenizer.special_tokens_map

In [None]:
def compute_masked_character_accuracy(fill_mask, words):
    was_correct = []
    print(f"Evaluating with {len(words)} words")
    for word in tqdm(words):
        masked_strings = [word[:i] + '<mask>' + word[i+1:] for i in range(len(word))]
        predictions = [fill_mask(word)[0]['sequence'] for word in masked_strings]
        was_correct += [pred == word for pred in predictions]
    
    acc = np.sum(was_correct) / len(was_correct)
    print(f"Accuracy: {acc}")
    return acc

In [None]:
fill_mask = pipeline("fill-mask",
                     model=model_name,
                     tokenizer=model_name)

compute_masked_character_accuracy(fill_mask, text_encoder_BERT.dataset_split["test"]["word"])

### Obtain embeddings from model

In [None]:
# setting the model to CPU (might not be always necessary to run this)
text_encoder_BERT.model.to('cpu')
token_embeddings = text_encoder_BERT.obtain_embeddings(method="hidden_layer")

In [None]:
token_embeddings.shape

In [None]:
pooled_mean = text_encoder_BERT.pool_token_embeddings()
pooled_max = text_encoder_BERT.pool_token_embeddings(method="max")
pooled_sum = text_encoder_BERT.pool_token_embeddings(method="sum")
pooled_cls = text_encoder_BERT.pool_token_embeddings(method="cls")

In [None]:
pooled_mean.shape

In [None]:
pooled_max.shape

In [None]:
pooled_sum.shape

In [None]:
pooled_cls.shape

In [None]:
np.save("anno_mi_BERT_mean", pooled_mean)
np.save("anno_mi_BERT_max", pooled_max)
np.save("anno_mi_BERT_sum", pooled_sum)
np.save("anno_mi_BERT_cls", pooled_cls)

# Baseline 1: FFN baseline

Using the embeddings for the sentences directly in a FFN.

Below is a function that takes in some inputs x_data, y_data and fits a FFN. Will do early stopping if the F1 score continually gets worse.

In [81]:
def implement_ffn(x_data,
                  y_data,
                  hidden_dim,
                  learning_rate,
                  loss,
                  gamma=0):
    # set seed
    set_seed(seed)
    
    # initialise FFN
    ffn_model = FeedforwardNeuralNetModel(input_dim=x_data.shape[1],
                                          hidden_dim=hidden_dim,
                                          output_dim=len(label_to_id),
                                          dropout_rate=0.1)
    # print(ffn_model)
    
    # split dataset
    train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),
                                       y_data=torch.tensor(y_data),
                                       train_size=0.8,
                                       valid_size=0.5,
                                       shuffle=True,
                                       as_DataLoader=True,
                                       seed=seed)

    # define loss
    if loss == "focal":
        criterion = FocalLoss(gamma = gamma)
    elif loss == "cross_entropy":
        criterion = torch.nn.CrossEntropyLoss()

    # define optimizer
    optimizer = torch.optim.Adam(ffn_model.parameters(), lr=learning_rate)
    # define scheduler for adjusting the learning rate
    scheduler = ReduceLROnPlateau(optimizer, 'min')
    # scheduler = StepLR(optimizer, step_size = 4, gamma = 0.5)
    # scheduler = None
    
    ffn_model = training_pytorch(model=ffn_model,
                                 train_loader=train,
                                 criterion=criterion,
                                 optimizer=optimizer,
                                 num_epochs=10000,
                                 scheduler=scheduler,
                                 valid_loader=valid,
                                 seed=seed,
                                 early_stopping=True,
                                 early_stopping_metric="f1",
                                 patience=10,
                                 verbose=True,
                                 verbose_epoch=100)

    pred, label = testing_pytorch(ffn_model, test, criterion)
    print(f"proportion of labels in prediction: {[sum(pred==i)/len(pred) for i in label_to_id.values()]}")
    print(f"proportion of labels in data: {[sum(label==i)/len(label) for i in label_to_id.values()]}")
    
    f1_scores = metrics.f1_score(label, pred, average=None)
    print(f"- f1: {f1_scores}")
    print(f"- f1 (average): {sum(f1_scores)/len(f1_scores)}")
    print(f"- accuracy: {sum(pred==label)/len(pred)}")
    
    return ffn_model

Going to try out some variations (1 hidden layer, 2 hidden layers and 3 hidden layers - all of size 100)

In [82]:
hidden_dim_trials = [[100]*i for i in range(1, 6)]
learning_rate = 1e-4
loss = "cross_entropy"

In [83]:
hidden_dim_trials

[[100],
 [100, 100],
 [100, 100, 100],
 [100, 100, 100, 100],
 [100, 100, 100, 100, 100]]

## SBERT 768

In [84]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=sbert_768_embeddings[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0885392427444458
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.023568868637085
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.016163945198059 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 40!
Accuracy on dataset of size 672: 71.2797622680664 %.
Average loss: 0.6995780386707999
proportion of labels in prediction: [tensor(0.7173), tensor(0.1845), tensor(0.0982)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81243063 0.53284672 0.47337278]
- f1 (average): 0.6062167096746555
- accuracy: 0.7127976417541504

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.116580843925476
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0521271228790283
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.026239974932237 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 45!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.7389487461610273
proportion of labels in prediction: [tensor(0.6875), tensor(0.2024), tensor(0.1101)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80817253 0.51748252 0.46327684]
- f1 (average): 0.5963106282850795
- accuracy: 0.7008928656578064

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0993850231170654
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0102291107177734
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.994836606762626 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 42!
Accuracy on dataset of size 672: 70.23809814453125 %.
Average loss: 0.7553030956875194
proportion of labels in prediction: [tensor(0.6801), tensor(0.2098), tensor(0.1101)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80821918 0.52920962 0.46327684]
- f1 (average): 0.600235212077837
- accuracy: 0.7023809552192688

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.072503924369812
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.993831992149353
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9791417772119696 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 47!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.8187992193482139
proportion of labels in prediction: [tensor(0.6801), tensor(0.1890), tensor(0.1310)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79908676 0.50541516 0.46073298]
- f1 (average): 0.5884116349129783
- accuracy: 0.6904761791229248

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1058584451675415
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0399665832519531
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0396888039328835 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 50!
Accuracy on dataset of size 672: 69.19642639160156 %.
Average loss: 0.8239231001247059
proportion of labels in prediction: [tensor(0.7113), tensor(0.1815), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79821628 0.51470588 0.42285714]
- f1 (average): 0.57859310056241
- accuracy: 0.6919642686843872


## SBERT 384

In [85]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=sbert_384_embeddings[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0692368745803833
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0163383483886719
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0099996599284085 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 52!
Accuracy on dataset of size 672: 71.875 %.
Average loss: 0.7035864103924144
proportion of labels in prediction: [tensor(0.7232), tensor(0.1696), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81767956 0.51515152 0.51428571]
- f1 (average): 0.6157055958160931
- accuracy: 0.71875

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1142284870147705
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0404751300811768
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.043692480434071 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 45!
Accuracy on dataset of size 672: 70.53571319580078 %.
Average loss: 0.7237868959253485
proportion of labels in prediction: [tensor(0.7247), tensor(0.1741), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80573951 0.51685393 0.46783626]
- f1 (average): 0.5968099014143323
- accuracy: 0.7053571343421936

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0598180294036865
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9991912245750427
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.004194275899367 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 67!
Accuracy on dataset of size 672: 70.68452453613281 %.
Average loss: 0.7685691226612438
proportion of labels in prediction: [tensor(0.6979), tensor(0.1830), tensor(0.1190)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8018018  0.54212454 0.49180328]
- f1 (average): 0.6119098742049561
- accuracy: 0.706845223903656

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.075019359588623
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9795213937759399
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0088304281234741 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 31!
Accuracy on dataset of size 672: 65.92262268066406 %.
Average loss: 0.8153174790469083
proportion of labels in prediction: [tensor(0.7188), tensor(0.2812), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81374723 0.44837758 0.        ]
- f1 (average): 0.42070826983410625
- accuracy: 0.6592261791229248

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1322060823440552
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0921815633773804
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0522005774758079 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 49!
Accuracy on dataset of size 672: 66.81547546386719 %.
Average loss: 0.814079609784213
proportion of labels in prediction: [tensor(0.7009), tensor(0.2321), tensor(0.0670)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80449438 0.43137255 0.33783784]
- f1 (average): 0.524568256293306
- accuracy: 0.668154776096344


## Pretrained BERT

### Mean pooled

In [86]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_mean_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0839786529541016
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7457855343818665
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8354992433027788 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 69.79166412353516 %.
Average loss: 0.7122157866304571
proportion of labels in prediction: [tensor(0.7039), tensor(0.2083), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80269058 0.51724138 0.44444444]
- f1 (average): 0.5881254689048102
- accuracy: 0.6979166865348816

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1131868362426758
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8533899784088135
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8532686233520508 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 35!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.701988697052002
proportion of labels in prediction: [tensor(0.6949), tensor(0.2128), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81038375 0.53924915 0.47272727]
- f1 (average): 0.6074533888877603
- accuracy: 0.7098214030265808

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.098788857460022
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8470583558082581
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8576378388838335 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 36!
Accuracy on dataset of size 672: 70.83333587646484 %.
Average loss: 0.7304971489039335
proportion of labels in prediction: [tensor(0.6830), tensor(0.2188), tensor(0.0982)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80637813 0.55892256 0.46153846]
- f1 (average): 0.6089463841931573
- accuracy: 0.7083333134651184

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0730299949645996
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.835125207901001
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8732638359069824 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 45!
Accuracy on dataset of size 672: 70.68452453613281 %.
Average loss: 0.7486193478107452
proportion of labels in prediction: [tensor(0.7068), tensor(0.1994), tensor(0.0938)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81208054 0.52112676 0.45783133]
- f1 (average): 0.5970128742591122
- accuracy: 0.706845223903656

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1056439876556396
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9540625214576721
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.882446364922957 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 51!
Accuracy on dataset of size 672: 72.76786041259766 %.
Average loss: 0.7767329216003418
proportion of labels in prediction: [tensor(0.6905), tensor(0.1979), tensor(0.1116)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81993205 0.55123675 0.5505618 ]
- f1 (average): 0.6405768655665137
- accuracy: 0.7276785969734192


### Max pooled

In [87]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_max_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0957067012786865
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7537550926208496
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8680760372768749 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 52!
Accuracy on dataset of size 672: 69.79166412353516 %.
Average loss: 0.7068305503238331
proportion of labels in prediction: [tensor(0.7173), tensor(0.2054), tensor(0.0774)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80133185 0.52777778 0.41290323]
- f1 (average): 0.580670952360115
- accuracy: 0.6979166865348816

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1076050996780396
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9455707669258118
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9177429025823419 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 53!
Accuracy on dataset of size 672: 71.42857360839844 %.
Average loss: 0.7085082368417219
proportion of labels in prediction: [tensor(0.7202), tensor(0.2024), tensor(0.0774)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80841639 0.53846154 0.49032258]
- f1 (average): 0.6124001696394794
- accuracy: 0.7142857313156128

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0997354984283447
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.929989755153656
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9196747053753246 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 53!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.7250180461189963
proportion of labels in prediction: [tensor(0.7024), tensor(0.2143), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8013468  0.53741497 0.44025157]
- f1 (average): 0.5930044465534133
- accuracy: 0.7008928656578064

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0746076107025146
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9190363883972168
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9312605370174755 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 53!
Accuracy on dataset of size 672: 69.64286041259766 %.
Average loss: 0.7155440043319355
proportion of labels in prediction: [tensor(0.7083), tensor(0.2054), tensor(0.0863)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79553073 0.52083333 0.45962733]
- f1 (average): 0.5919971295942877
- accuracy: 0.6964285969734192

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1055570840835571
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.035354733467102
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9438045024871826 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 58!
Accuracy on dataset of size 672: 70.23809814453125 %.
Average loss: 0.7381011681123213
proportion of labels in prediction: [tensor(0.7024), tensor(0.2068), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80359147 0.52595156 0.46341463]
- f1 (average): 0.5976525538326346
- accuracy: 0.7023809552192688


### Sum pooled

In [88]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_sum_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 2.0271031856536865
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8685790300369263
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8226919986984946 || Accuracy: 0.6463595628738403 || F1-score: 0.4247232595366326
Early stopping at epoch 31!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.774150935086337
proportion of labels in prediction: [tensor(0.7173), tensor(0.2039), tensor(0.0789)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81021088 0.55052265 0.42307692]
- f1 (average): 0.5946034826546994
- accuracy: 0.7098214030265808

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.114878535270691
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7568445205688477
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8589342778379266 || Accuracy: 0.6508172154426575 || F1-score: 0.46548398969451604
Early stopping at epoch 35!
Accuracy on dataset of size 672: 70.83333587646484 %.
Average loss: 0.7620173096656799
proportion of labels in prediction: [tensor(0.7039), tensor(0.2113), tensor(0.0848)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80493274 0.56849315 0.425     ]
- f1 (average): 0.5994752953703134
- accuracy: 0.7083333134651184

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.111946940422058
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8133658766746521
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8259054476564581 || Accuracy: 0.637444257736206 || F1-score: 0.4151506273197447
Early stopping at epoch 29!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.8185790181159973
proportion of labels in prediction: [tensor(0.6815), tensor(0.2277), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8027366  0.55445545 0.45121951]
- f1 (average): 0.6028038532640426
- accuracy: 0.7038690447807312

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0758711099624634
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8967788219451904
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9189632643352855 || Accuracy: 0.6315007209777832 || F1-score: 0.30164060280829436
Early stopping at epoch 37!
Accuracy on dataset of size 672: 72.32142639160156 %.
Average loss: 0.8004374504089355
proportion of labels in prediction: [tensor(0.7024), tensor(0.1935), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81481481 0.56428571 0.50867052]
- f1 (average): 0.6292570164439142
- accuracy: 0.7232142686843872

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1112335920333862
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9961893558502197
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.975874434817921 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 41!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.9458087682723999
proportion of labels in prediction: [tensor(0.6830), tensor(0.2158), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79954442 0.53559322 0.47953216]
- f1 (average): 0.6048899344053565
- accuracy: 0.7008928656578064


### CLS

In [89]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_cls_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.072586178779602
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7834479808807373
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8409460566260598 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 71.875 %.
Average loss: 0.6774346611716531
proportion of labels in prediction: [tensor(0.7366), tensor(0.1786), tensor(0.0848)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81619256 0.53333333 0.475     ]
- f1 (average): 0.6081752978361293
- accuracy: 0.71875

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1158097982406616
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9066270589828491
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8640799901702187 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 38!
Accuracy on dataset of size 672: 72.02381134033203 %.
Average loss: 0.673450849272988
proportion of labels in prediction: [tensor(0.7351), tensor(0.1741), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81489595 0.52434457 0.51219512]
- f1 (average): 0.617145212888559
- accuracy: 0.7202380895614624

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.099726676940918
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9140689969062805
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8757737441496416 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 44!
Accuracy on dataset of size 672: 71.57737731933594 %.
Average loss: 0.6988080143928528
proportion of labels in prediction: [tensor(0.7068), tensor(0.1964), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81208054 0.5248227  0.52380952]
- f1 (average): 0.6202375852525788
- accuracy: 0.7157738208770752

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0739099979400635
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8497714996337891
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8862914334643971 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 36!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.7242212187160145
proportion of labels in prediction: [tensor(0.6786), tensor(0.2381), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79771429 0.5483871  0.46540881]
- f1 (average): 0.6038367291733087
- accuracy: 0.7008928656578064

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1050199270248413
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0113009214401245
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8908429525115273 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 60!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.7129268104379828
proportion of labels in prediction: [tensor(0.7217), tensor(0.1771), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80530973 0.49814126 0.49122807]
- f1 (average): 0.5982263562097445
- accuracy: 0.7038690447807312


## Fine-tuned BERT

### Mean pooled

In [None]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_mean_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=1e-5,
                  loss="cross_entropy")

### Max pooled

In [None]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_max_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### Sum pooled

In [None]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_sum_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### CLS

In [None]:
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=pooled_sum_pretrained[client_index],
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

# Baseline 2: Averaging history and use FFN

Here, we will use `nlpsig` to construct some paths of embeddings which we will average and use those in a FFN.

First, we define the arguments for how we want to construct our path. As we're going to just do a simple average of embeddings, I'll set zero padding as false, and construct the path by looking at the last `k` posts.

We will consider one where we average their histories and that is the only inputs to the FFN. Alternatively, we can concatenate the full post embedding as well.

In [181]:
path_specifics = {"pad_by": "history",
                  "zero_padding": False,
                  "method": "k_last",
                  "k": 10,
                  "time_feature": None,
                  "embeddings": "full",
                  "include_current_embedding": True}

In [182]:
def obtain_mean_history(embeddings, path_specifics, concatenate_current = True):
    paths = nlpsig.PrepareData(anno_mi,
                               id_column="transcript_id",
                               label_column="client_talk_type",
                               embeddings=embeddings)
    path = paths.pad(**path_specifics)
    # remove last two columns (which contains the id and the label)
    path = path[client_index][:,:,:-2]
    # average in the first dimension
    path = path.mean(1).astype("float")
    # concatenate with current embedding
    if concatenate_current:
        path = np.concatenate([path, embeddings[client_index]], axis=1)
    return path

## SBERT 768

In [183]:
path_history = obtain_mean_history(sbert_768_embeddings, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0951521396636963
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.989008367061615
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9893851226026361 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 50!
Accuracy on dataset of size 672: 72.02381134033203 %.
Average loss: 0.6766794919967651
proportion of labels in prediction: [tensor(0.6949), tensor(0.2054), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81489842 0.57638889 0.47058824]
- f1 (average): 0.6206251813491889
- accuracy: 0.7202380895614624

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0840717554092407
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9826463460922241
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9629483819007874 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 41!
Accuracy on dataset of size 672: 71.72618865966797 %.
Average loss: 0.6966309574517336
proportion of labels in prediction: [tensor(0.6741), tensor(0.2217), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80963303 0.58862876 0.47398844]
- f1 (average): 0.6240834097903668
- accuracy: 0.7172619104385376

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0726923942565918
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.004392385482788
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9865520596504211 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 42!
Accuracy on dataset of size 672: 71.13095092773438 %.
Average loss: 0.7030555768446489
proportion of labels in prediction: [tensor(0.6637), tensor(0.2173), tensor(0.1190)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80231214 0.58783784 0.48087432]
- f1 (average): 0.6236747645020175
- accuracy: 0.711309552192688

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.094164490699768
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.024963617324829
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.981616123156114 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 72.76786041259766 %.
Average loss: 0.7731133970347318
proportion of labels in prediction: [tensor(0.6756), tensor(0.2113), tensor(0.1131)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81786942 0.61643836 0.46927374]
- f1 (average): 0.6345271716629012
- accuracy: 0.7276785969734192

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0790125131607056
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0606188774108887
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0005513754757969 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 42!
Accuracy on dataset of size 672: 68.1547622680664 %.
Average loss: 0.7737902836366133
proportion of labels in prediction: [tensor(0.7113), tensor(0.2887), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82274247 0.51744186 0.        ]
- f1 (average): 0.4467281117938347
- accuracy: 0.6815476417541504


In [184]:
path_history = obtain_mean_history(sbert_768_embeddings, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0880542993545532
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0509945154190063
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0244701613079419 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.5583634972572327
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.585810661315918
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.839062815362757 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 201/10000 || Item: 0/85 || Loss: 0.7235603928565979
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.9222613573074341
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.8442041603001681 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 301/10000 |

Epoch: 2501/10000 || Item: 0/85 || Loss: 0.7536625862121582
--------------------------------------------------
##### Epoch: 2501/10000 || Loss: 0.5907317996025085
--------------------------------------------------
Validation || Epoch: 2501 || Loss: 0.8393472324718129 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 2601/10000 || Item: 0/85 || Loss: 0.7094292640686035
--------------------------------------------------
##### Epoch: 2601/10000 || Loss: 1.0559871196746826
--------------------------------------------------
Validation || Epoch: 2601 || Loss: 0.8366749557581815 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 2701/10000 || Item: 0/85 || Loss: 0.6577372550964355
--------------------------------------------------
##### Epoch: 2701/10000 || Loss: 0.7832667231559753
--------------------------------------------------
Validation || Epoch: 2701 || Loss: 0.839256075295535 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Ep

Epoch: 5001/10000 || Item: 0/85 || Loss: 0.6542056798934937
--------------------------------------------------
##### Epoch: 5001/10000 || Loss: 0.5106123685836792
--------------------------------------------------
Validation || Epoch: 5001 || Loss: 0.8313697413964705 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 5101/10000 || Item: 0/85 || Loss: 0.6271029710769653
--------------------------------------------------
##### Epoch: 5101/10000 || Loss: 0.7818343639373779
--------------------------------------------------
Validation || Epoch: 5101 || Loss: 0.8302126526832581 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 5201/10000 || Item: 0/85 || Loss: 0.5383269786834717
--------------------------------------------------
##### Epoch: 5201/10000 || Loss: 1.1728729009628296
--------------------------------------------------
Validation || Epoch: 5201 || Loss: 0.851501004262404 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Ep

Epoch: 7501/10000 || Item: 0/85 || Loss: 0.5564772486686707
--------------------------------------------------
##### Epoch: 7501/10000 || Loss: 0.8176395297050476
--------------------------------------------------
Validation || Epoch: 7501 || Loss: 0.8336770209399137 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 7601/10000 || Item: 0/85 || Loss: 0.622069239616394
--------------------------------------------------
##### Epoch: 7601/10000 || Loss: 0.6505205631256104
--------------------------------------------------
Validation || Epoch: 7601 || Loss: 0.8343544602394104 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Epoch: 7701/10000 || Item: 0/85 || Loss: 0.5496020317077637
--------------------------------------------------
##### Epoch: 7701/10000 || Loss: 0.4282279908657074
--------------------------------------------------
Validation || Epoch: 7701 || Loss: 0.8355135267431085 || Accuracy: 0.6433877944946289 || F1-score: 0.41885211195699923
Ep

Accuracy on dataset of size 672: 66.36904907226562 %.
Average loss: 0.784445892680775
proportion of labels in prediction: [tensor(0.8408), tensor(0.1384), tensor(0.0208)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78252033 0.41152263 0.18803419]
- f1 (average): 0.460692382327432
- accuracy: 0.663690447807312

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1165680885314941
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0671789646148682
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.034122884273529 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 55!
Accuracy on dataset of size 672: 65.47618865966797 %.
Average loss: 0.8014345873485912
proportion of labels in prediction: [tensor(0.8497), tensor(0.1220), tensor(0.0283)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78383838 0.31896552 0.24590164]
- f1 (average): 0.4495685134746752
- accuracy: 0.6547619104385376

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0994811058044434
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0283174514770508
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0029825622385198 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 54!
Accuracy on dataset of size 672: 62.5 %.
Average loss: 0.8602861762046814
proportion of labels in prediction: [tensor(0.7798), tensor(0.2202), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78685048 0.32885906 0.        ]
- f1 (average): 0.37190317920103627
- accuracy: 0.625

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0725291967391968
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0014322996139526
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9836918874220415 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 62!
Accuracy on dataset of size 672: 62.35118865966797 %.
Average loss: 0.8477486642924222
proportion of labels in prediction: [tensor(0.8690), tensor(0.1101), tensor(0.0208)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77966102 0.16071429 0.17094017]
- f1 (average): 0.37043849120120304
- accuracy: 0.6235119104385376

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1058520078659058
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0424485206604004
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0408807884563098 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 49!
Accuracy on dataset of size 672: 63.83928680419922 %.
Average loss: 0.8748811971057545
proportion of labels in prediction: [tensor(0.7902), tensor(0.1801), tensor(0.0298)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78526316 0.32472325 0.19512195]
- f1 (average): 0.43503611878224047
- accuracy: 0.6383928656578064


## SBERT 384

In [185]:
path_history = obtain_mean_history(sbert_384_embeddings, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.088646411895752
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0224132537841797
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0024505149234424 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 49!
Accuracy on dataset of size 672: 71.57737731933594 %.
Average loss: 0.6831169778650458
proportion of labels in prediction: [tensor(0.7128), tensor(0.1801), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81514477 0.52398524 0.50285714]
- f1 (average): 0.6139957162855116
- accuracy: 0.7157738208770752

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1163111925125122
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0465764999389648
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0169885971329429 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 47!
Accuracy on dataset of size 672: 71.72618865966797 %.
Average loss: 0.7060108239000494
proportion of labels in prediction: [tensor(0.6875), tensor(0.1890), tensor(0.1235)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81725312 0.52707581 0.52688172]
- f1 (average): 0.6237368847191235
- accuracy: 0.7172619104385376

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0996454954147339
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0017808675765991
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.982284361665899 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 57!
Accuracy on dataset of size 672: 71.72618865966797 %.
Average loss: 0.7493027719584379
proportion of labels in prediction: [tensor(0.6860), tensor(0.2098), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81818182 0.56357388 0.46242775]
- f1 (average): 0.6147278156693566
- accuracy: 0.7172619104385376

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.072601079940796
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9884348511695862
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9732580510052767 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 49!
Accuracy on dataset of size 672: 70.53571319580078 %.
Average loss: 0.7905030711130663
proportion of labels in prediction: [tensor(0.6726), tensor(0.2188), tensor(0.1086)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80367394 0.54545455 0.48863636]
- f1 (average): 0.6125882823644018
- accuracy: 0.7053571343421936

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1058897972106934
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0390312671661377
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0375065803527832 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 48!
Accuracy on dataset of size 672: 67.41071319580078 %.
Average loss: 0.822012028910897
proportion of labels in prediction: [tensor(0.7202), tensor(0.2798), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81284607 0.50887574 0.        ]
- f1 (average): 0.4405739361016641
- accuracy: 0.6741071343421936


In [186]:
path_history = obtain_mean_history(sbert_384_embeddings, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.068945288658142
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0284571647644043
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0144835060293025 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 83!
Accuracy on dataset of size 672: 63.69047546386719 %.
Average loss: 0.826813361861489
proportion of labels in prediction: [tensor(0.8304), tensor(0.1488), tensor(0.0208)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76151484 0.352      0.20512821]
- f1 (average): 0.43954768215975987
- accuracy: 0.636904776096344

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.114108681678772
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0454522371292114
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0465515418486162 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 73!
Accuracy on dataset of size 672: 68.1547622680664 %.
Average loss: 0.8352445689114657
proportion of labels in prediction: [tensor(0.7872), tensor(0.1711), tensor(0.0417)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79746835 0.43773585 0.33587786]
- f1 (average): 0.5236940220274677
- accuracy: 0.6815476417541504

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.059796690940857
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.002020239830017
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0058825016021729 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 56!
Accuracy on dataset of size 672: 62.7976188659668 %.
Average loss: 0.8746451193636114
proportion of labels in prediction: [tensor(0.8274), tensor(0.1696), tensor(0.0030)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78769231 0.28030303 0.01904762]
- f1 (average): 0.3623476523476523
- accuracy: 0.6279761791229248

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0749471187591553
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9816824197769165
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0096874778920955 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 58!
Accuracy on dataset of size 672: 62.5 %.
Average loss: 0.8662289001724937
proportion of labels in prediction: [tensor(0.8661), tensor(0.1339), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77922078 0.25       0.        ]
- f1 (average): 0.34307359307359303
- accuracy: 0.625

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.132214903831482
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0944277048110962
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0532963817769831 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 51!
Accuracy on dataset of size 672: 65.625 %.
Average loss: 0.8680606105110862
proportion of labels in prediction: [tensor(0.8661), tensor(0.0565), tensor(0.0774)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78921079 0.18085106 0.37419355]
- f1 (average): 0.44808513380922443
- accuracy: 0.65625


## Pretrained BERT

### Mean pooled

In [187]:
path_history = obtain_mean_history(pooled_mean_pretrained, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.102067232131958
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8260780572891235
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.847004462372173 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 53!
Accuracy on dataset of size 672: 71.13095092773438 %.
Average loss: 0.6744792082092979
proportion of labels in prediction: [tensor(0.7143), tensor(0.2068), tensor(0.0789)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80978865 0.55363322 0.43589744]
- f1 (average): 0.5997731026501941
- accuracy: 0.711309552192688

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0826632976531982
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7909537553787231
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8423506671732123 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 46!
Accuracy on dataset of size 672: 72.61904907226562 %.
Average loss: 0.732296651059931
proportion of labels in prediction: [tensor(0.6949), tensor(0.2217), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82392777 0.57525084 0.46540881]
- f1 (average): 0.6215291354629561
- accuracy: 0.726190447807312

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0727503299713135
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8658488988876343
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8703606507994912 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.44898688793182373
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.0886840745806694
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.706783879886974 || Accuracy: 0.720653772354126 || F1-score: 0.623069393795254
Early stopping at epoch 118!
Accuracy on dataset of size 672: 71.57737731933594 %.
Average loss: 0.7238770777528937
proportion of labels in prediction: [tensor(0.6949), tensor(0.2128), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81264108 0.5665529  0.46060606]
- f1 (average): 0.6132666817

  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0943586826324463
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9349538683891296
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8743602362546053 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 54!
Accuracy on dataset of size 672: 72.02381134033203 %.
Average loss: 0.7312079180370678
proportion of labels in prediction: [tensor(0.6905), tensor(0.2217), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81993205 0.55518395 0.48148148]
- f1 (average): 0.6188658259333001
- accuracy: 0.7202380895614624

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0790555477142334
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0316280126571655
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8944603638215498 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 49!
Accuracy on dataset of size 672: 72.02381134033203 %.
Average loss: 0.7407215400175615
proportion of labels in prediction: [tensor(0.6801), tensor(0.2188), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81050228 0.57239057 0.51461988]
- f1 (average): 0.6325042461788436
- accuracy: 0.7202380895614624


In [188]:
path_history = obtain_mean_history(pooled_mean_pretrained, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0823326110839844
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9887112379074097
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9002521959218112 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 64.28571319580078 %.
Average loss: 0.841454104943709
proportion of labels in prediction: [tensor(0.9182), tensor(0.0670), tensor(0.0149)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77220077 0.24615385 0.14159292]
- f1 (average): 0.3866491795695335
- accuracy: 0.6428571343421936

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1143786907196045
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9647703766822815
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8960050561211326 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 53!
Accuracy on dataset of size 672: 65.625 %.
Average loss: 0.8285364671186968
proportion of labels in prediction: [tensor(0.8438), tensor(0.1310), tensor(0.0253)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77281947 0.40336134 0.2       ]
- f1 (average): 0.4587269390514826
- accuracy: 0.65625

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.099800705909729
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.982613742351532
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9065857475454157 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 52!
Accuracy on dataset of size 672: 64.73213958740234 %.
Average loss: 0.8655710599639199
proportion of labels in prediction: [tensor(0.8467), tensor(0.1280), tensor(0.0253)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77530364 0.3559322  0.16666667]
- f1 (average): 0.4326341712603979
- accuracy: 0.6473214030265808

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.072922706604004
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8768916726112366
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9030661312016574 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 45!
Accuracy on dataset of size 672: 64.58333587646484 %.
Average loss: 0.8798430941321633
proportion of labels in prediction: [tensor(0.8780), tensor(0.0893), tensor(0.0327)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78097126 0.25714286 0.208     ]
- f1 (average): 0.41537137193826984
- accuracy: 0.6458333134651184

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.105739951133728
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0086760520935059
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9046244783834978 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 51!
Accuracy on dataset of size 672: 63.69047546386719 %.
Average loss: 0.9047468304634094
proportion of labels in prediction: [tensor(0.9167), tensor(0.0625), tensor(0.0208)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77487923 0.1875     0.15384615]
- f1 (average): 0.3720751269664313
- accuracy: 0.636904776096344


### Max pooled

In [189]:
path_history = obtain_mean_history(pooled_max_pretrained, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.107502818107605
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7524529695510864
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8631793043830178 || Accuracy: 0.6225854158401489 || F1-score: 0.2565044383226201
Early stopping at epoch 65!
Accuracy on dataset of size 672: 71.72618865966797 %.
Average loss: 0.695426426150582
proportion of labels in prediction: [tensor(0.7381), tensor(0.1845), tensor(0.0774)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81530055 0.55474453 0.42580645]
- f1 (average): 0.5986171745361454
- accuracy: 0.7172619104385376

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0758923292160034
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8602192401885986
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8644225434823469 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 66!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.7158872539346869
proportion of labels in prediction: [tensor(0.6830), tensor(0.2366), tensor(0.0804)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79954442 0.5631068  0.44585987]
- f1 (average): 0.6028370292874553
- accuracy: 0.7038690447807312

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0745344161987305
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.903744637966156
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8973067511211742 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 60!
Accuracy on dataset of size 672: 71.13095092773438 %.
Average loss: 0.707133639942516
proportion of labels in prediction: [tensor(0.6964), tensor(0.1979), tensor(0.1057)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80496054 0.55830389 0.48275862]
- f1 (average): 0.6153410162551314
- accuracy: 0.711309552192688

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.094313144683838
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9924508333206177
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9078524871305986 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 57!
Accuracy on dataset of size 672: 69.19642639160156 %.
Average loss: 0.7343476577238603
proportion of labels in prediction: [tensor(0.8259), tensor(0.0878), tensor(0.0863)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79876797 0.37320574 0.45962733]
- f1 (average): 0.5438670126550438
- accuracy: 0.6919642686843872

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0789034366607666
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0043119192123413
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9153049750761553 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 62!
Accuracy on dataset of size 672: 70.53571319580078 %.
Average loss: 0.7639460184357383
proportion of labels in prediction: [tensor(0.7485), tensor(0.2113), tensor(0.0402)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80694143 0.53424658 0.36923077]
- f1 (average): 0.5701395920811724
- accuracy: 0.7053571343421936


In [190]:
path_history = obtain_mean_history(pooled_max_pretrained, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0902756452560425
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0397809743881226
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9006156542084434 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 61!
Accuracy on dataset of size 672: 62.35118865966797 %.
Average loss: 0.876682395284826
proportion of labels in prediction: [tensor(0.8988), tensor(0.0967), tensor(0.0045)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75659824 0.26976744 0.05660377]
- f1 (average): 0.360989818638193
- accuracy: 0.6235119104385376

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1092349290847778
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9899702072143555
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8991328586231578 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 55!
Accuracy on dataset of size 672: 62.5 %.
Average loss: 0.8921718272295865
proportion of labels in prediction: [tensor(0.9464), tensor(0.0521), tensor(0.0015)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76398104 0.17297297 0.01923077]
- f1 (average): 0.3187282616192569
- accuracy: 0.625

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0993415117263794
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9690505862236023
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9076739387078718 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 69!
Accuracy on dataset of size 672: 60.41666793823242 %.
Average loss: 0.9077662283724005
proportion of labels in prediction: [tensor(0.7738), tensor(0.2113), tensor(0.0149)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.73695421 0.3630137  0.12389381]
- f1 (average): 0.40795390351421346
- accuracy: 0.6041666865348816

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0752605199813843
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8759329915046692
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9057751568880948 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 91!
Accuracy on dataset of size 672: 63.39285659790039 %.
Average loss: 0.8984722766009244
proportion of labels in prediction: [tensor(0.8646), tensor(0.1101), tensor(0.0253)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.764      0.29464286 0.18333333]
- f1 (average): 0.4139920634920635
- accuracy: 0.6339285969734192

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.105695366859436
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0434092283248901
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9036447134884921 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.9244691729545593
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 1.319921851158142
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8929828893054615 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 201/10000 || Item: 0/85 || Loss: 0.7867549061775208
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 1.2821563482284546
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.8963441252708435 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 301/10000 |

Epoch: 2501/10000 || Item: 0/85 || Loss: 1.0034199953079224
--------------------------------------------------
##### Epoch: 2501/10000 || Loss: 0.645277202129364
--------------------------------------------------
Validation || Epoch: 2501 || Loss: 0.8993629975752397 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 2601/10000 || Item: 0/85 || Loss: 0.8773769736289978
--------------------------------------------------
##### Epoch: 2601/10000 || Loss: 0.6896044611930847
--------------------------------------------------
Validation || Epoch: 2601 || Loss: 0.8988425243984569 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 2701/10000 || Item: 0/85 || Loss: 0.8763327598571777
--------------------------------------------------
##### Epoch: 2701/10000 || Loss: 0.7868524193763733
--------------------------------------------------
Validation || Epoch: 2701 || Loss: 0.9059645371003584 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Ep

Epoch: 5001/10000 || Item: 0/85 || Loss: 0.8928083181381226
--------------------------------------------------
##### Epoch: 5001/10000 || Loss: 1.3953959941864014
--------------------------------------------------
Validation || Epoch: 5001 || Loss: 0.8895294774662365 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 5101/10000 || Item: 0/85 || Loss: 0.8683613538742065
--------------------------------------------------
##### Epoch: 5101/10000 || Loss: 0.7241466045379639
--------------------------------------------------
Validation || Epoch: 5101 || Loss: 0.8980912132696672 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 5201/10000 || Item: 0/85 || Loss: 0.9704111218452454
--------------------------------------------------
##### Epoch: 5201/10000 || Loss: 1.094663381576538
--------------------------------------------------
Validation || Epoch: 5201 || Loss: 0.890712174502286 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epo

Epoch: 7501/10000 || Item: 0/85 || Loss: 0.8392974138259888
--------------------------------------------------
##### Epoch: 7501/10000 || Loss: 1.0813897848129272
--------------------------------------------------
Validation || Epoch: 7501 || Loss: 0.8951859853484414 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 7601/10000 || Item: 0/85 || Loss: 0.9864221811294556
--------------------------------------------------
##### Epoch: 7601/10000 || Loss: 0.753416895866394
--------------------------------------------------
Validation || Epoch: 7601 || Loss: 0.8959530971267007 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 7701/10000 || Item: 0/85 || Loss: 0.9221744537353516
--------------------------------------------------
##### Epoch: 7701/10000 || Loss: 0.9736170768737793
--------------------------------------------------
Validation || Epoch: 7701 || Loss: 0.8940104191953485 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Ep

Accuracy on dataset of size 672: 62.35118865966797 %.
Average loss: 0.9195525429465554
proportion of labels in prediction: [tensor(1.), tensor(0.), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76810266 0.         0.        ]
- f1 (average): 0.25603421937060805
- accuracy: 0.6235119104385376


### Sum pooled

In [191]:
path_history = obtain_mean_history(pooled_sum_pretrained, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.678933024406433
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.6816810369491577
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8064220114187761 || Accuracy: 0.6612184047698975 || F1-score: 0.4829433471752409
Early stopping at epoch 30!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.7885038473389365
proportion of labels in prediction: [tensor(0.7128), tensor(0.2128), tensor(0.0744)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80178174 0.55972696 0.40522876]
- f1 (average): 0.5889124859403455
- accuracy: 0.7038690447807312

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1401394605636597
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8598620891571045
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8176196921955455 || Accuracy: 0.6404160261154175 || F1-score: 0.36396288832429863
Early stopping at epoch 40!
Accuracy on dataset of size 672: 72.02381134033203 %.
Average loss: 0.8952876275235956
proportion of labels in prediction: [tensor(0.6994), tensor(0.2158), tensor(0.0848)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8143982  0.58305085 0.45      ]
- f1 (average): 0.615816349227533
- accuracy: 0.7202380895614624

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0784506797790527
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9166842699050903
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8573870333758268 || Accuracy: 0.6404160261154175 || F1-score: 0.34531878329609894
Early stopping at epoch 29!
Accuracy on dataset of size 672: 71.42857360839844 %.
Average loss: 0.8035102594982494
proportion of labels in prediction: [tensor(0.6711), tensor(0.2440), tensor(0.0848)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80689655 0.58598726 0.4625    ]
- f1 (average): 0.6184612709568782
- accuracy: 0.7142857313156128

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0932745933532715
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8824105262756348
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8603054230863397 || Accuracy: 0.6255571842193604 || F1-score: 0.27827678720682164
Early stopping at epoch 39!
Accuracy on dataset of size 672: 72.17262268066406 %.
Average loss: 0.9165262634103949
proportion of labels in prediction: [tensor(0.6696), tensor(0.2307), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81242808 0.58360656 0.50588235]
- f1 (average): 0.6339723295230296
- accuracy: 0.7217261791229248

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0838981866836548
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9220653176307678
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8925879922780123 || Accuracy: 0.6240713000297546 || F1-score: 0.30644148769599383
Early stopping at epoch 35!
Accuracy on dataset of size 672: 72.4702377319336 %.
Average loss: 0.8119913393800909
proportion of labels in prediction: [tensor(0.6786), tensor(0.2217), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81371429 0.58862876 0.50588235]
- f1 (average): 0.6360751337324227
- accuracy: 0.7247023582458496


In [192]:
path_history = obtain_mean_history(pooled_sum_pretrained, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.4307332038879395
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.2255232334136963
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8943042809312994 || Accuracy: 0.6240713000297546 || F1-score: 0.2598877839178142
Early stopping at epoch 55!
Accuracy on dataset of size 672: 68.1547622680664 %.
Average loss: 0.8286942893808539
proportion of labels in prediction: [tensor(0.7798), tensor(0.1905), tensor(0.0298)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7804878 0.5323741 0.2601626]
- f1 (average): 0.5243415024078298
- accuracy: 0.6815476417541504

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0660560131072998
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0853500366210938
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9064192284237255 || Accuracy: 0.6240713000297546 || F1-score: 0.2598877839178142
Early stopping at epoch 43!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.8946376009420915
proportion of labels in prediction: [tensor(0.7961), tensor(0.1399), tensor(0.0640)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78616352 0.49180328 0.39726027]
- f1 (average): 0.5584090248912353
- accuracy: 0.6904761791229248

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0890377759933472
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.994349479675293
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9262414086948741 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 52!
Accuracy on dataset of size 672: 71.13095092773438 %.
Average loss: 0.9046879681673917
proportion of labels in prediction: [tensor(0.7039), tensor(0.2202), tensor(0.0759)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79596413 0.58389262 0.46753247]
- f1 (average): 0.6157964035142234
- accuracy: 0.711309552192688

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0921019315719604
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9744298458099365
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9363620552149686 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 47!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.8467710018157959
proportion of labels in prediction: [tensor(0.6786), tensor(0.2158), tensor(0.1057)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79542857 0.57627119 0.50574713]
- f1 (average): 0.6258156281020103
- accuracy: 0.7098214030265808

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1097633838653564
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0565688610076904
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9436568238518455 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 67!
Accuracy on dataset of size 672: 68.45237731933594 %.
Average loss: 0.9079169901934537
proportion of labels in prediction: [tensor(0.6711), tensor(0.2381), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77471264 0.54193548 0.47560976]
- f1 (average): 0.5974192945488966
- accuracy: 0.6845238208770752


### CLS

In [193]:
path_history = obtain_mean_history(pooled_cls_pretrained, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1000295877456665
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7801932096481323
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8587136810476129 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 72.61904907226562 %.
Average loss: 0.6759935102679513
proportion of labels in prediction: [tensor(0.7500), tensor(0.1801), tensor(0.0699)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82340195 0.54612546 0.45333333]
- f1 (average): 0.6076202482501531
- accuracy: 0.726190447807312

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.081039309501648
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8311998844146729
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8527723713354631 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 40!
Accuracy on dataset of size 672: 72.91666412353516 %.
Average loss: 0.6840411153706637
proportion of labels in prediction: [tensor(0.7396), tensor(0.1741), tensor(0.0863)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8209607  0.54681648 0.50931677]
- f1 (average): 0.6256979827590136
- accuracy: 0.7291666865348816

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0732282400131226
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8942330479621887
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8833736777305603 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 48!
Accuracy on dataset of size 672: 72.91666412353516 %.
Average loss: 0.6757068634033203
proportion of labels in prediction: [tensor(0.7262), tensor(0.1905), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82249173 0.56115108 0.49056604]
- f1 (average): 0.6247362826179322
- accuracy: 0.7291666865348816

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0949422121047974
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9760374426841736
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.885305794802579 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 47!
Accuracy on dataset of size 672: 73.80952453613281 %.
Average loss: 0.7112683599645441
proportion of labels in prediction: [tensor(0.6726), tensor(0.2381), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82204363 0.59354839 0.56441718]
- f1 (average): 0.6600030643415539
- accuracy: 0.738095223903656

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0791540145874023
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.025408148765564
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8963234532963146 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 56!
Accuracy on dataset of size 672: 72.32142639160156 %.
Average loss: 0.7038171291351318
proportion of labels in prediction: [tensor(0.7113), tensor(0.1979), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81605351 0.55123675 0.51219512]
- f1 (average): 0.6264951275911711
- accuracy: 0.7232142686843872


In [194]:
path_history = obtain_mean_history(pooled_cls_pretrained, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


********** hidden_dim: [100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.072155475616455
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9787630438804626
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.899658896706321 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 61.30952453613281 %.
Average loss: 0.8787896578962152
proportion of labels in prediction: [tensor(0.9405), tensor(0.0580), tensor(0.0015)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75547098 0.14814815 0.01923077]
- f1 (average): 0.3076166324659823
- accuracy: 0.613095223903656

********** hidden_dim: [100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1162080764770508
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9712219834327698
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8967102224176581 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 62!
Accuracy on dataset of size 672: 62.7976188659668 %.
Average loss: 0.8612691326574846
proportion of labels in prediction: [tensor(0.8705), tensor(0.1116), tensor(0.0179)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75697211 0.29333333 0.15652174]
- f1 (average): 0.40227572800585093
- accuracy: 0.6279761791229248

********** hidden_dim: [100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.100602149963379
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0084004402160645
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9075343175367876 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 59!
Accuracy on dataset of size 672: 64.28571319580078 %.
Average loss: 0.8615717346018011
proportion of labels in prediction: [tensor(0.7872), tensor(0.1771), tensor(0.0357)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75738397 0.43122677 0.23622047]
- f1 (average): 0.474943734828309
- accuracy: 0.6428571343421936

********** hidden_dim: [100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0736695528030396
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8818268775939941
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9055995561859824 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 43!
Accuracy on dataset of size 672: 62.35118865966797 %.
Average loss: 0.9054542292248119
proportion of labels in prediction: [tensor(0.9509), tensor(0.0223), tensor(0.0268)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76559546 0.02424242 0.19834711]
- f1 (average): 0.3293949982728123
- accuracy: 0.6235119104385376

********** hidden_dim: [100, 100, 100, 100, 100]


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1050196886062622
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0802828073501587
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9039598269896074 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 57!
Accuracy on dataset of size 672: 61.755950927734375 %.
Average loss: 0.8949943401596763
proportion of labels in prediction: [tensor(0.9152), tensor(0.0774), tensor(0.0074)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77176015 0.12871287 0.05555556]
- f1 (average): 0.3186761938605209
- accuracy: 0.617559552192688


## Fine-tuned BERT

### Mean pooled

In [175]:
path_history = obtain_mean_history(pooled_mean, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

NameError: name 'pooled_mean' is not defined

In [None]:
path_history = obtain_mean_history(pooled_mean, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### Max pooled

In [None]:
path_history = obtain_mean_history(pooled_max, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
path_history = obtain_mean_history(pooled_max, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### Sum pooled

In [None]:
path_history = obtain_mean_history(pooled_sum, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
path_history = obtain_mean_history(pooled_sum, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### CLS

In [None]:
path_history = obtain_mean_history(pooled_cls, path_specifics)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
path_history = obtain_mean_history(pooled_cls, path_specifics, concatenate_current=False)
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=path_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

# Baseline 3: LSTM classification

# Baseline 4: FFN using signatures

First, we dimension reduce these and then take signatures. We use the path signature as input to the FFN for classification.

In [195]:
path_specifics = {"pad_by": "history",
                  "zero_padding": False,
                  "method": "k_last",
                  "k": 10,
                  "time_feature": None,
                  "embeddings": "dim_reduced",
                  "include_current_embedding": True}

In [196]:
def obtain_signatures_history(embeddings, path_specifics, dimension, sig_depth, concatenate_current=True):
    # dimension reduce
    reduction = nlpsig.DimReduce(method="gaussian_random_projection", n_components=dimension)
    # reduction = nlpsig.DimReduce(method="umap", n_components=dimension)
    embeddings_reduced = reduction.fit_transform(embeddings, random_state=seed)
    
    paths = nlpsig.PrepareData(anno_mi,
                               id_column="transcript_id",
                               label_column="client_talk_type",
                               embeddings=embeddings,
                               embeddings_reduced=embeddings_reduced)
    path = paths.pad(**path_specifics)
    # remove last two columns (which contains the id and the label)
    path = path[client_index][:,:,:-2].astype("float")
    
    # convert to torch tensor to compute signature using signatory
    path = torch.from_numpy(path).float()
    sig = signatory.signature(path, sig_depth).float()
    
    # concatenate with current embedding
    if concatenate_current:
        sig = torch.cat([sig, torch.tensor(embeddings[client_index])], dim=1)

    return sig

In [197]:
dimension = 5
sig_depth = 4

## SBERT 768

In [198]:
signature_history = obtain_signatures_history(sbert_768_embeddings, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 1548])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1170718669891357
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0491938591003418
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0329104553569446 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 33!
Accuracy on dataset of size 672: 68.89881134033203 %.
Average loss: 0.7490293329412286
proportion of labels in prediction: [tensor(0.7292), tensor(0.1815), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78987899 0.52205882 0.40490798]
- f1 (average): 0.5722819289627749
- accuracy: 0.6889880895614624

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1023259162902832
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0158472061157227
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9904605096036737 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 32!
Accuracy on dataset of size 672: 66.96428680419922 %.
Average loss: 0.8423257361758839
proportion of labels in prediction: [tensor(0.6964), tensor(0.2083), tensor(0.0952)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77790304 0.48965517 0.40718563]
- f1 (average): 0.5582479483749138
- accuracy: 0.6696428656578064

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0874525308609009
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0108987092971802
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9790441718968478 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.35277339816093445
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.047096773982048035
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0158505927432666 || Accuracy: 0.6627042889595032 || F1-score: 0.5273227238831266
Epoch: 201/10000 || Item: 0/85 || Loss: 0.307134211063385
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.03561379015445709
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.013514291156422 || Accuracy: 0.6627042889595032 || F1-score: 0.5273227238831266
Epoch: 301/10000

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0767083168029785
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0194463729858398
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9741700129075483 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 26!
Accuracy on dataset of size 672: 64.43452453613281 %.
Average loss: 0.9960732893510298
proportion of labels in prediction: [tensor(0.6756), tensor(0.3244), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79266896 0.47282609 0.        ]
- f1 (average): 0.42183168152464434
- accuracy: 0.644345223903656

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.069919466972351
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.971297025680542
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9626090797511014 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 25!
Accuracy on dataset of size 672: 62.5 %.
Average loss: 0.995502157644792
proportion of labels in prediction: [tensor(0.6771), tensor(0.3229), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78032037 0.43051771 0.        ]
- f1 (average): 0.4036126924347951
- accuracy: 0.625


In [199]:
signature_history = obtain_signatures_history(sbert_768_embeddings,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 780])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1303646564483643
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.091691493988037
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.081595540046692 || Accuracy: 0.5661218166351318 || F1-score: 0.26659070853300276
Early stopping at epoch 12!
Accuracy on dataset of size 672: 62.35118865966797 %.
Average loss: 0.9293696771968495
proportion of labels in prediction: [tensor(1.), tensor(0.), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76810266 0.         0.        ]
- f1 (average): 0.25603421937060805
- accuracy: 0.6235119104385376

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1389836072921753
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0764942169189453
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.080633813684637 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7909097671508789
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.34067660570144653
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0206401619044216 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 201/10000 || Item: 0/85 || Loss: 0.7610502243041992
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 1.6069424152374268
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.0220239487561313 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 301/10000 |

Epoch: 2501/10000 || Item: 0/85 || Loss: 0.7414960265159607
--------------------------------------------------
##### Epoch: 2501/10000 || Loss: 0.1530255824327469
--------------------------------------------------
Validation || Epoch: 2501 || Loss: 1.020157060839913 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 2601/10000 || Item: 0/85 || Loss: 0.6695468425750732
--------------------------------------------------
##### Epoch: 2601/10000 || Loss: 1.2219645977020264
--------------------------------------------------
Validation || Epoch: 2601 || Loss: 1.0266884782097556 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 2701/10000 || Item: 0/85 || Loss: 0.7066129446029663
--------------------------------------------------
##### Epoch: 2701/10000 || Loss: 1.8067954778671265
--------------------------------------------------
Validation || Epoch: 2701 || Loss: 1.0178581530397588 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch

Epoch: 5001/10000 || Item: 0/85 || Loss: 0.5487132668495178
--------------------------------------------------
##### Epoch: 5001/10000 || Loss: 0.8895431160926819
--------------------------------------------------
Validation || Epoch: 5001 || Loss: 1.0458341999487444 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 5101/10000 || Item: 0/85 || Loss: 0.6966171264648438
--------------------------------------------------
##### Epoch: 5101/10000 || Loss: 0.5476250648498535
--------------------------------------------------
Validation || Epoch: 5101 || Loss: 1.0273423086513171 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 5201/10000 || Item: 0/85 || Loss: 0.7137590646743774
--------------------------------------------------
##### Epoch: 5201/10000 || Loss: 1.2158581018447876
--------------------------------------------------
Validation || Epoch: 5201 || Loss: 1.0158035321669145 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoc

Epoch: 7501/10000 || Item: 0/85 || Loss: 0.6385011672973633
--------------------------------------------------
##### Epoch: 7501/10000 || Loss: 1.1602758169174194
--------------------------------------------------
Validation || Epoch: 7501 || Loss: 1.0288253318179736 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 7601/10000 || Item: 0/85 || Loss: 0.7711106538772583
--------------------------------------------------
##### Epoch: 7601/10000 || Loss: 0.8622919321060181
--------------------------------------------------
Validation || Epoch: 7601 || Loss: 1.0367966402660718 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch: 7701/10000 || Item: 0/85 || Loss: 0.6621609330177307
--------------------------------------------------
##### Epoch: 7701/10000 || Loss: 1.155216932296753
--------------------------------------------------
Validation || Epoch: 7701 || Loss: 1.046659225767309 || Accuracy: 0.5839524269104004 || F1-score: 0.2900596392089845
Epoch:

Accuracy on dataset of size 672: 61.30952453613281 %.
Average loss: 0.9993096535856073
proportion of labels in prediction: [tensor(0.9092), tensor(0.0908), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76116505 0.18957346 0.        ]
- f1 (average): 0.31691283608644305
- accuracy: 0.613095223903656

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.080559492111206
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0302709341049194
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0186785676262595 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7531773447990417
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.4756762385368347
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0462399450215427 || Accuracy: 0.5958395004272461 || F1-score: 0.2946432936752394
Epoch: 201/10000 || Item: 0/85 || Loss: 0.7362188100814819
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.28524696826934814
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.0541054172949358 || Accuracy: 0.5958395004272461 || F1-score: 0.2946432936752394
Epoch: 301/10000 |

Epoch: 2501/10000 || Item: 0/85 || Loss: 0.7218785881996155
--------------------------------------------------
##### Epoch: 2501/10000 || Loss: 0.8285083770751953
--------------------------------------------------
Validation || Epoch: 2501 || Loss: 1.0545342672954907 || Accuracy: 0.5973253846168518 || F1-score: 0.297558585048561
Epoch: 2601/10000 || Item: 0/85 || Loss: 0.6840920448303223
--------------------------------------------------
##### Epoch: 2601/10000 || Loss: 0.23172909021377563
--------------------------------------------------
Validation || Epoch: 2601 || Loss: 1.0426749207756736 || Accuracy: 0.5973253846168518 || F1-score: 0.297558585048561
Epoch: 2701/10000 || Item: 0/85 || Loss: 0.7298756241798401
--------------------------------------------------
##### Epoch: 2701/10000 || Loss: 0.25229018926620483
--------------------------------------------------
Validation || Epoch: 2701 || Loss: 1.0499002120711587 || Accuracy: 0.5973253846168518 || F1-score: 0.297558585048561
Epoch

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0874892473220825
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.041567325592041
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0157562765208157 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 38!
Accuracy on dataset of size 672: 59.6726188659668 %.
Average loss: 1.2987231612205505
proportion of labels in prediction: [tensor(0.8438), tensor(0.1280), tensor(0.0283)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7464503  0.22033898 0.1147541 ]
- f1 (average): 0.36051446189037933
- accuracy: 0.5967261791229248

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0989830493927002
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0823333263397217
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0345525308088823 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7649714350700378
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.8336163759231567
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.1429398655891418 || Accuracy: 0.5869241952896118 || F1-score: 0.2933159722222222
Epoch: 201/10000 || Item: 0/85 || Loss: 0.681190013885498
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 1.1246832609176636
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.1307478601282293 || Accuracy: 0.5869241952896118 || F1-score: 0.2933159722222222
Epoch: 301/10000 ||

## SBERT 384

In [200]:
signature_history = obtain_signatures_history(sbert_384_embeddings, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 1164])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0510205030441284
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9339765310287476
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9781227707862854 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 37!
Accuracy on dataset of size 672: 68.45237731933594 %.
Average loss: 0.8182277625257318
proportion of labels in prediction: [tensor(0.6920), tensor(0.1786), tensor(0.1295)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7918552  0.5037037  0.44210526]
- f1 (average): 0.5792213901605027
- accuracy: 0.6845238208770752

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.093340516090393
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0354981422424316
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0173125917261296 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 34!
Accuracy on dataset of size 672: 68.1547622680664 %.
Average loss: 0.9229160926558755
proportion of labels in prediction: [tensor(0.6860), tensor(0.2024), tensor(0.1116)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79318182 0.51048951 0.40449438]
- f1 (average): 0.569388570231267
- accuracy: 0.6815476417541504

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1014200448989868
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.064431071281433
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.015446814623746 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 45!
Accuracy on dataset of size 672: 64.73213958740234 %.
Average loss: 0.9659875360402194
proportion of labels in prediction: [tensor(0.6756), tensor(0.2872), tensor(0.0372)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79266896 0.43731778 0.21875   ]
- f1 (average): 0.48291224729132365
- accuracy: 0.6473214030265808

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1112401485443115
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.052091121673584
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0293906710364602 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 50!
Accuracy on dataset of size 672: 63.69047546386719 %.
Average loss: 1.0914304418997332
proportion of labels in prediction: [tensor(0.6875), tensor(0.2768), tensor(0.0357)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7877412  0.4047619  0.20472441]
- f1 (average): 0.4657425057963101
- accuracy: 0.636904776096344

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.099615454673767
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.083626389503479
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0153438557278027 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 50!
Accuracy on dataset of size 672: 63.83928680419922 %.
Average loss: 1.0310841582038186
proportion of labels in prediction: [tensor(0.6652), tensor(0.2827), tensor(0.0521)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79214781 0.41176471 0.23188406]
- f1 (average): 0.4785988566193287
- accuracy: 0.6383928656578064


In [201]:
signature_history = obtain_signatures_history(sbert_384_embeddings,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 780])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1309925317764282
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0654724836349487
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.075578440319408 || Accuracy: 0.5884100794792175 || F1-score: 0.27091551821302323
Early stopping at epoch 12!
Accuracy on dataset of size 672: 62.35118865966797 %.
Average loss: 0.9237872578881003
proportion of labels in prediction: [tensor(1.), tensor(0.), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76810266 0.         0.        ]
- f1 (average): 0.25603421937060805
- accuracy: 0.6235119104385376

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1385002136230469
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0628207921981812
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0754418156363748 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 36!
Accuracy on dataset of size 672: 61.16071319580078 %.
Average loss: 0.9524058645421808
proportion of labels in prediction: [tensor(0.9539), tensor(0.0387), tensor(0.0074)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75849057 0.09090909 0.01851852]
- f1 (average): 0.2893060584884484
- accuracy: 0.6116071343421936

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0802196264266968
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0205280780792236
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0158289670944214 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6703686118125916
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.40635591745376587
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0329773263497786 || Accuracy: 0.6121842265129089 || F1-score: 0.3035511955052185
Epoch: 201/10000 || Item: 0/85 || Loss: 0.7650904059410095
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.2539656162261963
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.0534041036259045 || Accuracy: 0.6121842265129089 || F1-score: 0.3035511955052185
Epoch: 301/10000 

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.08750581741333
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0394604206085205
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0141970677809282 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7286522388458252
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.36709776520729065
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.1960249543190002 || Accuracy: 0.5854383111000061 || F1-score: 0.32956290998530163
Epoch: 201/10000 || Item: 0/85 || Loss: 0.6445213556289673
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.9375444650650024
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.1644412983547558 || Accuracy: 0.5854383111000061 || F1-score: 0.32956290998530163
Early stopping at

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.099061131477356
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0810621976852417
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.033886107531461 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 44!
Accuracy on dataset of size 672: 59.52381134033203 %.
Average loss: 1.0715742653066462
proportion of labels in prediction: [tensor(0.9107), tensor(0.0625), tensor(0.0268)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.74490786 0.13541667 0.04958678]
- f1 (average): 0.3099704333254064
- accuracy: 0.5952380895614624


## Pretrained BERT

### Mean pooled

In [202]:
signature_history = obtain_signatures_history(pooled_mean_pretrained, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 1548])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 12.36815357208252
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 4.970185279846191
--------------------------------------------------
Validation || Epoch: 1 || Loss: 3.070787104693326 || Accuracy: 0.5512629747390747 || F1-score: 0.34108076344111987
Early stopping at epoch 59!
Accuracy on dataset of size 672: 61.755950927734375 %.
Average loss: 1.7640220468694514
proportion of labels in prediction: [tensor(0.7857), tensor(0.1741), tensor(0.0402)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75395987 0.33707865 0.2       ]
- f1 (average): 0.4303461749898161
- accuracy: 0.617559552192688

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 2.2733423709869385
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 3.630249500274658
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.3257284923033281 || Accuracy: 0.5765230059623718 || F1-score: 0.34099252887105136
Early stopping at epoch 33!
Accuracy on dataset of size 672: 61.755950927734375 %.
Average loss: 1.2828571091998706
proportion of labels in prediction: [tensor(0.7917), tensor(0.1518), tensor(0.0565)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75499474 0.34920635 0.17021277]
- f1 (average): 0.42480461918008067
- accuracy: 0.617559552192688

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.4547653198242188
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8083280324935913
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0537812818180432 || Accuracy: 0.5943536162376404 || F1-score: 0.29167526009441314
Early stopping at epoch 28!
Accuracy on dataset of size 672: 63.54166793823242 %.
Average loss: 1.2546936652877114
proportion of labels in prediction: [tensor(0.8080), tensor(0.1399), tensor(0.0521)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76923077 0.36065574 0.1884058 ]
- f1 (average): 0.4394307680123788
- accuracy: 0.6354166865348816

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1313300132751465
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8110309839248657
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9856763482093811 || Accuracy: 0.6210995316505432 || F1-score: 0.25542315918117936
Early stopping at epoch 30!
Accuracy on dataset of size 672: 61.30952453613281 %.
Average loss: 1.539837192405354
proportion of labels in prediction: [tensor(0.7232), tensor(0.2173), tensor(0.0595)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7558011  0.35810811 0.23776224]
- f1 (average): 0.45055715028090715
- accuracy: 0.613095223903656

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 0.9986205697059631
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.821330189704895
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9696343107657 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 27!
Accuracy on dataset of size 672: 62.5 %.
Average loss: 1.586189324205572
proportion of labels in prediction: [tensor(0.7128), tensor(0.2039), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75723831 0.3902439  0.30188679]
- f1 (average): 0.48312300074717357
- accuracy: 0.625


In [203]:
signature_history = obtain_signatures_history(pooled_mean_pretrained,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 780])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 10.014762878417969
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 3.304849863052368
--------------------------------------------------
Validation || Epoch: 1 || Loss: 4.3514087200164795 || Accuracy: 0.4591381847858429 || F1-score: 0.2953730684985743
Early stopping at epoch 19!
Accuracy on dataset of size 672: 58.03571319580078 %.
Average loss: 1.9879371361298994
proportion of labels in prediction: [tensor(0.8080), tensor(0.1324), tensor(0.0595)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.73596674 0.23430962 0.11188811]
- f1 (average): 0.3607214904286034
- accuracy: 0.5803571343421936

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 2.8463900089263916
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8160141110420227
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.5734508362683384 || Accuracy: 0.528974711894989 || F1-score: 0.3664071715027129
Early stopping at epoch 13!
Accuracy on dataset of size 672: 56.994049072265625 %.
Average loss: 1.2242309938777576
proportion of labels in prediction: [tensor(0.8557), tensor(0.1027), tensor(0.0417)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.722334   0.14611872 0.1221374 ]
- f1 (average): 0.33019671002182827
- accuracy: 0.569940447807312

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.8513818979263306
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.2975270748138428
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.1273438117720864 || Accuracy: 0.5720653533935547 || F1-score: 0.3091042633090347
Early stopping at epoch 35!
Accuracy on dataset of size 672: 58.92856979370117 %.
Average loss: 1.1692784970456904
proportion of labels in prediction: [tensor(0.8676), tensor(0.0818), tensor(0.0506)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.73852295 0.16585366 0.13138686]
- f1 (average): 0.3452544913140901
- accuracy: 0.5892857313156128

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0843513011932373
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8570678234100342
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9876446398821744 || Accuracy: 0.6240713000297546 || F1-score: 0.2601005307743682
Early stopping at epoch 25!
Accuracy on dataset of size 672: 58.92856979370117 %.
Average loss: 1.4376066381281072
proportion of labels in prediction: [tensor(0.8229), tensor(0.1161), tensor(0.0610)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7345679  0.23684211 0.16666667]
- f1 (average): 0.3793588910547974
- accuracy: 0.5892857313156128

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.082446575164795
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7274391651153564
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9836958755146373 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 34!
Accuracy on dataset of size 672: 58.33333206176758 %.
Average loss: 1.7019806991923938
proportion of labels in prediction: [tensor(0.7976), tensor(0.1577), tensor(0.0446)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.72670157 0.296875   0.10526316]
- f1 (average): 0.3762799095251217
- accuracy: 0.5833333134651184


### Max pooled

In [204]:
signature_history = obtain_signatures_history(pooled_max_pretrained, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 1548])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 41.86736297607422
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.32370734214782715
--------------------------------------------------
Validation || Epoch: 1 || Loss: 10.394998983903365 || Accuracy: 0.5319464802742004 || F1-score: 0.34935121774930217
Early stopping at epoch 26!
Accuracy on dataset of size 672: 57.14285659790039 %.
Average loss: 4.556642554023049
proportion of labels in prediction: [tensor(0.7857), tensor(0.1652), tensor(0.0491)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.71805702 0.2835249  0.10294118]
- f1 (average): 0.36817436762014605
- accuracy: 0.5714285969734192

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 10.795247077941895
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.251486897468567
--------------------------------------------------
Validation || Epoch: 1 || Loss: 3.5041382096030493 || Accuracy: 0.4962852895259857 || F1-score: 0.3553498062472953
Early stopping at epoch 47!
Accuracy on dataset of size 672: 57.58928680419922 %.
Average loss: 1.7531447302211414
proportion of labels in prediction: [tensor(0.7872), tensor(0.1726), tensor(0.0402)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.72151899 0.27067669 0.13846154]
- f1 (average): 0.37688573917754464
- accuracy: 0.5758928656578064

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 4.3066487312316895
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.1174490451812744
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.5548003803599963 || Accuracy: 0.5720653533935547 || F1-score: 0.31359240471312305
Early stopping at epoch 17!
Accuracy on dataset of size 672: 61.011905670166016 %.
Average loss: 1.147129405628551
proportion of labels in prediction: [tensor(0.8973), tensor(0.0908), tensor(0.0119)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7592955  0.19905213 0.01801802]
- f1 (average): 0.3254552165803221
- accuracy: 0.6101190447807312

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 2.816965341567993
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8566218614578247
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0994054783474316 || Accuracy: 0.5780088901519775 || F1-score: 0.28734479295882803
Early stopping at epoch 39!
Accuracy on dataset of size 672: 60.119049072265625 %.
Average loss: 1.2287907871333035
proportion of labels in prediction: [tensor(0.8467), tensor(0.1295), tensor(0.0238)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75303644 0.23628692 0.06722689]
- f1 (average): 0.3521834159448299
- accuracy: 0.601190447807312

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.064578652381897
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8209261894226074
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9802968881346963 || Accuracy: 0.6210995316505432 || F1-score: 0.25542315918117936
Epoch: 101/10000 || Item: 0/85 || Loss: 0.42258596420288086
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 1.0013890266418457
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.4297857176173816 || Accuracy: 0.5988112688064575 || F1-score: 0.41115125800543234
Epoch: 201/10000 || Item: 0/85 || Loss: 0.44800621271133423
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.4109724164009094
--------------------------------------------------
Validation || Epoch: 201 || Loss: 1.4138996601104736 || Accuracy: 0.6002971529960632 || F1-score: 0.4137447070634401
Early stopping a

In [205]:
signature_history = obtain_signatures_history(pooled_max_pretrained,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 780])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 43.17559051513672
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 9.364212036132812
--------------------------------------------------
Validation || Epoch: 1 || Loss: 15.230800888755105 || Accuracy: 0.44279345870018005 || F1-score: 0.3035218237972311
Early stopping at epoch 51!
Accuracy on dataset of size 672: 56.994049072265625 %.
Average loss: 4.35618029941212
proportion of labels in prediction: [tensor(0.7708), tensor(0.1607), tensor(0.0685)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.71291355 0.29457364 0.14765101]
- f1 (average): 0.385046068005891
- accuracy: 0.569940447807312

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 13.599725723266602
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0887997150421143
--------------------------------------------------
Validation || Epoch: 1 || Loss: 4.617391954768788 || Accuracy: 0.48439821600914 || F1-score: 0.32343614258256476
Early stopping at epoch 39!
Accuracy on dataset of size 672: 59.52381134033203 %.
Average loss: 1.5854438705877825
proportion of labels in prediction: [tensor(0.8170), tensor(0.1205), tensor(0.0625)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.73966942 0.25974026 0.16551724]
- f1 (average): 0.3883089742023911
- accuracy: 0.5952380895614624

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 5.205721378326416
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 6.816487789154053
--------------------------------------------------
Validation || Epoch: 1 || Loss: 2.1266170631755483 || Accuracy: 0.5408617854118347 || F1-score: 0.3081200844252588
Early stopping at epoch 15!
Accuracy on dataset of size 672: 58.03571319580078 %.
Average loss: 1.0712444999001243
proportion of labels in prediction: [tensor(0.9062), tensor(0.0729), tensor(0.0208)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.73346304 0.12060302 0.01709402]
- f1 (average): 0.2903866890629498
- accuracy: 0.5803571343421936

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.9819138050079346
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.1245191097259521
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0592117580500515 || Accuracy: 0.6062406897544861 || F1-score: 0.3250462856162642
Early stopping at epoch 12!
Accuracy on dataset of size 672: 61.45833206176758 %.
Average loss: 0.9830409288406372
proportion of labels in prediction: [tensor(0.9717), tensor(0.0253), tensor(0.0030)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75932836 0.05988024 0.01904762]
- f1 (average): 0.2794187389258442
- accuracy: 0.6145833134651184

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.3260524272918701
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7130478024482727
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0144050338051536 || Accuracy: 0.6181277632713318 || F1-score: 0.2846696731126907
Early stopping at epoch 12!
Accuracy on dataset of size 672: 62.05356979370117 %.
Average loss: 0.9950303597883745
proportion of labels in prediction: [tensor(0.9792), tensor(0.0208), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.76508821 0.06097561 0.        ]
- f1 (average): 0.2753546059137472
- accuracy: 0.6205357313156128


### Sum pooled

In [206]:
signature_history = obtain_signatures_history(pooled_sum_pretrained, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 1548])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 9646180.0
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 750286.75
--------------------------------------------------
Validation || Epoch: 1 || Loss: 3691685.3181818184 || Accuracy: 0.4472511112689972 || F1-score: 0.3391099913158737
Early stopping at epoch 21!
Accuracy on dataset of size 672: 53.27381134033203 %.
Average loss: 2804424.9261363638
proportion of labels in prediction: [tensor(0.6860), tensor(0.2262), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.67954545 0.2781457  0.20987654]
- f1 (average): 0.38918923103985653
- accuracy: 0.5327380895614624

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 13514473.0
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1135117.375
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1282761.9375 || Accuracy: 0.47696879506111145 || F1-score: 0.3518560625458767
Early stopping at epoch 25!
Accuracy on dataset of size 672: 47.1726188659668 %.
Average loss: 755310.5710227273
proportion of labels in prediction: [tensor(0.5833), tensor(0.3318), tensor(0.0848)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.61159063 0.28954424 0.1875    ]
- f1 (average): 0.36287828825940016
- accuracy: 0.4717261791229248

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 4648848.5
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 324156.4375
--------------------------------------------------
Validation || Epoch: 1 || Loss: 504357.2507102273 || Accuracy: 0.5141158699989319 || F1-score: 0.31151505894239595
Early stopping at epoch 24!
Accuracy on dataset of size 672: 51.636905670166016 %.
Average loss: 176146.10795454544
proportion of labels in prediction: [tensor(0.6577), tensor(0.2664), tensor(0.0759)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.66434379 0.29179331 0.16883117]
- f1 (average): 0.3749894227320278
- accuracy: 0.5163690447807312

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 3004850.5
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 8283.3017578125
--------------------------------------------------
Validation || Epoch: 1 || Loss: 202068.43678977274 || Accuracy: 0.6002971529960632 || F1-score: 0.31219192788378086
Early stopping at epoch 19!
Accuracy on dataset of size 672: 46.5773811340332 %.
Average loss: 56099.005415482956
proportion of labels in prediction: [tensor(0.5952), tensor(0.2917), tensor(0.1131)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.61782662 0.28323699 0.12290503]
- f1 (average): 0.34132287999307726
- accuracy: 0.4657738208770752

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 749476.4375
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 17405.90234375
--------------------------------------------------
Validation || Epoch: 1 || Loss: 42518.084783380684 || Accuracy: 0.5334323644638062 || F1-score: 0.32656029180214935
Early stopping at epoch 42!
Accuracy on dataset of size 672: 51.78571319580078 %.
Average loss: 8601.425892223011
proportion of labels in prediction: [tensor(0.6741), tensor(0.2574), tensor(0.0685)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.66055046 0.29102167 0.17449664]
- f1 (average): 0.37535625827917457
- accuracy: 0.5178571343421936


In [207]:
signature_history = obtain_signatures_history(pooled_sum_pretrained,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 780])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 32241886.0
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 4276127.0
--------------------------------------------------
Validation || Epoch: 1 || Loss: 5265692.409090909 || Accuracy: 0.37444278597831726 || F1-score: 0.2946142118966263
Early stopping at epoch 30!
Accuracy on dataset of size 672: 51.93452453613281 %.
Average loss: 3327280.5
proportion of labels in prediction: [tensor(0.6801), tensor(0.2143), tensor(0.1057)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.66666667 0.29251701 0.16091954]
- f1 (average): 0.3733677378997577
- accuracy: 0.519345223903656

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 6498777.0
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1248412.25
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1394622.0113636365 || Accuracy: 0.4962852895259857 || F1-score: 0.3341336637145378
Early stopping at epoch 29!
Accuracy on dataset of size 672: 52.97618865966797 %.
Average loss: 959525.3267045454
proportion of labels in prediction: [tensor(0.7113), tensor(0.2173), tensor(0.0714)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.68227425 0.27027027 0.13245033]
- f1 (average): 0.3616649496292456
- accuracy: 0.5297619104385376

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 7915275.0
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 776393.75
--------------------------------------------------
Validation || Epoch: 1 || Loss: 519197.6122159091 || Accuracy: 0.4933135211467743 || F1-score: 0.3531816632344769
Early stopping at epoch 40!
Accuracy on dataset of size 672: 51.488094329833984 %.
Average loss: 176253.0909090909
proportion of labels in prediction: [tensor(0.7039), tensor(0.2158), tensor(0.0804)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.67040359 0.2440678  0.14012739]
- f1 (average): 0.35153292419638255
- accuracy: 0.5148809552192688

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 2379226.5
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1228.6171875
--------------------------------------------------
Validation || Epoch: 1 || Loss: 197744.39382102274 || Accuracy: 0.5156017541885376 || F1-score: 0.35254415560248226
Early stopping at epoch 15!
Accuracy on dataset of size 672: 56.39881134033203 %.
Average loss: 80701.58220880682
proportion of labels in prediction: [tensor(0.8318), tensor(0.0997), tensor(0.0685)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.72801636 0.14746544 0.09395973]
- f1 (average): 0.3231471764166143
- accuracy: 0.5639880895614624

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 565282.0
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 13193.2734375
--------------------------------------------------
Validation || Epoch: 1 || Loss: 55157.63831676136 || Accuracy: 0.5720653533935547 || F1-score: 0.3683501325308396
Early stopping at epoch 35!
Accuracy on dataset of size 672: 48.80952453613281 %.
Average loss: 9056.40780362216
proportion of labels in prediction: [tensor(0.6592), tensor(0.2113), tensor(0.1295)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.6450116  0.23287671 0.16842105]
- f1 (average): 0.34876978862947344
- accuracy: 0.488095223903656


### CLS

In [208]:
signature_history = obtain_signatures_history(pooled_cls_pretrained, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 1548])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 10.57669734954834
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 2.36376953125
--------------------------------------------------
Validation || Epoch: 1 || Loss: 2.4741791053251787 || Accuracy: 0.5364041328430176 || F1-score: 0.3467733673264639
Early stopping at epoch 56!
Accuracy on dataset of size 672: 62.2023811340332 %.
Average loss: 1.6854703697291287
proportion of labels in prediction: [tensor(0.7768), tensor(0.1786), tensor(0.0446)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75451647 0.38518519 0.16541353]
- f1 (average): 0.4350383969527471
- accuracy: 0.6220238208770752

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 2.5529911518096924
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.6177814602851868
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.1614464900710366 || Accuracy: 0.6062406897544861 || F1-score: 0.3556703459870922
Early stopping at epoch 37!
Accuracy on dataset of size 672: 61.16071319580078 %.
Average loss: 1.1803450746969744
proportion of labels in prediction: [tensor(0.7946), tensor(0.1622), tensor(0.0432)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75131165 0.30888031 0.1969697 ]
- f1 (average): 0.4190538844263924
- accuracy: 0.6116071343421936

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.468880295753479
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9828745722770691
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0218017047101802 || Accuracy: 0.5869241952896118 || F1-score: 0.28628988499656216
Early stopping at epoch 46!
Accuracy on dataset of size 672: 61.60714340209961 %.
Average loss: 1.1165391206741333
proportion of labels in prediction: [tensor(0.7842), tensor(0.1711), tensor(0.0446)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.74841438 0.36226415 0.18045113]
- f1 (average): 0.430376551694766
- accuracy: 0.6160714030265808

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1732434034347534
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.688827633857727
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9776950857856057 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 25!
Accuracy on dataset of size 672: 60.71428680419922 %.
Average loss: 1.4647020914337852
proportion of labels in prediction: [tensor(0.7068), tensor(0.2307), tensor(0.0625)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75615213 0.36065574 0.20689655]
- f1 (average): 0.44123480490289935
- accuracy: 0.6071428656578064

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0346287488937378
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8615090847015381
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9569483724507418 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 24!
Accuracy on dataset of size 672: 61.011905670166016 %.
Average loss: 1.5974636511369185
proportion of labels in prediction: [tensor(0.7098), tensor(0.2173), tensor(0.0729)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.75446429 0.34459459 0.27631579]
- f1 (average): 0.4584582232608548
- accuracy: 0.6101190447807312


In [209]:
signature_history = obtain_signatures_history(pooled_cls_pretrained,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

signature_history.shape = torch.Size([6725, 780])

********** hidden_dim: [100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 7.455470085144043
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 5.963819980621338
--------------------------------------------------
Validation || Epoch: 1 || Loss: 3.0283126397566362 || Accuracy: 0.4591381847858429 || F1-score: 0.3215615329780235
Early stopping at epoch 20!
Accuracy on dataset of size 672: 61.16071319580078 %.
Average loss: 1.650571259585294
proportion of labels in prediction: [tensor(0.8185), tensor(0.1414), tensor(0.0402)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.74716202 0.30204082 0.18461538]
- f1 (average): 0.41127274121524454
- accuracy: 0.6116071343421936

********** hidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 3.409783124923706
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9901486039161682
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.5267160155556418 || Accuracy: 0.48736998438835144 || F1-score: 0.32956307880308083
Early stopping at epoch 40!
Accuracy on dataset of size 672: 59.82143020629883 %.
Average loss: 1.1799353198571638
proportion of labels in prediction: [tensor(0.8438), tensor(0.1205), tensor(0.0357)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.74036511 0.24242424 0.14173228]
- f1 (average): 0.3748405458168918
- accuracy: 0.5982142686843872

********** hidden_dim: [100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.4429852962493896
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.142116665840149
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0459776791659268 || Accuracy: 0.5958395004272461 || F1-score: 0.28203765227021044
Early stopping at epoch 32!
Accuracy on dataset of size 672: 57.44047546386719 %.
Average loss: 1.2916027849370784
proportion of labels in prediction: [tensor(0.8051), tensor(0.1429), tensor(0.0521)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.725      0.24390244 0.11594203]
- f1 (average): 0.36161482266996586
- accuracy: 0.574404776096344

********** hidden_dim: [100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1479626893997192
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8195385336875916
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9821810668165033 || Accuracy: 0.6210995316505432 || F1-score: 0.2590895972714155
Early stopping at epoch 29!
Accuracy on dataset of size 672: 59.0773811340332 %.
Average loss: 1.3834776282310486
proportion of labels in prediction: [tensor(0.8095), tensor(0.1369), tensor(0.0536)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.73520249 0.24793388 0.18705036]
- f1 (average): 0.39006224540719625
- accuracy: 0.5907738208770752

********** hidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0760245323181152
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8654565215110779
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9872191548347473 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Early stopping at epoch 24!
Accuracy on dataset of size 672: 59.6726188659668 %.
Average loss: 1.5003969073295593
proportion of labels in prediction: [tensor(0.7649), tensor(0.1741), tensor(0.0610)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.74169346 0.32209738 0.16666667]
- f1 (average): 0.4101525022981723
- accuracy: 0.5967261791229248


### Fine-tuned BERT

### Mean pooled

In [None]:
signature_history = obtain_signatures_history(pooled_mean, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
signature_history = obtain_signatures_history(pooled_mean,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### Max pooled

In [None]:
signature_history = obtain_signatures_history(pooled_max,
                                              path_specifics,
                                              dimension,
                                              sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
signature_history = obtain_signatures_history(pooled_max,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### Sum pooled

In [None]:
signature_history = obtain_signatures_history(pooled_sum, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
signature_history = obtain_signatures_history(pooled_sum,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

### CLS

In [None]:
signature_history = obtain_signatures_history(pooled_cls, path_specifics, dimension, sig_depth)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

In [None]:
signature_history = obtain_signatures_history(pooled_cls,
                                              path_specifics,
                                              dimension,
                                              sig_depth,
                                              concatenate_current=False)
print(f"signature_history.shape = {signature_history.shape}")
for hidden_dim in hidden_dim_trials:
    print(f"\n********** hidden_dim: {hidden_dim}")
    implement_ffn(x_data=signature_history,
                  y_data=y_data,
                  hidden_dim=hidden_dim,
                  learning_rate=learning_rate,
                  loss=loss)

# StackedDeepSigNet

## Obtaining path by looking at post history

We can obtain a path by looking at the history of each post. Here we look at the last 10 posts (and pad with vectors of zeros if there are less than 10 posts) including the current post.

We only want to consider paths that correspond to a client's utterance as we want to model a change in mood at that time. Their history will still contain the therapist's utterances too.

In [210]:
time_features = ["time_encoding", "timeline_index"]
path_specifics = {"pad_by": "history",
                  "zero_padding": True,
                  "method": "k_last",
                  "k": 10,
                  "time_feature": time_features,
                  "standardise_method": ["minmax", None],
                  "embeddings": "dim_reduced",
                  "include_current_embedding": True,
                  "pad_from_below": False}

In [211]:
def obtain_SDSN_input(embeddings, path_specifics):
    reduction = nlpsig.DimReduce(method="gaussian_random_projection", n_components=50)
    embeddings_reduced = reduction.fit_transform(embeddings, random_state=seed)
    
    paths = nlpsig.PrepareData(anno_mi,
                               id_column="transcript_id",
                               label_column="client_talk_type",
                               embeddings=embeddings,
                               embeddings_reduced=embeddings_reduced)
    
    paths.pad(**path_specifics)
    
    paths.array_padded = paths.array_padded[client_index]
    paths.embeddings = paths.embeddings[client_index]
    paths.embeddings_reduced = paths.embeddings_reduced[client_index]
    
    return paths.get_torch_path_for_SDSN(
        include_time_features_in_path=True,
        include_time_features_in_input=True,
        include_embedding_in_input=True,
        reduced_embeddings=False
    )

In [212]:
def implement_sdsn(x_data,
                   y_data,
                   sig_depth,
                   input_channels,
                   output_channels,
                   lstm_hidden_dim,
                   ffn_hidden_dim,
                   BiLSTM,
                   learning_rate,
                   loss,
                   gamma = 0):
    SDSN_args = {
        "input_channels": input_channels,
        "output_channels": output_channels,
        "num_time_features": len(time_features),
        "embedding_dim": x_data.shape[2]-input_channels-len(time_features),
        "sig_depth": sig_depth,
        "hidden_dim_lstm": lstm_hidden_dim,
        "hidden_dim_ffn": ffn_hidden_dim,
        "output_dim": len(label_to_id),
        "dropout_rate": 0.1,
        "augmentation_type": "Conv1d",
        "BiLSTM": BiLSTM,
        "comb_method": "concatenation"
    }
    
    sdsn_model = StackedDeepSigNet(**SDSN_args)
    # print(sdsn_model)
    
    # split dataset
    train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),
                                       y_data=torch.tensor(y_data),
                                       train_size=0.8,
                                       valid_size=0.5,
                                       shuffle=True,
                                       as_DataLoader=True,
                                       seed=seed)
    
    # define loss
    if loss == "focal":    
        criterion = FocalLoss(gamma = gamma)
    elif loss == "cross_entropy":
        criterion = torch.nn.CrossEntropyLoss()

    # define optimizer
    optimizer = torch.optim.Adam(sdsn_model.parameters(), lr=learning_rate)
    # define scheduler for adjusting the learning rate
    scheduler = ReduceLROnPlateau(optimizer, 'min')
    # scheduler = StepLR(optimizer, step_size = 10, gamma = 0.5)
    # scheduler = CosineAnnealingWarmRestarts(optimizer, 
    #                                         T_0 = 8,# Number of iterations for the first restart
    #                                         T_mult = 1, # A factor increases TiTi after a restart
    #                                         eta_min = learning_rate*0.1)
    # scheduler = None
    
    sdsn_model = training_pytorch(model=sdsn_model,
                                  train_loader=train,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  num_epochs=10000,
                                  scheduler=scheduler,
                                  valid_loader=valid,
                                  early_stopping=True,
                                  early_stopping_metric="f1",
                                  patience=100,
                                  verbose=True,
                                  verbose_epoch=100,
                                  seed=seed)

    pred, label = testing_pytorch(sdsn_model, test, criterion)
    print(f"proportion of labels in prediction: {[sum(pred==i)/len(pred) for i in label_to_id.values()]}")
    print(f"proportion of labels in data: {[sum(label==i)/len(label) for i in label_to_id.values()]}")
    
    f1_scores = metrics.f1_score(label, pred, average=None)
    print(f"- f1: {f1_scores}")
    print(f"- f1 (average): {sum(f1_scores)/len(f1_scores)}")
    print(f"- accuracy: {sum(pred==label)/len(pred)}")
    
    return sdsn_model

In [213]:
lstm_hidden_dim_trial = [[8,8], [12,12,8], [12,12,12,8]]
ffn_hidden_dim_trial = [[100]*i for i in range(2, 6)]
sig_depth = 3
output_channels = 10
BiLSTM = True
learning_rate = 1e-4

## SBERT 768

In [214]:
input_channels

52

In [215]:
from __future__ import annotations
import signatory
import torch
import torch.nn as nn


class StackedDeepSigNet(nn.Module):
    """
    Stacked Deep Signature Neural Network for classification.
    """

    def __init__(
        self,
        input_channels: int,
        output_channels: int,
        num_time_features: int,
        embedding_dim: int,
        sig_depth: int,
        hidden_dim_lstm: list[int] | int,
        hidden_dim_ffn: list[int] | int,
        output_dim: int,
        dropout_rate: float,
        augmentation_type: str = "Conv1d",
        augmentation_args: dict | None = None,
        hidden_dim_aug: list[int] | int | None = None,
        BiLSTM: bool = False,
        comb_method: str = "gated_addition",
    ):
        """
        Stacked Deep Signature Neural Network for classification.

        Parameters
        ----------
        input_channels : int
            Dimension of the embeddings that will be passed in.
        output_channels : int
            Requested dimension of the embeddings after convolution layer.
        num_time_features : int
            Number of time features to add to FFN input. If none, set to zero.
        embedding_dim : int
            Dimension of embedding to add to FFN input. If none, set to zero.
        sig_depth : int
            The depth to truncate the path signature at.
        hidden_dim_lstm : list[int] | int
            Dimensions of the hidden layers in the LSTM blocks.
        hidden_dim_ffn : list[int] | int
            Dimension of the hidden layers in the FFN.
        output_dim : int
            Dimension of the output layer in the FFN.
        dropout_rate : float
            Dropout rate in the FFN.
        augmentation_type : str, optional
            Method of augmenting the path, by default "Conv1d".
            Options are:
            - "Conv1d": passes path through 1D convolution layer.
            - "signatory": passes path through `Augment` layer from `signatory` package.
        augmentation_args : dict | None, optional
            Arguments to pass into `torch.Conv1d` or `signatory.Augment`, by default None.
            If None, by default will set `kernel_size=3`, `stride=1`, `padding=0`.
        hidden_dim_aug : list[int] | int | None
            Dimensions of the hidden layers in the augmentation layer.
            Passed into `Augment` class from `signatory` package if
            `augmentation_type='signatory'`, by default None.
        BiLSTM : bool, optional
            Whether or not a birectional LSTM is used,
            by default False (unidirectional LSTM is used in this case).
        comb_method : str, optional
            Determines how to combine the path signature and embeddings,
            by default "gated_addition".
            Options are:
            - concatenation: concatenation of path signature and embedding vector
            - gated_addition: element-wise addition of path signature and embedding vector
        """
        super(StackedDeepSigNet, self).__init__()
        self.input_channels = input_channels
        
        if isinstance(hidden_dim_lstm, int):
            hidden_dim_lstm = [hidden_dim_lstm]
        if isinstance(hidden_dim_ffn, int):
            hidden_dim_ffn = [hidden_dim_ffn]
        self.hidden_dim_lstm = hidden_dim_lstm
        self.hidden_dim_ffn = hidden_dim_ffn
        
        self.embedding_dim = embedding_dim
        self.num_time_features = num_time_features
        if comb_method not in ["concatenation", "gated_addition"]:
            raise ValueError(
                "`comb_method` must be either 'concatenation' or 'gated_addition'."
            )
        self.comb_method = comb_method
        if augmentation_type not in ["Conv1d", "signatory"]:
            raise ValueError("`augmentation_type` must be 'Conv1d' or 'signatory'.")
        
        self.augmentation_type = augmentation_type
        if isinstance(hidden_dim_aug, int):
            hidden_dim_aug = [hidden_dim_aug]
        elif hidden_dim_aug is None:
            hidden_dim_aug = []
        self.hidden_dim_aug = hidden_dim_aug
        if augmentation_args is None:
            augmentation_args = {"kernel_size": 3,
                                 "stride": 1,
                                 "padding": 1}
        # convolution
        self.conv = nn.Conv1d(
            in_channels=input_channels,
            out_channels=output_channels,
            **augmentation_args,
        )
        self.augment = signatory.Augment(
            in_channels=input_channels,
            layer_sizes=self.hidden_dim_aug + [output_channels],
            include_original=False,
            include_time=False,
            **augmentation_args,
        )
        # non-linearity
        self.tanh1 = nn.Tanh()

        self.signature_layers = []
        self.lstm_layers = []
        for l in range(len(self.hidden_dim_lstm)):
            self.signature_layers.append(signatory.LogSignature(depth=sig_depth, stream=True))
            if l == 0:
                input_dim_lstm = signatory.logsignature_channels(output_channels, sig_depth)
            else:
                input_dim_lstm = signatory.logsignature_channels(self.hidden_dim_lstm[l-1], sig_depth)
            self.lstm_layers.append(nn.LSTM(
                input_size=input_dim_lstm,
                hidden_size=self.hidden_dim_lstm[l],
                num_layers=1,
                batch_first=True,
                bidirectional=False if l!=(len(self.hidden_dim_lstm)-1) else BiLSTM,
            ))
        
        self.signature_layers = nn.ModuleList(self.signature_layers)
        self.lstm_layers = nn.ModuleList(self.lstm_layers)

        # signature without lift (for passing into FFN)
        mult = 2 if BiLSTM else 1
        self.signature2 = signatory.LogSignature(depth=sig_depth, stream=False)

        # find dimension of features to pass through FFN
        if self.comb_method == "concatenation":
            input_dim = (
                signatory.logsignature_channels(
                    in_channels=mult * self.hidden_dim_lstm[-1], depth=sig_depth
                )
                + self.num_time_features
                + self.embedding_dim
            )
        elif self.comb_method == "gated_addition":
            input_dim = self.embedding_dim
            input_gated_linear = (
                signatory.logsignature_channels(
                    in_channels=mult * self.hidden_dim_lstm[-1], depth=sig_depth
                )
                + self.num_time_features
            )
            if self.embedding_dim > 0:
                self.fc_scale = nn.Linear(input_gated_linear, self.embedding_dim)
                self.scaler = torch.nn.Parameter(torch.zeros(1, self.embedding_dim))
            else:
                self.fc_scale = nn.Linear(input_gated_linear, input_gated_linear)
                self.scaler = torch.nn.Parameter(torch.zeros(1, input_gated_linear))
            # non-linearity
            self.tanh2 = nn.Tanh()

        # FFN: input layer
        self.ffn_input_layer = nn.Linear(input_dim, self.hidden_dim_ffn[0])
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        input_dim = self.hidden_dim_ffn[0]
        
        # FFN: hidden layers
        self.ffn_linear_layers = []
        self.ffn_non_linear_layers = []
        self.dropout_layers = []
        for l in range(len(self.hidden_dim_ffn)):
            self.ffn_linear_layers.append(nn.Linear(input_dim, self.hidden_dim_ffn[l]))
            self.ffn_non_linear_layers.append(nn.ReLU())
            self.dropout_layers.append(nn.Dropout(dropout_rate))
            input_dim = self.hidden_dim_ffn[l]
        
        self.ffn_linear_layers = nn.ModuleList(self.ffn_linear_layers)
        self.ffn_non_linear_layers = nn.ModuleList(self.ffn_non_linear_layers)
        self.dropout_layers = nn.ModuleList(self.dropout_layers)
        
        # FFN: readout
        self.ffn_final_layer = nn.Linear(input_dim, output_dim)

    def forward(self, x: torch.Tensor):
        # x has dimensions [batch, length of signal, channels]
        
        print(f"input size: {x.shape}")
        
        # convolution
        if self.augmentation_type == "Conv1d":
            # input has dimensions [batch, length of signal, channels]
            # swap dimensions to get [batch, channels, length of signal]
            # (nn.Conv1d expects this)
            out = torch.transpose(x, 1, 2)
            # get only the path information
            out = self.conv(out[:, : self.input_channels, :])
            out = self.tanh1(out)
            # make output have dimensions [batch, length of signal, channels]
            out = torch.transpose(out, 1, 2)
        elif self.augmentation_type == "signatory":
            # input has dimensions [batch, length of signal, channels]
            # (signatory.Augment expects this)
            # and get only the path information
            # output has dimensions [batch, length of signal, channels]
            out = self.augment(x[:, :, : self.input_channels])

        print(f"after conv: {out.shape}")
        
        # take signature lifts and lstm
        for l in range(len(self.hidden_dim_lstm)):
            out = self.signature_layers[l](out)
            print(f"after signature: {out.shape}")
            out, _ = self.lstm_layers[l](out)
            print(f"after lstm: {out.shape}")
        
        print(f"after snwu: {out.shape}")
        # signature
        out = self.signature2(out)
        print(f"after last signature: {out.shape}")

        # combine last post embedding
        if x.shape[2] > self.input_channels:
            # we have things to concatenate to the path
            if self.comb_method == "concatenation":
                if self.num_time_features > 0:
                    # concatenate any time features
                    # take the maximum for the latest time
                    out = torch.cat(
                        (
                            out,
                            x[
                                :,
                                :,
                                self.input_channels : (
                                    self.input_channels + self.num_time_features
                                ),
                            ].max(1)[0],
                        ),
                        dim=1,
                    )
                if x.shape[2] > self.input_channels + self.num_time_features:
                    # concatenate current post embedding if provided
                    out = torch.cat(
                        (
                            out,
                            x[:, 0, (self.input_channels + self.num_time_features) :],
                        ),
                        dim=1,
                    )
            elif self.comb_method == "gated_addition":
                if self.num_time_features > 0:
                    # concatenate any time features
                    out_gated = torch.cat(
                        (
                            out,
                            x[
                                :,
                                :,
                                self.input_channels : (
                                    self.input_channels + self.num_time_features
                                ),
                            ].max(1)[0],
                        ),
                        dim=1,
                    )
                else:
                    out_gated = out
                out_gated = self.fc_scale(out_gated.float())
                out_gated = self.tanh2(out_gated)
                out_gated = torch.mul(self.scaler, out_gated)
                if x.shape[2] > self.input_channels + self.num_time_features:
                    # concatenate current post embedding if provided
                    out = (
                        out_gated
                        + x[:, 0, (self.input_channels + self.num_time_features) :],
                    )
                else:
                    out = out_gated

        # FFN: input layer
        out = self.ffn_input_layer(out)
        out = self.relu(out)
        out = self.dropout(out)
        
        # FFN: hidden layers    
        for l in range(len(self.hidden_dim_ffn)):
            out = self.ffn_linear_layers[l](out)
            out = self.ffn_non_linear_layers[l](out)
            out = self.dropout_layers[l](out)

        # FFN: readout
        out = self.ffn_final_layer(out)

        return out

In [216]:
from __future__ import annotations
import signatory
import torch
import torch.nn as nn


class StackedDeepSigNet(nn.Module):
    """
    Stacked Deep Signature Neural Network for classification.
    """

    def __init__(
        self,
        input_channels: int,
        output_channels: int,
        num_time_features: int,
        embedding_dim: int,
        sig_depth: int,
        hidden_dim_lstm: list[int] | int,
        hidden_dim_ffn: list[int] | int,
        output_dim: int,
        dropout_rate: float,
        augmentation_type: str = "Conv1d",
        augmentation_args: dict | None = None,
        hidden_dim_aug: list[int] | int | None = None,
        BiLSTM: bool = False,
        comb_method: str = "gated_addition",
    ):
        """
        Stacked Deep Signature Neural Network for classification.

        Parameters
        ----------
        input_channels : int
            Dimension of the embeddings that will be passed in.
        output_channels : int
            Requested dimension of the embeddings after convolution layer.
        num_time_features : int
            Number of time features to add to FFN input. If none, set to zero.
        embedding_dim : int
            Dimension of embedding to add to FFN input. If none, set to zero.
        sig_depth : int
            The depth to truncate the path signature at.
        hidden_dim_lstm : list[int] | int
            Dimensions of the hidden layers in the LSTM blocks.
        hidden_dim_ffn : list[int] | int
            Dimension of the hidden layers in the FFN.
        output_dim : int
            Dimension of the output layer in the FFN.
        dropout_rate : float
            Dropout rate in the FFN.
        augmentation_type : str, optional
            Method of augmenting the path, by default "Conv1d".
            Options are:
            - "Conv1d": passes path through 1D convolution layer.
            - "signatory": passes path through `Augment` layer from `signatory` package.
        augmentation_args : dict | None, optional
            Arguments to pass into `torch.Conv1d` or `signatory.Augment`, by default None.
            If None, by default will set `kernel_size=3`, `stride=1`, `padding=0`.
        hidden_dim_aug : list[int] | int | None
            Dimensions of the hidden layers in the augmentation layer.
            Passed into `Augment` class from `signatory` package if
            `augmentation_type='signatory'`, by default None.
        BiLSTM : bool, optional
            Whether or not a birectional LSTM is used,
            by default False (unidirectional LSTM is used in this case).
        comb_method : str, optional
            Determines how to combine the path signature and embeddings,
            by default "gated_addition".
            Options are:
            - concatenation: concatenation of path signature and embedding vector
            - gated_addition: element-wise addition of path signature and embedding vector
        """
        super(StackedDeepSigNet, self).__init__()
        self.input_channels = input_channels
        
        if isinstance(hidden_dim_lstm, int):
            hidden_dim_lstm = [hidden_dim_lstm]
        if isinstance(hidden_dim_ffn, int):
            hidden_dim_ffn = [hidden_dim_ffn]
        self.hidden_dim_lstm = hidden_dim_lstm
        self.hidden_dim_ffn = hidden_dim_ffn
        
        self.embedding_dim = embedding_dim
        self.num_time_features = num_time_features
        if comb_method not in ["concatenation", "gated_addition"]:
            raise ValueError(
                "`comb_method` must be either 'concatenation' or 'gated_addition'."
            )
        self.comb_method = comb_method
        if augmentation_type not in ["Conv1d", "signatory"]:
            raise ValueError("`augmentation_type` must be 'Conv1d' or 'signatory'.")
        
        self.augmentation_type = augmentation_type
        if isinstance(hidden_dim_aug, int):
            hidden_dim_aug = [hidden_dim_aug]
        elif hidden_dim_aug is None:
            hidden_dim_aug = []
        self.hidden_dim_aug = hidden_dim_aug
        if augmentation_args is None:
            augmentation_args = {"kernel_size": 3,
                                 "stride": 1,
                                 "padding": 1}
        # convolution
        self.conv = nn.Conv1d(
            in_channels=input_channels,
            out_channels=output_channels,
            **augmentation_args,
        )
        self.augment = signatory.Augment(
            in_channels=input_channels,
            layer_sizes=self.hidden_dim_aug + [output_channels],
            include_original=False,
            include_time=False,
            **augmentation_args,
        )
        # non-linearity
        self.tanh1 = nn.Tanh()

        self.signature_layers = []
        self.lstm_layers = []
        for l in range(len(self.hidden_dim_lstm)):
            self.signature_layers.append(signatory.LogSignature(depth=sig_depth, stream=True))
            if l == 0:
                input_dim_lstm = signatory.logsignature_channels(output_channels, sig_depth)
            else:
                input_dim_lstm = signatory.logsignature_channels(self.hidden_dim_lstm[l-1], sig_depth)
            self.lstm_layers.append(nn.LSTM(
                input_size=input_dim_lstm,
                hidden_size=self.hidden_dim_lstm[l],
                num_layers=1,
                batch_first=True,
                bidirectional=False if l!=(len(self.hidden_dim_lstm)-1) else BiLSTM,
            ))
        
        self.signature_layers = nn.ModuleList(self.signature_layers)
        self.lstm_layers = nn.ModuleList(self.lstm_layers)

        # signature without lift (for passing into FFN)
        mult = 2 if BiLSTM else 1
        self.signature2 = signatory.LogSignature(depth=sig_depth, stream=False)

        # find dimension of features to pass through FFN
        if self.comb_method == "concatenation":
            input_dim = (
                signatory.logsignature_channels(
                    in_channels=mult * self.hidden_dim_lstm[-1], depth=sig_depth
                )
                + self.num_time_features
                + self.embedding_dim
            )
        elif self.comb_method == "gated_addition":
            input_dim = self.embedding_dim
            input_gated_linear = (
                signatory.logsignature_channels(
                    in_channels=mult * self.hidden_dim_lstm[-1], depth=sig_depth
                )
                + self.num_time_features
            )
            if self.embedding_dim > 0:
                self.fc_scale = nn.Linear(input_gated_linear, self.embedding_dim)
                self.scaler = torch.nn.Parameter(torch.zeros(1, self.embedding_dim))
            else:
                self.fc_scale = nn.Linear(input_gated_linear, input_gated_linear)
                self.scaler = torch.nn.Parameter(torch.zeros(1, input_gated_linear))
            # non-linearity
            self.tanh2 = nn.Tanh()

        # FFN: input layer
        self.ffn_input_layer = nn.Linear(input_dim, self.hidden_dim_ffn[0])
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        input_dim = self.hidden_dim_ffn[0]
        
        # FFN: hidden layers
        self.ffn_linear_layers = []
        self.ffn_non_linear_layers = []
        self.dropout_layers = []
        for l in range(len(self.hidden_dim_ffn)):
            self.ffn_linear_layers.append(nn.Linear(input_dim, self.hidden_dim_ffn[l]))
            self.ffn_non_linear_layers.append(nn.ReLU())
            self.dropout_layers.append(nn.Dropout(dropout_rate))
            input_dim = self.hidden_dim_ffn[l]
        
        self.ffn_linear_layers = nn.ModuleList(self.ffn_linear_layers)
        self.ffn_non_linear_layers = nn.ModuleList(self.ffn_non_linear_layers)
        self.dropout_layers = nn.ModuleList(self.dropout_layers)
        
        # FFN: readout
        self.ffn_final_layer = nn.Linear(input_dim, output_dim)

    def forward(self, x: torch.Tensor):
        # x has dimensions [batch, length of signal, channels]

        # convolution
        if self.augmentation_type == "Conv1d":
            # input has dimensions [batch, length of signal, channels]
            # swap dimensions to get [batch, channels, length of signal]
            # (nn.Conv1d expects this)
            out = torch.transpose(x, 1, 2)
            # get only the path information
            out = self.conv(out[:, : self.input_channels, :])
            out = self.tanh1(out)
            # make output have dimensions [batch, length of signal, channels]
            out = torch.transpose(out, 1, 2)
        elif self.augmentation_type == "signatory":
            # input has dimensions [batch, length of signal, channels]
            # (signatory.Augment expects this)
            # and get only the path information
            # output has dimensions [batch, length of signal, channels]
            out = self.augment(x[:, :, : self.input_channels])

        # take signature lifts and lstm
        for l in range(len(self.hidden_dim_lstm)):
            out = self.signature_layers[l](out)
            out, _ = self.lstm_layers[l](out)
        
        # signature
        out = self.signature2(out)

        # combine last post embedding
        if x.shape[2] > self.input_channels:
            # we have things to concatenate to the path
            if self.comb_method == "concatenation":
                if self.num_time_features > 0:
                    # concatenate any time features
                    # take the maximum for the latest time
                    out = torch.cat(
                        (
                            out,
                            x[
                                :,
                                :,
                                self.input_channels : (
                                    self.input_channels + self.num_time_features
                                ),
                            ].max(1)[0],
                        ),
                        dim=1,
                    )
                if x.shape[2] > self.input_channels + self.num_time_features:
                    # concatenate current post embedding if provided
                    out = torch.cat(
                        (
                            out,
                            x[:, 0, (self.input_channels + self.num_time_features) :],
                        ),
                        dim=1,
                    )
            elif self.comb_method == "gated_addition":
                if self.num_time_features > 0:
                    # concatenate any time features
                    out_gated = torch.cat(
                        (
                            out,
                            x[
                                :,
                                :,
                                self.input_channels : (
                                    self.input_channels + self.num_time_features
                                ),
                            ].max(1)[0],
                        ),
                        dim=1,
                    )
                else:
                    out_gated = out
                out_gated = self.fc_scale(out_gated.float())
                out_gated = self.tanh2(out_gated)
                out_gated = torch.mul(self.scaler, out_gated)
                if x.shape[2] > self.input_channels + self.num_time_features:
                    # concatenate current post embedding if provided
                    out = (
                        out_gated
                        + x[:, 0, (self.input_channels + self.num_time_features) :],
                    )
                else:
                    out = out_gated

        # FFN: input layer
        out = self.ffn_input_layer(out)
        out = self.relu(out)
        out = self.dropout(out)
        
        # FFN: hidden layers    
        for l in range(len(self.hidden_dim_ffn)):
            out = self.ffn_linear_layers[l](out)
            out = self.ffn_non_linear_layers[l](out)
            out = self.dropout_layers[l](out)

        # FFN: readout
        out = self.ffn_final_layer(out)

        return out


In [217]:
x_data, input_channels = obtain_SDSN_input(sbert_768_embeddings, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.06357741355896
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0295138359069824
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9790611267089844 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6608085036277771
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.4031231105327606
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.6909303990277377 || Accuracy: 0.7043090462684631 || F1-score: 0.57423324479462
Early stopping at epoch 143!
Accuracy on dataset of size 672: 70.68452453613281 %.
Average loss: 0.7206933064894243
proportion of labels in prediction: [tensor(0.7247), tensor(0.1815), tensor(0.0938)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81236203 0.53676471 0.40963855]
- f1 (average): 0.586255097001

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0883405208587646
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0826038122177124
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.011209325356917 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7276307940483093
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.10399860888719559
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.736944171515378 || Accuracy: 0.7132243514060974 || F1-score: 0.5956179878008959
Early stopping at epoch 150!
Accuracy on dataset of size 672: 71.2797622680664 %.
Average loss: 0.7714293544942682
proportion of labels in prediction: [tensor(0.7217), tensor(0.1741), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8119469  0.55430712 0.43930636]
- f1 (average): 0.6018534590

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.115033507347107
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0796059370040894
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0137215581807224 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7191870212554932
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.17231343686580658
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7580459117889404 || Accuracy: 0.7132243514060974 || F1-score: 0.5737328833974377
Early stopping at epoch 171!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.7812599377198652
proportion of labels in prediction: [tensor(0.7396), tensor(0.1607), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81441048 0.51162791 0.44705882]
- f1 (average): 0.59103240

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0850569009780884
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0340579748153687
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9858841733499006 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7220345735549927
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 1.0345451831817627
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7413348393006758 || Accuracy: 0.6805348992347717 || F1-score: 0.4281073446327684
Early stopping at epoch 132!
Accuracy on dataset of size 672: 67.41071319580078 %.
Average loss: 0.7796666893092069
proportion of labels in prediction: [tensor(0.7738), tensor(0.2262), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81576145 0.46357616 0.        ]
- f1 (average): 0.426445869096

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.2992570400238037
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0141891241073608
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9963395866480741 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6954364776611328
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3070055842399597
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7299812002615496 || Accuracy: 0.7087666988372803 || F1-score: 0.5842389192171975
Early stopping at epoch 143!
Accuracy on dataset of size 672: 70.83333587646484 %.
Average loss: 0.7468507777560841
proportion of labels in prediction: [tensor(0.7455), tensor(0.1622), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81304348 0.51737452 0.42424242]
- f1 (average): 0.58488680

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0792440176010132
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0634193420410156
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.004805174740878 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7142853736877441
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.10122737288475037
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7249864664944735 || Accuracy: 0.699851393699646 || F1-score: 0.5725833083379148
Early stopping at epoch 146!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.766713722185655
proportion of labels in prediction: [tensor(0.7158), tensor(0.1652), tensor(0.1190)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81111111 0.49808429 0.44808743]
- f1 (average): 0.5857609446

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.138651967048645
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0537822246551514
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0088018070567737 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7623844742774963
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.11575664579868317
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7634969407861883 || Accuracy: 0.7087666988372803 || F1-score: 0.572342995169082
Early stopping at epoch 157!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.7467622756958008
proportion of labels in prediction: [tensor(0.7202), tensor(0.1756), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81284607 0.51492537 0.42774566]
- f1 (average): 0.585172368

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1052340269088745
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.070438027381897
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0075113394043662 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.738377571105957
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 1.0748851299285889
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7502310384403575 || Accuracy: 0.689450204372406 || F1-score: 0.43603099875600565
Epoch: 201/10000 || Item: 0/85 || Loss: 0.5760343074798584
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 1.0272172689437866
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.7527173920111223 || Accuracy: 0.689450204372406 || F1-score: 0.43603099875600565
Epoch: 301/10000 || 

Epoch: 2501/10000 || Item: 0/85 || Loss: 0.7550318837165833
--------------------------------------------------
##### Epoch: 2501/10000 || Loss: 0.6395190954208374
--------------------------------------------------
Validation || Epoch: 2501 || Loss: 0.7601532719352029 || Accuracy: 0.689450204372406 || F1-score: 0.43625159534813546
Epoch: 2601/10000 || Item: 0/85 || Loss: 0.7484893798828125
--------------------------------------------------
##### Epoch: 2601/10000 || Loss: 0.6841627359390259
--------------------------------------------------
Validation || Epoch: 2601 || Loss: 0.7601483139124784 || Accuracy: 0.689450204372406 || F1-score: 0.43625159534813546
Epoch: 2701/10000 || Item: 0/85 || Loss: 0.6809874773025513
--------------------------------------------------
##### Epoch: 2701/10000 || Loss: 0.5480964779853821
--------------------------------------------------
Validation || Epoch: 2701 || Loss: 0.7565477219494906 || Accuracy: 0.689450204372406 || F1-score: 0.43625159534813546
Earl

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1839221715927124
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.058618426322937
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9997325431216847 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6837211847305298
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3611336648464203
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.6946919939734719 || Accuracy: 0.7087666988372803 || F1-score: 0.5921402965791092
Early stopping at epoch 143!
Accuracy on dataset of size 672: 71.13095092773438 %.
Average loss: 0.7421822385354475
proportion of labels in prediction: [tensor(0.7292), tensor(0.1756), tensor(0.0952)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81628163 0.54477612 0.40718563]
- f1 (average): 0.589414458

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.107619047164917
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0343412160873413
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9972853606397455 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.736257791519165
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.0675061047077179
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7377959814938632 || Accuracy: 0.7043090462684631 || F1-score: 0.5648561298637451
Early stopping at epoch 146!
Accuracy on dataset of size 672: 71.57737731933594 %.
Average loss: 0.7729015296155756
proportion of labels in prediction: [tensor(0.7277), tensor(0.1756), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81938326 0.52985075 0.45238095]
- f1 (average): 0.6005383195

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1259416341781616
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0837455987930298
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0123418732122942 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.9185170531272888
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.23807857930660248
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7706064636057074 || Accuracy: 0.6671619415283203 || F1-score: 0.42035928143712575
Early stopping at epoch 130!
Accuracy on dataset of size 672: 67.41071319580078 %.
Average loss: 0.7694918459111993
proportion of labels in prediction: [tensor(0.7649), tensor(0.2351), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81672026 0.46753247 0.        ]
- f1 (average): 0.4280842415

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0599114894866943
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0687390565872192
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9648291631178423 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7561215758323669
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.990820050239563
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7462477900765159 || Accuracy: 0.6849925518035889 || F1-score: 0.4368428578954895
Early stopping at epoch 133!
Accuracy on dataset of size 672: 66.66666412353516 %.
Average loss: 0.7871717052026228
proportion of labels in prediction: [tensor(0.7604), tensor(0.2396), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80860215 0.46302251 0.        ]
- f1 (average): 0.4238748861920

## SBERT 384

In [218]:
x_data, input_channels = obtain_SDSN_input(sbert_384_embeddings, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.121998906135559
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0471062660217285
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.993124701760032 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7265946269035339
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.08941896259784698
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7369477315382524 || Accuracy: 0.6939078569412231 || F1-score: 0.5744239507379422
Early stopping at epoch 162!
Accuracy on dataset of size 672: 70.83333587646484 %.
Average loss: 0.7321670651435852
proportion of labels in prediction: [tensor(0.7128), tensor(0.1905), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81514477 0.51079137 0.46428571]
- f1 (average): 0.596740615

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1142566204071045
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.003860354423523
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.996264788237485 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7522861957550049
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.11122745275497437
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7394544698975303 || Accuracy: 0.6953937411308289 || F1-score: 0.5757383840516413
Early stopping at epoch 156!
Accuracy on dataset of size 672: 69.94047546386719 %.
Average loss: 0.7624564875255931
proportion of labels in prediction: [tensor(0.6964), tensor(0.1860), tensor(0.1176)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80045096 0.51636364 0.48351648]
- f1 (average): 0.600110359

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1017228364944458
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.025413990020752
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9970488927581094 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.9822747707366943
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.37978965044021606
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7980571822686628 || Accuracy: 0.6419019103050232 || F1-score: 0.394701168668628
Early stopping at epoch 124!
Accuracy on dataset of size 672: 65.92262268066406 %.
Average loss: 0.7764183337038214
proportion of labels in prediction: [tensor(0.7440), tensor(0.2560), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81828074 0.41614907 0.        ]
- f1 (average): 0.4114766027525

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1493381261825562
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.092268705368042
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.0237938599152998 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8685616850852966
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.9867019057273865
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7764909755099904 || Accuracy: 0.6508172154426575 || F1-score: 0.39535462001215427
Early stopping at epoch 139!
Accuracy on dataset of size 672: 67.11309814453125 %.
Average loss: 0.7776664007793773
proportion of labels in prediction: [tensor(0.7619), tensor(0.2381), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82277121 0.43870968 0.        ]
- f1 (average): 0.420493630389

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.2206028699874878
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0377821922302246
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9940246831287037 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7519584894180298
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.13576968014240265
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7378264719789679 || Accuracy: 0.6879643201828003 || F1-score: 0.5288787549764749
Early stopping at epoch 148!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.7749998461116444
proportion of labels in prediction: [tensor(0.7307), tensor(0.1756), tensor(0.0938)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80879121 0.50746269 0.42168675]
- f1 (average): 0.5793135

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1506388187408447
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0329482555389404
--------------------------------------------------
Validation || Epoch: 1 || Loss: 1.014827999201688 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7141520380973816
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.15407924354076385
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.768506342714483 || Accuracy: 0.6731054782867432 || F1-score: 0.5451255843164818
Early stopping at epoch 146!
Accuracy on dataset of size 672: 70.23809814453125 %.
Average loss: 0.7854068333452399
proportion of labels in prediction: [tensor(0.7232), tensor(0.1756), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81104972 0.50746269 0.43274854]
- f1 (average): 0.583753649

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1001724004745483
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0368224382400513
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9815603982318531 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.9649040699005127
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.39289215207099915
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7823824503205039 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 201/10000 || Item: 0/85 || Loss: 0.7215513586997986
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.8300783038139343
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.772163217717951 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 301/10000 |

Epoch: 2501/10000 || Item: 0/85 || Loss: 0.6634032726287842
--------------------------------------------------
##### Epoch: 2501/10000 || Loss: 1.3026859760284424
--------------------------------------------------
Validation || Epoch: 2501 || Loss: 0.7831812826069918 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 2601/10000 || Item: 0/85 || Loss: 0.7258135676383972
--------------------------------------------------
##### Epoch: 2601/10000 || Loss: 1.1447083950042725
--------------------------------------------------
Validation || Epoch: 2601 || Loss: 0.7705647891218012 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 2701/10000 || Item: 0/85 || Loss: 0.7064722180366516
--------------------------------------------------
##### Epoch: 2701/10000 || Loss: 0.6171582341194153
--------------------------------------------------
Validation || Epoch: 2701 || Loss: 0.7781545357270674 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoc

Epoch: 5001/10000 || Item: 0/85 || Loss: 0.9182127714157104
--------------------------------------------------
##### Epoch: 5001/10000 || Loss: 0.42393115162849426
--------------------------------------------------
Validation || Epoch: 5001 || Loss: 0.7761146859689192 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 5101/10000 || Item: 0/85 || Loss: 0.6947301626205444
--------------------------------------------------
##### Epoch: 5101/10000 || Loss: 1.0078166723251343
--------------------------------------------------
Validation || Epoch: 5101 || Loss: 0.7699445431882684 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 5201/10000 || Item: 0/85 || Loss: 0.6850304007530212
--------------------------------------------------
##### Epoch: 5201/10000 || Loss: 0.3822188675403595
--------------------------------------------------
Validation || Epoch: 5201 || Loss: 0.7812753753228621 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epo

Epoch: 7501/10000 || Item: 0/85 || Loss: 0.7156424522399902
--------------------------------------------------
##### Epoch: 7501/10000 || Loss: 0.42704275250434875
--------------------------------------------------
Validation || Epoch: 7501 || Loss: 0.7731098207560453 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 7601/10000 || Item: 0/85 || Loss: 0.6039122939109802
--------------------------------------------------
##### Epoch: 7601/10000 || Loss: 1.1046546697616577
--------------------------------------------------
Validation || Epoch: 7601 || Loss: 0.7730671980164268 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epoch: 7701/10000 || Item: 0/85 || Loss: 0.8365492224693298
--------------------------------------------------
##### Epoch: 7701/10000 || Loss: 1.0123564004898071
--------------------------------------------------
Validation || Epoch: 7701 || Loss: 0.7661543488502502 || Accuracy: 0.6656760573387146 || F1-score: 0.4195811292511829
Epo

Accuracy on dataset of size 672: 66.66666412353516 %.
Average loss: 0.8003735433925282
proportion of labels in prediction: [tensor(0.7366), tensor(0.2634), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81619256 0.4587156  0.        ]
- f1 (average): 0.42496938550177665
- accuracy: 0.6666666865348816

********** lstm_hidden_dim: [12, 12, 8] || ffnhidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0921334028244019
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0322431325912476
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9826022440736945 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8259876370429993
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.8322278261184692
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7861089543862776 || Accuracy: 0.6627042889595032 || F1-score: 0.4148525206995593
Early stopping at epoch 128!
Accuracy on dataset of size 672: 66.51786041259766 %.
Average loss: 0.7913895574483004
proportion of labels in prediction: [tensor(0.7440), tensor(0.2560), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81828074 0.44099379 0.        ]
- f1 (average): 0.419758176251

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0644787549972534
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.065685749053955
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9856856302781538 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7554184794425964
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.09720936417579651
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7491351853717457 || Accuracy: 0.6820207834243774 || F1-score: 0.5598449228738761
Early stopping at epoch 143!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.7794241146607832
proportion of labels in prediction: [tensor(0.7113), tensor(0.1890), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80936455 0.49097473 0.38823529]
- f1 (average): 0.56285819

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1146775484085083
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.035070538520813
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9956874630667947 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7303361892700195
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.12737801671028137
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.777530084956776 || Accuracy: 0.6745913624763489 || F1-score: 0.5613311220495781
Early stopping at epoch 158!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.791927532716231
proportion of labels in prediction: [tensor(0.6994), tensor(0.1845), tensor(0.1161)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80539933 0.51824818 0.4640884 ]
- f1 (average): 0.5959119660

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1051678657531738
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0238896608352661
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.995102280920202 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.9536846280097961
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.4498940408229828
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8075882250612433 || Accuracy: 0.6344724893569946 || F1-score: 0.3889235667634446
Early stopping at epoch 127!
Accuracy on dataset of size 672: 64.28571319580078 %.
Average loss: 0.8028621727770026
proportion of labels in prediction: [tensor(0.7485), tensor(0.2515), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80477223 0.38244514 0.        ]
- f1 (average): 0.3957391251130

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1041083335876465
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0664429664611816
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9895215251229026 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8241069912910461
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 1.0187573432922363
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7859444618225098 || Accuracy: 0.6508172154426575 || F1-score: 0.40133884673961034
Early stopping at epoch 126!
Accuracy on dataset of size 672: 66.51786041259766 %.
Average loss: 0.7918119051239707
proportion of labels in prediction: [tensor(0.7500), tensor(0.2500), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8212351  0.42767296 0.        ]
- f1 (average): 0.41630268630

## Pretrained BERT

### Mean pooled

In [219]:
x_data, input_channels = obtain_SDSN_input(pooled_mean_pretrained, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1083263158798218
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9703396558761597
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8777659806338224 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6071421504020691
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.29399019479751587
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8207249262116172 || Accuracy: 0.679049015045166 || F1-score: 0.53183287934843
Early stopping at epoch 119!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.7458651282570579
proportion of labels in prediction: [tensor(0.7128), tensor(0.1920), tensor(0.0952)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81514477 0.53763441 0.43113772]
- f1 (average): 0.5946389664

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1249394416809082
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9522772431373596
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.943194947459481 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7068532705307007
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.12456218898296356
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8211802244186401 || Accuracy: 0.6864784359931946 || F1-score: 0.5544798456325873
Early stopping at epoch 124!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.7713598229668357
proportion of labels in prediction: [tensor(0.6935), tensor(0.2098), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80677966 0.50859107 0.39285714]
- f1 (average): 0.56940928

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0913692712783813
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9906197786331177
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9287018830125983 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8286048769950867
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.20334593951702118
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8711824471300299 || Accuracy: 0.6523030996322632 || F1-score: 0.5035916812232601
Early stopping at epoch 122!
Accuracy on dataset of size 672: 68.45237731933594 %.
Average loss: 0.7822817184708335
proportion of labels in prediction: [tensor(0.6949), tensor(0.2128), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80361174 0.49829352 0.37575758]
- f1 (average): 0.5592209

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0686894655227661
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9680382013320923
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9435630494898016 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7456050515174866
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7053532004356384
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8338342850858514 || Accuracy: 0.6641901731491089 || F1-score: 0.42409309897265546
Early stopping at epoch 126!
Accuracy on dataset of size 672: 65.77381134033203 %.
Average loss: 0.8107903816483237
proportion of labels in prediction: [tensor(0.7545), tensor(0.2455), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80561555 0.43809524 0.        ]
- f1 (average): 0.41457026295

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.207420825958252
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9080084562301636
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8924839334054426 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.721686601638794
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7638161182403564
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8360991965640675 || Accuracy: 0.6701337099075317 || F1-score: 0.5284795665230447
Early stopping at epoch 131!
Accuracy on dataset of size 672: 67.85713958740234 %.
Average loss: 0.8351644006642428
proportion of labels in prediction: [tensor(0.7054), tensor(0.2068), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79507279 0.4982699  0.35802469]
- f1 (average): 0.5504557919

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1019978523254395
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9218642115592957
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9295949014750394 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6193119287490845
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.1335456371307373
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.824391554702412 || Accuracy: 0.6745913624763489 || F1-score: 0.5329199735449736
Early stopping at epoch 132!
Accuracy on dataset of size 672: 68.00595092773438 %.
Average loss: 0.8039264787327159
proportion of labels in prediction: [tensor(0.7098), tensor(0.1994), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79910714 0.48591549 0.36585366]
- f1 (average): 0.550292098

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1113650798797607
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9515938758850098
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.932028591632843 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7567929029464722
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.13789458572864532
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8116248683495955 || Accuracy: 0.6805348992347717 || F1-score: 0.5538295414894473
Epoch: 201/10000 || Item: 0/85 || Loss: 0.636617124080658
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.47726404666900635
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.8049460432746194 || Accuracy: 0.6805348992347717 || F1-score: 0.5538295414894473
Epoch: 301/10000 |

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.110866665840149
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0323796272277832
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9433342760259454 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6905802488327026
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.6991264224052429
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8670690059661865 || Accuracy: 0.6448736786842346 || F1-score: 0.41140734684814256
Early stopping at epoch 122!
Accuracy on dataset of size 672: 64.28571319580078 %.
Average loss: 0.852442280812697
proportion of labels in prediction: [tensor(0.6979), tensor(0.3021), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7972973  0.44192635 0.        ]
- f1 (average): 0.4130745476354

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1104532480239868
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8665093183517456
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.893160110170191 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6507341861724854
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.34680408239364624
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8093088648535989 || Accuracy: 0.6656760573387146 || F1-score: 0.5295474762916609
Early stopping at epoch 119!
Accuracy on dataset of size 672: 68.60118865966797 %.
Average loss: 0.805629854852503
proportion of labels in prediction: [tensor(0.7113), tensor(0.1964), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80267559 0.4964539  0.37575758]
- f1 (average): 0.558295687

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1527830362319946
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9344223141670227
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9437594793059609 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6916497945785522
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.12838536500930786
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8135914260690863 || Accuracy: 0.668647825717926 || F1-score: 0.522173657439358
Early stopping at epoch 124!
Accuracy on dataset of size 672: 68.89881134033203 %.
Average loss: 0.7816357937726107
proportion of labels in prediction: [tensor(0.7009), tensor(0.1979), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81123596 0.50176678 0.3625731 ]
- f1 (average): 0.558525279

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0989179611206055
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.936793863773346
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9397207390178334 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7860616445541382
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.27014172077178955
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8227900916879828 || Accuracy: 0.6567607522010803 || F1-score: 0.5141564309003757
Early stopping at epoch 122!
Accuracy on dataset of size 672: 68.45237731933594 %.
Average loss: 0.7630500956015154
proportion of labels in prediction: [tensor(0.7158), tensor(0.1920), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81111111 0.46594982 0.36363636]
- f1 (average): 0.54689909

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1213955879211426
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0499165058135986
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9505978280847723 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6903181672096252
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7371382117271423
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8040579394860701 || Accuracy: 0.6627042889595032 || F1-score: 0.4252061721417711
Early stopping at epoch 137!
Accuracy on dataset of size 672: 65.0297622680664 %.
Average loss: 0.8316478512503884
proportion of labels in prediction: [tensor(0.7173), tensor(0.2827), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79689234 0.45882353 0.        ]
- f1 (average): 0.4185719570847

### Max pooled

In [220]:
x_data, input_channels = obtain_SDSN_input(pooled_max_pretrained, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0611822605133057
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.906973659992218
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9062382687221874 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7280529141426086
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.11842703819274902
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7983393940058622 || Accuracy: 0.679049015045166 || F1-score: 0.5531351637546328
Early stopping at epoch 141!
Accuracy on dataset of size 672: 66.36904907226562 %.
Average loss: 0.8046743111176924
proportion of labels in prediction: [tensor(0.7113), tensor(0.2054), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7826087  0.47222222 0.33962264]
- f1 (average): 0.531484519

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0875803232192993
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9498467445373535
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.940553605556488 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6147798895835876
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.4620286226272583
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8429263559254733 || Accuracy: 0.6879643201828003 || F1-score: 0.5607682417229186
Early stopping at epoch 138!
Accuracy on dataset of size 672: 67.26190185546875 %.
Average loss: 0.894775005904111
proportion of labels in prediction: [tensor(0.6935), tensor(0.2173), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77740113 0.50675676 0.40490798]
- f1 (average): 0.5630219540

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0729649066925049
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9746654033660889
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9413475285876881 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8841552734375
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3010278046131134
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8812506090510975 || Accuracy: 0.6300148367881775 || F1-score: 0.3900167628981188
Early stopping at epoch 117!
Accuracy on dataset of size 672: 62.7976188659668 %.
Average loss: 0.8637093468145891
proportion of labels in prediction: [tensor(0.7292), tensor(0.2708), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78327833 0.39759036 0.        ]
- f1 (average): 0.3936228964261887

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0652843713760376
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.020917534828186
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9387365471233021 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7338456511497498
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7304773926734924
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.819216397675601 || Accuracy: 0.6404160261154175 || F1-score: 0.40319943709774214
Early stopping at epoch 131!
Accuracy on dataset of size 672: 65.17857360839844 %.
Average loss: 0.8098824078386481
proportion of labels in prediction: [tensor(0.7143), tensor(0.2857), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80088988 0.45614035 0.        ]
- f1 (average): 0.4190100761730

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.043005108833313
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9269269704818726
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9087737690318715 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8126065731048584
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3956730365753174
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8617757721380754 || Accuracy: 0.6433877944946289 || F1-score: 0.4230753239530409
Early stopping at epoch 131!
Accuracy on dataset of size 672: 64.88095092773438 %.
Average loss: 0.8578461842103438
proportion of labels in prediction: [tensor(0.7292), tensor(0.2530), tensor(0.0179)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78547855 0.4375     0.15652174]
- f1 (average): 0.459833428

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1013391017913818
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9390515089035034
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9443628517064181 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6210097074508667
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.08356453478336334
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8513612042773854 || Accuracy: 0.6627042889595032 || F1-score: 0.5111893471108547
Early stopping at epoch 138!
Accuracy on dataset of size 672: 66.36904907226562 %.
Average loss: 0.8813020912083712
proportion of labels in prediction: [tensor(0.7158), tensor(0.2098), tensor(0.0744)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78666667 0.45360825 0.33986928]
- f1 (average): 0.5267147

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1034269332885742
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0152961015701294
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9484180916439403 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.879435122013092
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.20181603729724884
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8428544510494579 || Accuracy: 0.6523030996322632 || F1-score: 0.4039750957854406
Epoch: 201/10000 || Item: 0/85 || Loss: 0.6834601759910583
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.8086645603179932
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.8389423977244984 || Accuracy: 0.6523030996322632 || F1-score: 0.4039750957854406
Epoch: 301/10000 |

Early stopping at epoch 2446!
Accuracy on dataset of size 672: 64.43452453613281 %.
Average loss: 0.8683453310619701
proportion of labels in prediction: [tensor(0.7351), tensor(0.2649), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79079956 0.43902439 0.        ]
- f1 (average): 0.40994131737593387
- accuracy: 0.644345223903656

********** lstm_hidden_dim: [12, 12, 8] || ffnhidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.074674129486084
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9654678702354431
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9404152740131725 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7875795364379883
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7538393139839172
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.871634450825778 || Accuracy: 0.6419019103050232 || F1-score: 0.40243998252665786
Early stopping at epoch 127!
Accuracy on dataset of size 672: 62.7976188659668 %.
Average loss: 0.8363772413947366
proportion of labels in prediction: [tensor(0.7336), tensor(0.2664), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78289474 0.39513678 0.        ]
- f1 (average): 0.39267717165253

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.062619924545288
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0131422281265259
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9251405325802889 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7082000374794006
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.18760985136032104
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7862089655616067 || Accuracy: 0.6745913624763489 || F1-score: 0.5419391430208126
Early stopping at epoch 129!
Accuracy on dataset of size 672: 68.60118865966797 %.
Average loss: 0.7925098592584784
proportion of labels in prediction: [tensor(0.7545), tensor(0.1875), tensor(0.0580)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80561555 0.47101449 0.32394366]
- f1 (average): 0.53352456

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1187163591384888
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9843406081199646
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9430152882229198 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6623855829238892
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.196440652012825
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8494816801764749 || Accuracy: 0.6285289525985718 || F1-score: 0.4420697964387284
Early stopping at epoch 132!
Accuracy on dataset of size 672: 63.69047546386719 %.
Average loss: 0.8374193473295732
proportion of labels in prediction: [tensor(0.7217), tensor(0.2604), tensor(0.0179)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78761062 0.39384615 0.13913043]
- f1 (average): 0.440195736

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.125571846961975
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0724183320999146
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9471527934074402 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 1.0156971216201782
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.2595859467983246
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8206297105008905 || Accuracy: 0.6508172154426575 || F1-score: 0.39949537150815106
Early stopping at epoch 124!
Accuracy on dataset of size 672: 64.73213958740234 %.
Average loss: 0.814558663151481
proportion of labels in prediction: [tensor(0.7396), tensor(0.2604), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79475983 0.43692308 0.        ]
- f1 (average): 0.4105609674168

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0989123582839966
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.032710075378418
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9369079145518217 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7517858743667603
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7406988143920898
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8357667110183022 || Accuracy: 0.6270430684089661 || F1-score: 0.3898856990962254
Early stopping at epoch 120!
Accuracy on dataset of size 672: 63.83928680419922 %.
Average loss: 0.7939372441985391
proportion of labels in prediction: [tensor(0.7262), tensor(0.2738), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7938258  0.41317365 0.        ]
- f1 (average): 0.4023331506776

### Sum pooled

In [221]:
x_data, input_channels = obtain_SDSN_input(pooled_sum_pretrained, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.3013468980789185
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8354977965354919
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8586074764078314 || Accuracy: 0.6404160261154175 || F1-score: 0.3696874014654228
Epoch: 101/10000 || Item: 0/85 || Loss: 0.5119310021400452
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.18847788870334625
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7998412143100392 || Accuracy: 0.7057949304580688 || F1-score: 0.5972343295973432
Early stopping at epoch 119!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.8521335016597401
proportion of labels in prediction: [tensor(0.7247), tensor(0.1860), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81456954 0.50909091 0.46625767]
- f1 (average): 0.59663937

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1109387874603271
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8544552326202393
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9038361419330944 || Accuracy: 0.6433877944946289 || F1-score: 0.36858300812226386
Epoch: 101/10000 || Item: 0/85 || Loss: 0.38765737414360046
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.033970266580581665
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9083285440098156 || Accuracy: 0.6924219727516174 || F1-score: 0.5911978988564527
Early stopping at epoch 132!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.8130029873414473
proportion of labels in prediction: [tensor(0.6830), tensor(0.2277), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79726651 0.52145215 0.42944785]
- f1 (average): 0.58272

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0930824279785156
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9830728769302368
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9443454850803722 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.4144074022769928
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.15897485613822937
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9295072176239707 || Accuracy: 0.6968796253204346 || F1-score: 0.5974639910123781
Early stopping at epoch 127!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.9161134578964927
proportion of labels in prediction: [tensor(0.6682), tensor(0.2247), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79032258 0.52491694 0.48      ]
- f1 (average): 0.5984131

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1586970090866089
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0439859628677368
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9710449901494113 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.3956676721572876
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.23449645936489105
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8783430619673296 || Accuracy: 0.6939078569412231 || F1-score: 0.5786841232219383
Early stopping at epoch 123!
Accuracy on dataset of size 672: 69.64286041259766 %.
Average loss: 0.8908329497684132
proportion of labels in prediction: [tensor(0.6786), tensor(0.2188), tensor(0.1027)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8        0.53198653 0.45348837]
- f1 (average): 0.5951583

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.042363166809082
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7563664317131042
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8164049712094393 || Accuracy: 0.6567607522010803 || F1-score: 0.47700207906536257
Epoch: 101/10000 || Item: 0/85 || Loss: 0.4392238259315491
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.059755731374025345
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.857847273349762 || Accuracy: 0.689450204372406 || F1-score: 0.5943976893780728
Early stopping at epoch 119!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.8413175073536959
proportion of labels in prediction: [tensor(0.6845), tensor(0.2098), tensor(0.1057)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80546075 0.52920962 0.48275862]
- f1 (average): 0.605809664

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1025233268737793
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9335659146308899
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9173165451396595 || Accuracy: 0.6433877944946289 || F1-score: 0.3906025239031299
Epoch: 101/10000 || Item: 0/85 || Loss: 0.3760767877101898
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.003959278576076031
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9313850998878479 || Accuracy: 0.6805348992347717 || F1-score: 0.5916768076746531
Early stopping at epoch 127!
Accuracy on dataset of size 672: 69.3452377319336 %.
Average loss: 0.9569546471942555
proportion of labels in prediction: [tensor(0.6652), tensor(0.2351), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79445727 0.54545455 0.44705882]
- f1 (average): 0.59565688

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.088029146194458
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9533932209014893
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9125539443709634 || Accuracy: 0.6210995316505432 || F1-score: 0.2705276590251193
Epoch: 101/10000 || Item: 0/85 || Loss: 0.49797555804252625
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.21526889503002167
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9374861717224121 || Accuracy: 0.6953937411308289 || F1-score: 0.5977988382852487
Early stopping at epoch 127!
Accuracy on dataset of size 672: 68.30357360839844 %.
Average loss: 0.9290091612122275
proportion of labels in prediction: [tensor(0.6652), tensor(0.2307), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78290993 0.49836066 0.50867052]
- f1 (average): 0.59664703

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0724536180496216
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9943448901176453
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.941481660712849 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.2705845832824707
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.24389423429965973
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9391739585182883 || Accuracy: 0.6775631308555603 || F1-score: 0.5907092821166552
Early stopping at epoch 126!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.9894102215766907
proportion of labels in prediction: [tensor(0.6741), tensor(0.2158), tensor(0.1101)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7912844  0.50169492 0.50847458]
- f1 (average): 0.60048463

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.174203872680664
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7820780277252197
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8391301848671653 || Accuracy: 0.6537889838218689 || F1-score: 0.43733071671183416
Epoch: 101/10000 || Item: 0/85 || Loss: 0.3944697678089142
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.03525356575846672
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0298169417814775 || Accuracy: 0.689450204372406 || F1-score: 0.5992995500071298
Early stopping at epoch 116!
Accuracy on dataset of size 672: 70.53571319580078 %.
Average loss: 1.0018632168119603
proportion of labels in prediction: [tensor(0.6726), tensor(0.2202), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80367394 0.54362416 0.49142857]
- f1 (average): 0.612908890

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.101671814918518
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8468042612075806
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8828405141830444 || Accuracy: 0.6508172154426575 || F1-score: 0.4551098408913714
Epoch: 101/10000 || Item: 0/85 || Loss: 0.42085975408554077
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.00869707390666008
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0599372766234658 || Accuracy: 0.6627042889595032 || F1-score: 0.5727171197627999
Early stopping at epoch 121!
Accuracy on dataset of size 672: 68.89881134033203 %.
Average loss: 1.06849773905494
proportion of labels in prediction: [tensor(0.6443), tensor(0.2426), tensor(0.1131)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78873239 0.50479233 0.53631285]
- f1 (average): 0.6099458585

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.042513132095337
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.930135190486908
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8825143304738131 || Accuracy: 0.6344724893569946 || F1-score: 0.30546387972130545
Epoch: 101/10000 || Item: 0/85 || Loss: 0.5460414886474609
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.0366196371614933
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9511366432363336 || Accuracy: 0.6716195940971375 || F1-score: 0.5669770948601981
Early stopping at epoch 115!
Accuracy on dataset of size 672: 68.60118865966797 %.
Average loss: 0.9354486844756387
proportion of labels in prediction: [tensor(0.6741), tensor(0.2292), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79357798 0.48684211 0.48809524]
- f1 (average): 0.5895051083

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1079468727111816
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9050264954566956
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9055643948641691 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.2618783116340637
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.24361050128936768
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9748089367693121 || Accuracy: 0.679049015045166 || F1-score: 0.5868828704502329
Early stopping at epoch 123!
Accuracy on dataset of size 672: 67.85713958740234 %.
Average loss: 1.0673792145468972
proportion of labels in prediction: [tensor(0.6577), tensor(0.2307), tensor(0.1116)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78513357 0.47868852 0.50561798]
- f1 (average): 0.58981335

### CLS

In [222]:
x_data, input_channels = obtain_SDSN_input(pooled_cls_pretrained, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.2040929794311523
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9543325304985046
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8827005624771118 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7267993688583374
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.19608074426651
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7830820517106489 || Accuracy: 0.6805348992347717 || F1-score: 0.5370926091179745
Early stopping at epoch 125!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.7530356483025984
proportion of labels in prediction: [tensor(0.7426), tensor(0.1696), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81481481 0.46969697 0.43209877]
- f1 (average): 0.5722035166

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1437132358551025
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.001387596130371
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9409243518655951 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.733620285987854
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.146580770611763
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7862045493992892 || Accuracy: 0.679049015045166 || F1-score: 0.5387477369664612
Early stopping at epoch 123!
Accuracy on dataset of size 672: 69.49404907226562 %.
Average loss: 0.7522918527776544
proportion of labels in prediction: [tensor(0.7232), tensor(0.1801), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80441989 0.49446494 0.42857143]
- f1 (average): 0.575818754241

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1192790269851685
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9309695959091187
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9031798947941173 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7530666589736938
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.5289303064346313
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8341609022834084 || Accuracy: 0.679049015045166 || F1-score: 0.5430116182079968
Early stopping at epoch 135!
Accuracy on dataset of size 672: 68.75 %.
Average loss: 0.7924448576840487
proportion of labels in prediction: [tensor(0.7232), tensor(0.1756), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79558011 0.46268657 0.46783626]
- f1 (average): 0.5753676449904527
- ac

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1225972175598145
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.020470380783081
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9420102292841132 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.578769862651825
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3644253611564636
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7749437364664945 || Accuracy: 0.6924219727516174 || F1-score: 0.5651662690808598
Early stopping at epoch 133!
Accuracy on dataset of size 672: 71.875 %.
Average loss: 0.7509793043136597
proportion of labels in prediction: [tensor(0.7173), tensor(0.1845), tensor(0.0982)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82574917 0.52554745 0.46153846]
- f1 (average): 0.6042783581285004
- ac

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1425316333770752
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9537950754165649
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.885266201062636 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6237946152687073
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.1699737012386322
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8081171404231678 || Accuracy: 0.6671619415283203 || F1-score: 0.5278475681152801
Early stopping at epoch 125!
Accuracy on dataset of size 672: 68.30357360839844 %.
Average loss: 0.7727127833799883
proportion of labels in prediction: [tensor(0.7336), tensor(0.1786), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79605263 0.45185185 0.43209877]
- f1 (average): 0.560001082

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1180133819580078
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9255759716033936
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9132819771766663 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7313652634620667
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.0869215875864029
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8132577646862377 || Accuracy: 0.658246636390686 || F1-score: 0.4984643819573214
Early stopping at epoch 123!
Accuracy on dataset of size 672: 68.1547622680664 %.
Average loss: 0.7469774267890237
proportion of labels in prediction: [tensor(0.7173), tensor(0.1875), tensor(0.0952)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80133185 0.44202899 0.43113772]
- f1 (average): 0.5581661878

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0959380865097046
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9692442417144775
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9381837411360308 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8282528519630432
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.18298083543777466
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8390961018475619 || Accuracy: 0.6671619415283203 || F1-score: 0.5085976184094277
Epoch: 201/10000 || Item: 0/85 || Loss: 0.6801908016204834
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.9419746398925781
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.819741119037975 || Accuracy: 0.6671619415283203 || F1-score: 0.5085976184094277
Epoch: 301/10000 |

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.127925157546997
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9608778953552246
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9378372539173473 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8151942491531372
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7873227000236511
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8786657940257679 || Accuracy: 0.6344724893569946 || F1-score: 0.4162058177243729
Early stopping at epoch 139!
Accuracy on dataset of size 672: 64.58333587646484 %.
Average loss: 0.8206196373159235
proportion of labels in prediction: [tensor(0.6935), tensor(0.2902), tensor(0.0164)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79548023 0.44057971 0.10526316]
- f1 (average): 0.447107698

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0581080913543701
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.965984046459198
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8825081424279646 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7237440943717957
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.2629973292350769
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7669510462067344 || Accuracy: 0.6879643201828003 || F1-score: 0.5468867367093668
Epoch: 201/10000 || Item: 0/85 || Loss: 0.8005213737487793
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 1.259475827217102
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.7732626741582697 || Accuracy: 0.6879643201828003 || F1-score: 0.5468867367093668
Epoch: 301/10000 || 

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1116864681243896
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9596397280693054
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9104283506220038 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6912787556648254
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.1996433138847351
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7782505317167803 || Accuracy: 0.6939078569412231 || F1-score: 0.56118227629019
Early stopping at epoch 123!
Accuracy on dataset of size 672: 68.30357360839844 %.
Average loss: 0.7525844194672324
proportion of labels in prediction: [tensor(0.7202), tensor(0.1905), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79512735 0.48201439 0.40490798]
- f1 (average): 0.5606832390

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.096859335899353
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9114543795585632
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.907750584862449 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8409744501113892
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.5794983506202698
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8129292293028398 || Accuracy: 0.6671619415283203 || F1-score: 0.5217717128873098
Early stopping at epoch 129!
Accuracy on dataset of size 672: 67.70833587646484 %.
Average loss: 0.8091904737732627
proportion of labels in prediction: [tensor(0.7455), tensor(0.1652), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79565217 0.45210728 0.36809816]
- f1 (average): 0.5386192043

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.093720555305481
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9559621214866638
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9292432503266768 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6506471633911133
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7666477560997009
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8619428331201727 || Accuracy: 0.6359583735466003 || F1-score: 0.40043296039233195
Early stopping at epoch 120!
Accuracy on dataset of size 672: 63.39285659790039 %.
Average loss: 0.849154607816176
proportion of labels in prediction: [tensor(0.7202), tensor(0.2798), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78848283 0.41420118 0.        ]
- f1 (average): 0.4008946728088

## Fine-tuned BERT

### Mean pooled

In [None]:
x_data, input_channels = obtain_SDSN_input(pooled_mean, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

### Max pooled

In [None]:
x_data, input_channels = obtain_SDSN_input(pooled_max, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

### Sum pooled

In [None]:
x_data, input_channels = obtain_SDSN_input(pooled_sum, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

### CLS

In [None]:
x_data, input_channels = obtain_SDSN_input(pooled_cls, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

Baselines:
   - just looking at the sentence embeddings (encodes nothing about the history on the post)
       - highlights importance of looking at the sequence
   - averaging history
   - comparing the cosine similarity between previous post and current post to see if switch
   
Test for:
- How many posts do you need to look back?