In [2]:
import pandas as pd
import numpy as np
import torch
import transformers
import pickle
import os

import nlpsig
import nlpsig_networks

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
from nlpsig.classification_utils import split_dataset
from nlpsig_networks.pytorch_utils import SaveBestModel, validation_pytorch, training_pytorch, testing_pytorch, set_seed
from nlpsig_networks.ffn import FeedforwardNeuralNetModel
from nlpsig_networks.deepsignet import StackedDeepSigNet
from nlpsig_networks.focal_loss import FocalLoss, ClassBalanced_FocalLoss
from sklearn import metrics

from tqdm.notebook import tqdm

seed = 2023

In [3]:
import signatory

## AnnoMI

In [4]:
anno_mi = pd.read_csv("AnnoMI-full.csv")
anno_mi["datetime"] = pd.to_datetime(anno_mi["timestamp"])
anno_mi = anno_mi.drop(columns=["video_title", "video_url"])
anno_mi.head()

Unnamed: 0,mi_quality,transcript_id,topic,utterance_id,interlocutor,timestamp,utterance_text,annotator_id,therapist_input_exists,therapist_input_subtype,reflection_exists,reflection_subtype,question_exists,question_subtype,main_therapist_behaviour,client_talk_type,datetime
0,high,0,reducing alcohol consumption,0,therapist,00:00:13,Thanks for filling it out. We give this form t...,3,False,,False,,True,open,question,,2023-06-02 00:00:13
1,high,0,reducing alcohol consumption,1,client,00:00:24,Sure.,3,,,,,,,,neutral,2023-06-02 00:00:24
2,high,0,reducing alcohol consumption,2,therapist,00:00:25,"So, let's see. It looks that you put-- You dri...",3,True,information,False,,False,,therapist_input,,2023-06-02 00:00:25
3,high,0,reducing alcohol consumption,3,client,00:00:34,Mm-hmm.,3,,,,,,,,neutral,2023-06-02 00:00:34
4,high,0,reducing alcohol consumption,4,therapist,00:00:34,-and you usually have three to four drinks whe...,3,True,information,False,,False,,therapist_input,,2023-06-02 00:00:34


In [326]:
# transcripts with more than 100 utterances (counts are computed once and filtered via a callable indexer)
anno_mi["transcript_id"].value_counts().loc[lambda counts: counts > 100]

56     1750
7       660
55      650
121     598
109     460
98      394
133     381
36      315
66      310
100     283
64      262
21      248
84      235
27      230
130     220
68      204
5       203
49      182
44      173
71      169
116     167
22      133
85      131
83      128
61      126
28      123
34      117
18      115
97      108
53      108
58      101
110     101
Name: transcript_id, dtype: int64

In [322]:
anno_mi.iloc[11]["utterance_id"].mean()

11.0

In [314]:
len(anno_mi["transcript_id"].value_counts())

133

In [5]:
len(anno_mi)

13551

In [311]:
anno_mi["interlocutor"].value_counts()

therapist    6826
client       6725
Name: interlocutor, dtype: int64

In [312]:
anno_mi["main_therapist_behaviour"].value_counts() / anno_mi["interlocutor"].value_counts()["therapist"]

other              0.313947
question           0.286258
reflection         0.251538
therapist_input    0.148257
Name: main_therapist_behaviour, dtype: float64

In [6]:
anno_mi["client_talk_type"].value_counts() / anno_mi["interlocutor"].value_counts()["client"]

neutral    0.627063
change     0.248030
sustain    0.124907
Name: client_talk_type, dtype: float64

In [7]:
anno_mi["interlocutor"].value_counts()

therapist    6826
client       6725
Name: interlocutor, dtype: int64

In [8]:
anno_mi["topic"].value_counts()

reducing alcohol consumption                                                          2326
more exercise / increasing activity                                                   2034
reducing recidivism                                                                   1303
reducing drug use                                                                     1104
diabetes management                                                                    948
smoking cessation                                                                      923
smoking cessation                                                                      541
taking medicine / following medical procedure                                          448
asthma management                                                                      431
avoiding DOI                                                                           394
changing approach to disease                                                           315

In [9]:
len(anno_mi["transcript_id"].unique())

133

## Only considering client for now...

In [10]:
client_index = [isinstance(x, str) for x in anno_mi["client_talk_type"]]
sum(client_index)

6725

In [11]:
y_data = anno_mi["client_talk_type"][client_index]
y_data.shape

(6725,)

In [12]:
y_data[0:20]

1     neutral
3     neutral
5     neutral
7     neutral
9     neutral
11    neutral
13    neutral
15    neutral
17    neutral
19    neutral
21    neutral
23    neutral
25    neutral
27    neutral
29    neutral
31    neutral
33    neutral
35     change
37     change
39     change
Name: client_talk_type, dtype: object

In [13]:
# Build the label <-> id mappings, numbering labels in order of first appearance.
# The labels are read from `anno_mi` (the same selection that `y_data` was created from)
# rather than from `y_data` itself, because `y_data` is overwritten with integer ids
# further down; this keeps the cell re-runnable and calls `.unique()` only once.
client_labels = anno_mi["client_talk_type"][client_index].unique()
label_to_id = {label: idx for idx, label in enumerate(client_labels)}
id_to_label = {idx: label for label, idx in label_to_id.items()}

In [14]:
label_to_id

{'neutral': 0, 'change': 1, 'sustain': 2}

In [15]:
id_to_label

{0: 'neutral', 1: 'change', 2: 'sustain'}

In [16]:
# Encode the client labels as integer ids.
# The raw labels are read from `anno_mi` instead of the previous value of `y_data`,
# so re-running this cell does not try to look up already-encoded ids (KeyError).
y_data = [label_to_id[label] for label in anno_mi["client_talk_type"][client_index]]
y_data[0:20]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

## Obtaining SBERT Embeddings

We can use the `SentenceEncoder` class within `nlpsig` to obtain sentence embeddings from a model. This class uses the [`sentence-transformers`](https://www.sbert.net/docs/package_reference/SentenceTransformer.html) package and here we use the pre-trained `all-mpnet-base-v2` model by passing its name as a string to the class - alternative models can be found [here](https://www.sbert.net/docs/pretrained_models.html).

We can pass these into the constructor of the class to initialise our text encoder as follows:

In [17]:
sbert_768_embeddings = np.load("anno_mi_sentence_embeddings_768.npy")

In [16]:
# initialise the Text Encoder
sbert_model_768 = "all-mpnet-base-v2"
text_encoder_sbert_768 = nlpsig.SentenceEncoder(df=anno_mi,
                                                feature_name="utterance_text",
                                                model_name=sbert_model_768)
text_encoder_sbert_768.load_pretrained_model()

Once the model has been loaded with `.load_pretrained_model()`, the class has an `.obtain_embeddings()` method which obtains an embedding for each sentence. These sentence embeddings are then stored in the `sentence_embeddings` attribute of the object.

In [17]:
text_encoder_sbert_768.obtain_embeddings()
sbert_768_embeddings = text_encoder_sbert_768.sentence_embeddings

[INFO] number of sentences to encode: 13551


Batches:   0%|          | 0/212 [00:00<?, ?it/s]

In [18]:
np.save("anno_mi_sentence_embeddings_768", sbert_768_embeddings)

## SBERT with 384 dimension vectors

In [18]:
sbert_384_embeddings = np.load("anno_mi_sentence_embeddings_384.npy")

In [20]:
# initialise the Text Encoder
sbert_model_384 = "all-MiniLM-L12-v2"
text_encoder_sbert_384 = nlpsig.SentenceEncoder(df=anno_mi,
                                                feature_name="utterance_text",
                                                model_name=sbert_model_384)
text_encoder_sbert_384.load_pretrained_model()

In [21]:
text_encoder_sbert_384.obtain_embeddings()
sbert_384_embeddings = text_encoder_sbert_384.sentence_embeddings

[INFO] number of sentences to encode: 13551


Batches:   0%|          | 0/212 [00:00<?, ?it/s]

In [22]:
np.save("anno_mi_sentence_embeddings_384", sbert_384_embeddings)

## Pretrained BERT and pooling

In [19]:
pooled_mean_pretrained = np.load("anno_mi_pretrained_BERT_mean.npy")
pooled_max_pretrained = np.load("anno_mi_pretrained_BERT_max.npy")
pooled_sum_pretrained = np.load("anno_mi_pretrained_BERT_sum.npy")
pooled_cls_pretrained = np.load("anno_mi_pretrained_BERT_cls.npy")

In [19]:
bert_model = "bert-base-uncased"

In [25]:
text_encoder_pretrained_BERT = nlpsig.TextEncoder(df=anno_mi,
                                                  feature_name="utterance_text",
                                                  model_name=bert_model)
text_encoder_pretrained_BERT.load_pretrained_model()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [26]:
text_encoder_pretrained_BERT.tokenize_text(skip_special_tokens=False)

[INFO] Setting return_special_tokens_mask=True
[INFO] Tokenizing the dataset...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Saving the tokenized text for each sentence into `.df['tokens']`...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Creating tokenized dataframe and setting in `.tokenized_df` attribute...
[INFO] Note: 'text_id' is the column name for denoting the corresponding text id


Dataset({
    features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask', 'tokens'],
    num_rows: 13551
})

In [27]:
token_embeddings_pretrained = text_encoder_pretrained_BERT.obtain_embeddings(method="hidden_layer")

  0%|          | 0/136 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [28]:
pooled_mean_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings()
pooled_max_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings(method="max")
pooled_sum_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings(method="sum") 
pooled_cls_pretrained = text_encoder_pretrained_BERT.pool_token_embeddings(method="cls")

  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/13551 [00:00<?, ?it/s]

In [29]:
pooled_mean_pretrained.shape

(13551, 768)

In [30]:
pooled_max_pretrained.shape

(13551, 768)

In [31]:
pooled_sum_pretrained.shape

(13551, 768)

In [32]:
pooled_cls_pretrained.shape

(13551, 768)

In [33]:
np.save("anno_mi_pretrained_BERT_mean", pooled_mean_pretrained)
np.save("anno_mi_pretrained_BERT_max", pooled_max_pretrained)
np.save("anno_mi_pretrained_BERT_sum", pooled_sum_pretrained)
np.save("anno_mi_pretrained_BERT_cls", pooled_cls_pretrained)

## Fine-tuning BERT and pooling

### (Ignoring this part for now, but will run this on a GPU cluster soon...)

In [20]:
# pooled_mean = np.load("anno_mi_BERT_mean.npy")
# pooled_max = np.load("anno_mi_BERT_max.npy")
# pooled_sum = np.load("anno_mi_BERT_sum.npy")
# pooled_cls = np.load("anno_mi_BERT_cls.npy")

In [35]:
from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    DataCollatorWithPadding,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
)

model = AutoModelForMaskedLM.from_pretrained(bert_model)
tokenizer = AutoTokenizer.from_pretrained(bert_model)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [36]:
text_encoder_BERT = nlpsig.TextEncoder(df=anno_mi,
                                       feature_name="utterance_text",
                                       model=model,
                                       tokenizer=tokenizer,
                                       data_collator=data_collator)

In [37]:
text_encoder_BERT.tokenize_text(skip_special_tokens=False)

[INFO] Setting return_special_tokens_mask=True
[INFO] Tokenizing the dataset...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Saving the tokenized text for each sentence into `.df['tokens']`...


Map:   0%|          | 0/13551 [00:00<?, ? examples/s]

[INFO] Creating tokenized dataframe and setting in `.tokenized_df` attribute...
[INFO] Note: 'text_id' is the column name for denoting the corresponding text id


Dataset({
    features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask'],
    num_rows: 13551
})

### Training the model

In [38]:
# set up data_collator for language modelling (has dynamic padding)
data_collator_for_LM = DataCollatorForLanguageModeling(tokenizer=tokenizer,
                                                       mlm=True,
                                                       mlm_probability=0.15)

In [39]:
text_encoder_BERT.split_dataset(seed=seed)

[INFO] Splitting up dataset into train / validation / test sets, and saving to `.dataset_split`.


DatasetDict({
    train: Dataset({
        features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask'],
        num_rows: 10840
    })
    test: Dataset({
        features: ['mi_quality', 'transcript_id', 'topic', 'utterance_id', 'interlocutor', 'timestamp', 'utterance_text', 'annotator_id', 'therapist_input_exists', 'therapist_input_subtype', 'reflection_exists', 'reflection_subtype', 'question_exists', 'question_subtype', 'main_therapist_behaviour', 'client_talk_type', 'datetime', 'tokens', 'input_ids', 'token_type_ids', 'attention_mask', 'special_tokens_mask'],
        num_rows: 1356
    })
    validation: Dataset({
        features: ['mi_qua

In [40]:
type(text_encoder_BERT.dataset_split)

datasets.dataset_dict.DatasetDict

In [41]:
model_name = "bert-base-uncased-anno-mi"
text_encoder_BERT.set_up_training_args(output_dir=model_name,
                                  num_train_epochs=600,
                                  per_device_train_batch_size=128,
                                  disable_tqdm=False,
                                  save_strategy="steps",
                                  save_steps=10000,
                                  seed=seed)

[INFO] Setting up TrainingArguments object and saving to `.training_args`.


TrainingArguments(
_n_gpu=0,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ign

In [42]:
type(text_encoder_BERT.training_args)

transformers.training_args.TrainingArguments

In [43]:
text_encoder_BERT.set_up_trainer(data_collator=data_collator_for_LM)

[INFO] Setting up Trainer object, and saving to `.trainer`.


<transformers.trainer.Trainer at 0x2b04c1e80>

In [44]:
type(text_encoder_BERT.trainer)

transformers.trainer.Trainer

In [45]:
torch.cuda.is_available()

False

In [46]:
torch.cuda.device_count()

0

In [47]:
# only report errors, to avoid excessive logging during training
transformers.utils.logging.set_verbosity_error()

In [None]:
text_encoder_BERT.fit_transformer_with_trainer_api()

[INFO] Training model with 109514298 parameters...




Epoch,Training Loss,Validation Loss


In [None]:
text_encoder_BERT.trainer.save_model(model_name)

### Evaluating model on masked language modelling task

In [None]:
text_encoder_BERT.tokenizer.special_tokens_map

In [None]:
def compute_masked_character_accuracy(fill_mask, words):
    """
    Measure how often a fill-mask model restores a single masked character.

    For every word, each character position is replaced in turn by the mask
    token, the top prediction returned by `fill_mask` is taken, and the
    predicted sequence is compared with the original word.

    Parameters
    ----------
    fill_mask : callable
        Fill-mask pipeline. It is called with one masked string and the first
        item of its result must be a dict with a 'sequence' entry. If it
        exposes `tokenizer.mask_token`, that token is used for masking;
        otherwise '<mask>' is used.
    words : sequence of str
        Strings to evaluate on.

    Returns
    -------
    float
        Proportion of masked strings whose top prediction equals the original
        word (nan if there was nothing to evaluate).
    """
    # Use the mask token of the pipeline's own tokenizer: not every model uses
    # '<mask>' (BERT tokenizers use '[MASK]'), and a pipeline rejects inputs
    # that do not contain its mask token.
    tokenizer = getattr(fill_mask, "tokenizer", None)
    mask_token = getattr(tokenizer, "mask_token", None) or '<mask>'

    was_correct = []
    print(f"Evaluating with {len(words)} words")
    for word in tqdm(words):
        # one masked variant per character position
        masked_strings = [word[:i] + mask_token + word[i+1:] for i in range(len(word))]
        predictions = [fill_mask(masked)[0]['sequence'] for masked in masked_strings]
        was_correct += [pred == word for pred in predictions]

    if len(was_correct) == 0:
        # nothing was evaluated (no words, or only empty strings)
        acc = float("nan")
    else:
        acc = np.sum(was_correct) / len(was_correct)
    print(f"Accuracy: {acc}")
    return acc

In [None]:
# build a fill-mask pipeline from the model and tokenizer stored under `model_name`
fill_mask = pipeline("fill-mask",
                     model=model_name,
                     tokenizer=model_name)

# NOTE(review): the features listed for `dataset_split` earlier in this notebook
# do not include a "word" column - confirm which column should be evaluated here
# (e.g. "utterance_text") before running this cell.
compute_masked_character_accuracy(fill_mask, text_encoder_BERT.dataset_split["test"]["word"])

### Obtain embeddings from model

In [None]:
# setting the model to CPU (might not be always necessary to run this)
text_encoder_BERT.model.to('cpu')
token_embeddings = text_encoder_BERT.obtain_embeddings(method="hidden_layer")

In [None]:
token_embeddings.shape

In [None]:
pooled_mean = text_encoder_BERT.pool_token_embeddings()
pooled_max = text_encoder_BERT.pool_token_embeddings(method="max")
pooled_sum = text_encoder_BERT.pool_token_embeddings(method="sum")
pooled_cls = text_encoder_BERT.pool_token_embeddings(method="cls")

In [None]:
pooled_mean.shape

In [None]:
pooled_max.shape

In [None]:
pooled_sum.shape

In [None]:
pooled_cls.shape

In [None]:
np.save("anno_mi_BERT_mean", pooled_mean)
np.save("anno_mi_BERT_max", pooled_max)
np.save("anno_mi_BERT_sum", pooled_sum)
np.save("anno_mi_BERT_cls", pooled_cls)

# Baseline 1: FFN baseline

Using the embeddings for the sentences directly in a FFN.

Below is a function that takes in some inputs `x_data` and `y_data` and fits a FFN. Training uses early stopping (with a patience of 10) based on the validation F1 score.

In [20]:
def print_test_scores(model,
                      test_loader,
                      criterion,
                      verbose_shorten):
    """
    Print the performance of `model` on the test set.

    The per-class F1 scores and their unweighted average are always printed.
    When `verbose_shorten` is False, the label proportions in the predictions
    and in the data, as well as the accuracy, are printed too.

    Relies on the notebook-level `label_to_id` mapping for the class ids.
    """
    show_details = not verbose_shorten

    print("* test performance:")
    # predictions and true labels on the test set
    pred, label = testing_pytorch(model, test_loader, criterion)
    # one F1 score per class
    f1_scores = metrics.f1_score(label, pred, average=None)

    if show_details:
        predicted_proportions = [sum(pred==i)/len(pred) for i in label_to_id.values()]
        print(f"proportion of labels in prediction: {predicted_proportions}")
        data_proportions = [sum(label==i)/len(label) for i in label_to_id.values()]
        print(f"proportion of labels in data: {data_proportions}")

    average_f1 = sum(f1_scores)/len(f1_scores)
    print(f"- f1: {f1_scores}")
    print(f"- f1 (average): {average_f1}")

    if show_details:
        accuracy = sum(pred==label)/len(pred)
        print(f"- accuracy: {accuracy}")

In [206]:
def implement_ffn(num_epochs,
                  x_data,
                  y_data,
                  hidden_dim,
                  dropout_rate,
                  learning_rate,
                  seed,
                  loss,
                  gamma=0,
                  data_split_seed=0,
                  verbose=True,
                  verbose_shorten=True):
    """
    Train a feedforward network on (x_data, y_data) and evaluate it.

    Parameters
    ----------
    num_epochs : int
        Number of epochs passed to `training_pytorch`.
    x_data : array-like or torch.Tensor
        Input features; `x_data.shape[1]` is used as the input dimension.
    y_data : array-like or torch.Tensor
        Class labels.
    hidden_dim
        Hidden layer size(s) passed to `FeedforwardNeuralNetModel`.
    dropout_rate : float
        Dropout rate passed to `FeedforwardNeuralNetModel`.
    learning_rate : float
        Learning rate of the Adam optimizer.
    seed : int
        Seed passed to `set_seed` and `training_pytorch`.
    loss : str
        Either "focal" or "cross_entropy".
    gamma : float, optional
        Gamma parameter of the focal loss (only used when loss="focal").
    data_split_seed : int, optional
        Seed used for splitting the data into train / validation / test.
    verbose : bool, optional
        Whether to print the test scores.
    verbose_shorten : bool, optional
        If False (and `verbose` is True), validation scores and more detailed
        test scores are printed as well.

    Returns
    -------
    tuple
        `(model, (valid_loss, valid_acc, valid_f1), (test_loss, test_acc, test_f1))`.

    Raises
    ------
    ValueError
        If `loss` is not "focal" or "cross_entropy".
    """
    # validate the loss name up front, so a typo fails immediately with a clear
    # message instead of an unbound `criterion` after the data has been split
    if loss not in ("focal", "cross_entropy"):
        raise ValueError(
            f"loss must be 'focal' or 'cross_entropy', but got: {loss!r}"
        )

    # set seed
    set_seed(seed)
    
    # initialise FFN
    ffn_model = FeedforwardNeuralNetModel(input_dim=x_data.shape[1],
                                          hidden_dim=hidden_dim,
                                          output_dim=len(label_to_id),
                                          dropout_rate=dropout_rate)
    
    # split dataset
    if not isinstance(x_data, torch.Tensor):
        x_data = torch.tensor(x_data)
    if not isinstance(y_data, torch.Tensor):
        y_data = torch.tensor(y_data)
    train, valid, test = split_dataset(x_data=x_data.float(),
                                       y_data=y_data,
                                       train_size=0.8,
                                       valid_size=0.2,
                                       shuffle=True,
                                       as_DataLoader=True,
                                       seed=data_split_seed)

    # define loss
    if loss == "focal":
        criterion = FocalLoss(gamma = gamma)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # define optimizer
    optimizer = torch.optim.Adam(ffn_model.parameters(), lr=learning_rate)
    # define scheduler for adjusting the learning rate
    scheduler = ReduceLROnPlateau(optimizer, 'min')
    
    # train, keeping the best model and stopping early based on validation F1
    ffn_model = training_pytorch(model=ffn_model,
                                 train_loader=train,
                                 criterion=criterion,
                                 optimizer=optimizer,
                                 num_epochs=num_epochs,
                                 scheduler=scheduler,
                                 valid_loader=valid,
                                 seed=seed,
                                 save_best=True,
                                 early_stopping=True,
                                 validation_metric="f1",
                                 patience=10,
                                 verbose=False,
                                 verbose_epoch=100)
    
    # evaluate on validation
    loss_v, acc_v, f1_v = validation_pytorch(model=ffn_model,
                                             valid_loader=valid,
                                             criterion=criterion,
                                             epoch=0,
                                             verbose=False)
    
    # evaluate on test (named *_t so the `loss` argument is not shadowed)
    loss_t, acc_t, f1_t = validation_pytorch(model=ffn_model,
                                             valid_loader=test,
                                             criterion=criterion,
                                             epoch=0,
                                             verbose=False)
    
    if verbose and not verbose_shorten:
        print("* validation performance:")
        print(f"- loss: {loss_v} | f1: {f1_v} | accuracy: {acc_v}")
    
    if verbose:
        print_test_scores(ffn_model, test, criterion, verbose_shorten)

    return ffn_model, (loss_v, acc_v, f1_v), (loss_t, acc_t, f1_t)

In [162]:
332

Going to try out some variations (1, 2, 3 and 4 hidden layers - all of size 100)

In [243]:
num_epochs = 100
hidden_dim_sizes = [[100]*i for i in range(1, 5)]
dropout_rates = [0.5, 0.2, 0.1]
learning_rates = [1e-3, 1e-4]
seeds = [0, 1, 12, 123, 1234]
loss = "cross_entropy"
validation_metric = "f1"

## SBERT 768

In [33]:
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=sbert_768_embeddings[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6329081493334234
New best validation metric: 0.6329081493334234

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6226284573604087

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6318255306185577

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6186297274274043

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6302974458385493

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation)

## SBERT 384

In [34]:
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=sbert_384_embeddings[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6233805395455548
New best validation metric: 0.6233805395455548

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6027316809142145

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6235233536737772
New best validation metric: 0.6235233536737772

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.599576724597856

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6220505116230728

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 

## Pretrained BERT

### Mean pooled

In [35]:
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_mean_pretrained[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6184039979375753
New best validation metric: 0.6184039979375753

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6156284616166621

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6076458979888735

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6040788659946998

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6148801462563531

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation)

### Max pooled

In [36]:
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_max_pretrained[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.42008821999910406
New best validation metric: 0.42008821999910406

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.588124365604539
New best validation metric: 0.588124365604539

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5863141492402314

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5911340172736954
New best validation metric: 0.5911340172736954

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5943695636586135
New best validation metric: 0.5943695

### Sum pooled

In [37]:
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_sum_pretrained[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.41775751946565903
New best validation metric: 0.41775751946565903

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5942297044689182
New best validation metric: 0.5942297044689182

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5982773967073933
New best validation metric: 0.5982773967073933

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6136276168455159
New best validation metric: 0.6136276168455159

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.60663

### CLS

In [38]:
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_cls_pretrained[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6049371554677608
New best validation metric: 0.6049371554677608

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.606407516462664
New best validation metric: 0.606407516462664

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6131397955810355
New best validation metric: 0.6131397955810355

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6070897950406762

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6144963557028424
New best validation metric: 0.614496355

## Fine-tuned BERT

### Mean pooled

In [None]:
# NOTE: requires `pooled_mean`, i.e. the mean-pooled embeddings obtained from the
# fine-tuned BERT model in the "Fine-tuning BERT and pooling" section.
# Runs the same hyperparameter search as the other embedding sections.
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_mean[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)

### Max pooled

In [None]:
# NOTE: requires `pooled_max`, i.e. the max-pooled embeddings obtained from the
# fine-tuned BERT model in the "Fine-tuning BERT and pooling" section.
# Runs the same hyperparameter search as the other embedding sections.
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_max[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)

### Sum pooled

In [None]:
# NOTE: requires `pooled_sum`, i.e. the sum-pooled embeddings obtained from the
# fine-tuned BERT model in the "Fine-tuning BERT and pooling" section.
# Runs the same hyperparameter search as the other embedding sections.
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_sum[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)

### CLS

In [None]:
# NOTE: requires `pooled_cls`, i.e. the CLS embeddings obtained from the
# fine-tuned BERT model in the "Fine-tuning BERT and pooling" section
# (the sum-pooled embeddings were previously used here by mistake).
# Runs the same hyperparameter search as the other embedding sections.
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=pooled_cls[client_index],
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)

# Baseline 2: Averaging history and use FFN

Here, we will use `nlpsig` to construct some paths of embeddings which we will average and use those in a FFN.

First, we define the arguments for how we want to construct our path. As we're going to just do a simple average of embeddings, I'll set zero padding as false, and construct the path by looking at the last `k` posts.

We will consider one variant where we average the history and use that as the only input to the FFN. Alternatively, we can also concatenate the full embedding of the current post to the averaged history.

In [39]:
path_specifics = {"pad_by": "history",
                  "zero_padding": False,
                  "method": "k_last",
                  "k": 10,
                  "time_feature": None,
                  "embeddings": "full",
                  "include_current_embedding": True}

In [40]:
def obtain_mean_history(embeddings, path_specifics, concatenate_current=True):
    """
    Build FFN inputs by averaging each padded history path of embeddings.

    Relies on the notebook-level `anno_mi` dataframe and `client_index`
    selector.

    Parameters
    ----------
    embeddings
        Embeddings handed to `nlpsig.PrepareData`; also indexed with
        `client_index` when `concatenate_current` is True.
        (presumably one row per row of `anno_mi` -- TODO confirm)
    path_specifics : dict
        Keyword arguments forwarded to `PrepareData.pad`.
    concatenate_current : bool, default True
        If True, `embeddings[client_index]` is appended to the averaged
        history along axis 1.

    Returns
    -------
    Array holding the averaged paths (cast to float), optionally followed by
    the current embeddings.
    """
    prepared = nlpsig.PrepareData(
        anno_mi,
        id_column="transcript_id",
        label_column="client_talk_type",
        embeddings=embeddings,
    )
    padded = prepared.pad(**path_specifics)

    # keep only the `client_index` rows and strip the trailing two columns
    # (which contain the id and the label)
    history = padded[client_index][:, :, :-2]

    # collapse axis 1 by taking the mean
    # (presumably the path-length axis -- TODO confirm)
    averaged = history.mean(1).astype("float")

    if not concatenate_current:
        return averaged
    # append the current embedding to its averaged history
    return np.concatenate([averaged, embeddings[client_index]], axis=1)

## SBERT 768

In [41]:
# Averaged history of `sbert_768_embeddings`, concatenated with the current
# embedding (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    sbert_768_embeddings,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6336579104504892
New best validation metric: 0.6336579104504892

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6229154174885634

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6354887372964544
New best validation metric: 0.6354887372964544

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6202663904169482

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.629722492957916

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 

In [42]:
# Averaged history of `sbert_768_embeddings` only (the current embedding is
# NOT concatenated), then an FFN grid search.
path_history = obtain_mean_history(
    sbert_768_embeddings,
    path_specifics,
    concatenate_current=False,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5446088332745634
New best validation metric: 0.5446088332745634

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.44715112883074404

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5477610963567903
New best validation metric: 0.5477610963567903

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5009650545361668

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5550629275169492
New best validation metric: 0.5550629275169492

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

## SBERT 384

In [43]:
# Averaged history of `sbert_384_embeddings`, concatenated with the current
# embedding (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    sbert_384_embeddings,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.629731436786898
New best validation metric: 0.629731436786898

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6150081425745879

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6222236237340872

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6134837195173752

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.622174242981423

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) me

In [44]:
# Averaged history of `sbert_384_embeddings` only (the current embedding is
# NOT concatenated), then an FFN grid search.
path_history = obtain_mean_history(
    sbert_384_embeddings,
    path_specifics,
    concatenate_current=False,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5375324827777448
New best validation metric: 0.5375324827777448

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.39416718095332737

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5496305985947535
New best validation metric: 0.5496305985947535

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.46014893932447415

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5362814989244555

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0

## Pretrained BERT

### Mean pooled

In [45]:
# Averaged history of `pooled_mean_pretrained`, concatenated with the current
# embedding (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_mean_pretrained,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6190062687452029
New best validation metric: 0.6190062687452029

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6202644024006689
New best validation metric: 0.6202644024006689

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6243087698760964
New best validation metric: 0.6243087698760964

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6177062321071929

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6287277942985299
New best validation metric: 0.6287277

### Max pooled

In [46]:
# Averaged history of `pooled_max_pretrained`, concatenated with the current
# embedding (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_max_pretrained,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.32392112173712684
New best validation metric: 0.32392112173712684

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5638085580142388
New best validation metric: 0.5638085580142388

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5650682297137204
New best validation metric: 0.5650682297137204

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6100470508982894
New best validation metric: 0.6100470508982894

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.60414

### Sum pooled

In [47]:
# Averaged history of `pooled_sum_pretrained`, concatenated with the current
# embedding (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_sum_pretrained,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3337232539701499
New best validation metric: 0.3337232539701499

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5671612151384917
New best validation metric: 0.5671612151384917

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5747805365564674
New best validation metric: 0.5747805365564674

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6233954778648023
New best validation metric: 0.6233954778648023

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6094453

### CLS

In [48]:
# Averaged history of `pooled_cls_pretrained`, concatenated with the current
# embedding (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_cls_pretrained,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6150542001394674
New best validation metric: 0.6150542001394674

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6346005173376803
New best validation metric: 0.6346005173376803

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.631959308477473

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6351430467800794
New best validation metric: 0.6351430467800794

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6268536739071289

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

## Fine-tuned BERT

### Mean pooled

In [None]:
# Averaged history of `pooled_mean`, concatenated with the current embedding
# (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_mean,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

### Max pooled

In [None]:
# Averaged history of `pooled_max`, concatenated with the current embedding
# (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_max,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

### Sum pooled

In [None]:
# Averaged history of `pooled_sum`, concatenated with the current embedding
# (the default of `obtain_mean_history`), then an FFN grid search.
# NOTE(review): this cell previously passed `concatenate_current=False`, unlike
# the Mean/Max/CLS cells of this section and the pretrained "Sum pooled" cell,
# which made the pooling strategies non-comparable. Confirm that was not an
# intentional ablation.
path_history = obtain_mean_history(pooled_sum, path_specifics)
ffn_hyperparameter_search(num_epochs=num_epochs,
                          x_data=path_history,
                          y_data=y_data,
                          hidden_dim_sizes=hidden_dim_sizes,
                          dropout_rates=dropout_rates,
                          learning_rates=learning_rates,
                          seeds=seeds,
                          loss=loss,
                          validation_metric=validation_metric)

### CLS

In [None]:
# Averaged history of `pooled_cls`, concatenated with the current embedding
# (the default of `obtain_mean_history`), then an FFN grid search.
path_history = obtain_mean_history(
    pooled_cls,
    path_specifics,
)
ffn_hyperparameter_search(
    num_epochs=num_epochs,
    x_data=path_history,
    y_data=y_data,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

# Baseline 3: LSTM classification

# Baseline 4: FFN using signatures

First, we reduce the dimension of the embeddings and then compute the signatures of the resulting paths. We use the path signature as input to the FFN for classification.

In [163]:
# Keyword arguments later unpacked into `PrepareData.pad(...)` for the
# signature baseline. NOTE: this rebinds the notebook-level `path_specifics`
# defined for Baseline 2; the only difference is `embeddings="dim_reduced"`,
# so re-run the Baseline 2 definition before re-running those cells.
path_specifics = dict(
    pad_by="history",
    zero_padding=False,
    method="k_last",
    k=10,
    time_feature=None,
    embeddings="dim_reduced",
    include_current_embedding=True,
)

In [164]:
def obtain_signatures_history(embeddings,
                              path_specifics,
                              method,
                              dimension,
                              sig_depth,
                              concatenate_current=True):
    """
    Build FFN inputs from path signatures of dimension-reduced embeddings.

    Relies on the notebook-level `anno_mi`, `client_index` and `seed`.

    Parameters
    ----------
    embeddings
        Full embeddings; reduced with `nlpsig.DimReduce`, passed to
        `nlpsig.PrepareData`, and indexed with `client_index` when
        `concatenate_current` is True.
    path_specifics : dict
        Keyword arguments forwarded to `PrepareData.pad`.
    method
        Dimension-reduction method passed to `nlpsig.DimReduce`.
    dimension
        Number of components kept by the dimension reduction.
    sig_depth
        Depth passed to `signatory.signature`.
    concatenate_current : bool, default True
        If True, `embeddings[client_index]` is appended to the signatures
        along dim 1.

    Returns
    -------
    torch.Tensor
        The signatures, optionally followed by the current embeddings.
    """
    # reduce the embedding dimension (seeded with the notebook-level `seed`)
    reducer = nlpsig.DimReduce(method=method, n_components=dimension)
    reduced = reducer.fit_transform(embeddings, random_state=seed)

    prepared = nlpsig.PrepareData(
        anno_mi,
        id_column="transcript_id",
        label_column="client_talk_type",
        embeddings=embeddings,
        embeddings_reduced=reduced,
    )
    padded = prepared.pad(**path_specifics)

    # keep only the `client_index` rows and strip the trailing two columns
    # (which contain the id and the label)
    history = padded[client_index][:, :, :-2].astype("float")

    # signatory works on torch tensors
    history_tensor = torch.from_numpy(history).float()
    signature = signatory.signature(history_tensor, sig_depth).float()

    if not concatenate_current:
        return signature
    # append the current (full) embedding to its signature
    current = torch.tensor(embeddings[client_index])
    return torch.cat([signature, current], dim=1)

In [169]:
def signature_ffn_baseline_hyperparameter_search(num_epochs,
                                                 path_specifics,
                                                 embeddings,
                                                 y_data,
                                                 dim_reduce_methods,
                                                 dimension_and_sig_depths,
                                                 hidden_dim_sizes,
                                                 dropout_rates,
                                                 learning_rates,
                                                 seeds,
                                                 loss,
                                                 gamma=0,
                                                 validation_metric="f1",
                                                 output="best_sig_ffn_model.pkl"):
    """
    Grid-search the signature + FFN baseline and report test scores for the best setting.

    For every (dimension, sig_depth) pair and every dimension-reduction
    method, the FFN input is built with `obtain_signatures_history` (always
    concatenating the current embedding) and `ffn_hyperparameter_search` is
    run on it. The best configuration according to `validation_metric` is
    tracked with `SaveBestModel` and afterwards read back from `output`.
    That configuration is then trained once per seed with `implement_ffn`,
    and the per-seed and mean test scores are printed.

    Parameters
    ----------
    num_epochs
        Passed to `ffn_hyperparameter_search` and `implement_ffn`.
    path_specifics : dict
        Forwarded to `obtain_signatures_history`.
    embeddings
        Forwarded to `obtain_signatures_history`.
    y_data
        Labels passed to `ffn_hyperparameter_search` and `implement_ffn`.
    dim_reduce_methods : iterable
        Dimension-reduction methods to try.
    dimension_and_sig_depths : iterable of (dimension, sig_depth)
        Reduced dimension / signature depth pairs to try.
    hidden_dim_sizes, dropout_rates, learning_rates
        Grids forwarded to `ffn_hyperparameter_search`.
    seeds : iterable
        Forwarded to `ffn_hyperparameter_search`; the final test evaluation
        also trains one model per seed.
    loss, gamma
        Forwarded to `ffn_hyperparameter_search` and `implement_ffn`.
    validation_metric : {"loss", "accuracy", "f1"}, default "f1"
        Metric used for model selection and for the reported test score.
    output : str, default "best_sig_ffn_model.pkl"
        Checkpoint path given to `SaveBestModel` and read with `torch.load`.

    Returns
    -------
    tuple
        `(model, best validation metric, extra_info)`, where `model` is the
        FFN trained with the LAST seed in `seeds` during the test evaluation
        and `extra_info` is the checkpoint's hyperparameter dictionary.

    Raises
    ------
    ValueError
        If `validation_metric` is not one of "loss", "accuracy", "f1".
    """
    # position of each supported metric within the `test_metrics` returned by
    # `implement_ffn`
    metric_positions = {"loss": 0, "accuracy": 1, "f1": 2}
    # fail fast: an unsupported metric would otherwise leave `test_scores`
    # empty and only surface as a ZeroDivisionError after the whole search
    if validation_metric not in metric_positions:
        raise ValueError(
            f"validation_metric must be one of {sorted(metric_positions)}, "
            f"got {validation_metric!r}"
        )
    metric_position = metric_positions[validation_metric]

    # initialise SaveBestModel class
    best_model = SaveBestModel(metric=validation_metric,
                               output=output,
                               verbose=True)

    for dimension, sig_depth in dimension_and_sig_depths:
        for method in dim_reduce_methods:
            print("\n" + "#" * 50)
            print(f"dimension: {dimension} | "
                  f"sig_depth: {sig_depth} | "
                  f"method: {method}")
            # obtain the ffn input by dimension reduction and computing signatures
            x_data = obtain_signatures_history(embeddings=embeddings,
                                               path_specifics=path_specifics,
                                               method=method,
                                               dimension=dimension,
                                               sig_depth=sig_depth,
                                               concatenate_current=True)

            # perform hyperparameter search for FFN
            model, best_valid_metric, FFN_info = ffn_hyperparameter_search(num_epochs,
                                                                           x_data=x_data,
                                                                           y_data=y_data,
                                                                           hidden_dim_sizes=hidden_dim_sizes,
                                                                           dropout_rates=dropout_rates,
                                                                           learning_rates=learning_rates,
                                                                           seeds=seeds,
                                                                           loss=loss,
                                                                           gamma=gamma,
                                                                           validation_metric=validation_metric,
                                                                           verbose=False)

            # record this configuration if it beats the best one seen so far
            best_model(current_valid_metric=best_valid_metric,
                       model=model,
                       extra_info={"dimension": dimension,
                                   "sig_depth": sig_depth,
                                   "method": method,
                                   **FFN_info})

    # read back the best checkpoint (a file produced by this search)
    checkpoint = torch.load(f=output)
    best_params = checkpoint["extra_info"]
    print("*" * 50)
    print("The best model had the following parameters:")
    print(best_params)

    # rebuild the ffn input for the best dimension reduction / signature setting
    x_data = obtain_signatures_history(embeddings=embeddings,
                                       path_specifics=path_specifics,
                                       method=best_params["method"],
                                       dimension=best_params["dimension"],
                                       sig_depth=best_params["sig_depth"],
                                       concatenate_current=True)

    test_scores = []
    for trial_seed in seeds:
        model, _, test_metrics = implement_ffn(num_epochs=num_epochs,
                                               x_data=x_data,
                                               y_data=y_data,
                                               hidden_dim=best_params["hidden_dim"],
                                               dropout_rate=best_params["dropout_rate"],
                                               learning_rate=best_params["learning_rate"],
                                               loss=loss,
                                               gamma=gamma,
                                               seed=trial_seed,
                                               verbose=False)
        # collect the TEST-set score for the metric used during selection
        test_scores.append(test_metrics[metric_position])

    test_scores_mean = sum(test_scores)/len(test_scores)
    print(f"- Best model: average (test) metric score: {test_scores_mean}")
    print(f"scores: {test_scores}")
    return model, best_model.best_valid_metric, checkpoint["extra_info"]

In [286]:
# (reduced dimension, signature depth) pairs to try in the signature baseline
dimensions_and_sig_depths = [
    (30, 2),
    (10, 3),
    (5, 4),
    (4, 5),
    (3, 6),
]
# dimension-reduction methods available for the search
dim_reduce_methods = [
    "gaussian_random_projection",
    "umap",
]

In [171]:
def sig_terms(channels, depth):
    """
    Print the per-level term counts and return their total.

    Level ``i`` (for ``i = 1, ..., depth``) contributes ``channels**i`` terms;
    the list of these counts is printed and their sum is returned.

    Parameters
    ----------
    channels : int
        Base that is raised to each level (the number of path channels).
    depth : int
        Highest level to include. If ``depth < 1`` an empty list is printed
        and 0 is returned.

    Returns
    -------
    int
        ``channels**1 + channels**2 + ... + channels**depth``.
    """
    terms_per_level = [channels**level for level in range(1, depth+1)]
    print(terms_per_level)
    return sum(terms_per_level)

# Total term count for every (dimension, depth) pair in the search grid;
# each call also prints that pair's per-level breakdown.
[sig_terms(dim, sig_depth) for dim, sig_depth in dimensions_and_sig_depths]

[30, 900]
[10, 100, 1000]
[5, 25, 125, 625]
[4, 16, 64, 256, 1024]
[3, 9, 27, 81, 243, 729]


[930, 1110, 780, 1364, 1092]

## SBERT 768

In [172]:
# Signature + FFN baseline search on the SBERT 768 embeddings,
# restricted to UMAP as the dimension reduction method.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=sbert_768_embeddings,
    y_data=y_data,
    dim_reduce_methods=["umap"],
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: umap
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5711516284099143

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5905183918215873

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.592648142995164

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5908882732859584

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5900360905464689

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5949785371406419

!!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3912504792375362

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3386923250010961

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5150341477172734

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4565141823059161

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5238276068450679

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4588847596748161

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.32703964575541755

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.32626346027183806

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3528201998059963

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.36661028282805697

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.38078788385991913

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.38008505637032264

!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3437758562898536

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.34690466873925385

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.35188674668490233

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3802173813547331

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3722019257878254

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3885049576566241

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3506492406679099

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.343854747457841

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.35547861934247144

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.36758951055871386

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.37145083740897966

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3638348815247593

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.5634969182902727
scores: [0.5728099280611926, 0.5574934418965506, 0.5540324155092528, 0.5650246768142332, 0.5681241291701346]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1698, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.1, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.1, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.5949785371406419,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'umap',
  'hidden_dim': [100],
  'dropout_rate': 0.1,
  'learning_rate': 0.0001,
  'seed': 1234})

In [173]:
# Signature + FFN baseline search on the SBERT 768 embeddings,
# restricted to Gaussian random projection as the dimension reduction method.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=sbert_768_embeddings,
    y_data=y_data,
    dim_reduce_methods=["gaussian_random_projection"],
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6332841258955344

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.6060754748879138

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6315716501689135

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6084819335228902

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6277975034889711

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.604607091983233

!!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5946656132020516

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5700106424700511

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5976649724202785

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5777628677426256

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5917319151213754

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5766649880581706

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6000501688846704

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5809232259037047

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5925679215752553

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5846316017723161

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5921406289150484

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5879858812987653

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5906673110536758

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5570626165698885

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5798794196288243

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5642290176032956

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5803430745992395

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5678846059478133

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5853295707830112

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5624318537435885

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5788141915407888

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5666408487706127

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5796361614222059

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5658901965845657

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.6205109797054182
scores: [0.632515167470956, 0.624259816633438, 0.6201743633858078, 0.6228824447617941, 0.6027231062750951]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1698, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.5, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.5, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.6332841258955344,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'gaussian_random_projection',
  'hidden_dim': [100],
  'dropout_rate': 0.5,
  'learning_rate': 0.001,
  'seed': 1234})

## SBERT 384

In [174]:
# Signature + FFN baseline search on the SBERT 384 embeddings,
# restricted to UMAP as the dimension reduction method.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=sbert_384_embeddings,
    y_data=y_data,
    dim_reduce_methods=["umap"],
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: umap
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5616462802263444

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5205274064639462

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5737460097677902

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5679433250629167

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5774099856770114

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5709231664961026

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.37828659632170447

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3243111744394435

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5117059817281595

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.40626041837426496

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5118398757761796

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.42324378730683854

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.35012486323014425

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.34283827628660984

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3602100808201175

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37240192638135167

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.38436607676850654

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.38508711548590246

!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.34831965510528295

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.36408899764800334

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.36783935256366784

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.38417926977194966

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3716415518095381

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3899173934188607

!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.34976603464215644

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.33383262258237084

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.35519571129116706

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.35807185468236097

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.36375360688889835

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3667527901468871

!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.571433454058652
scores: [0.570305761195345, 0.5621418991898993, 0.5719888677521174, 0.5850447974962693, 0.5676859446596288]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1314, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.1, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
     (1): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
     (1): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.1, inplace=False)
     (1): Dropout(p=0.1, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.5797441815382306,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'umap',
  'hidden_dim': [100, 100],
  'dropout_rate': 0.1,
  'learning_rate': 0.001,
  'seed': 1234})

In [175]:
# Signature + FFN baseline search on the SBERT 384 embeddings,
# restricted to Gaussian random projection as the dimension reduction method.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=sbert_384_embeddings,
    y_data=y_data,
    dim_reduce_methods=["gaussian_random_projection"],
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6215144348740584

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.580555620435603

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6216593262920163

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5904354367327488

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6152213405758956

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5897316398739492

!!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5800665533506869

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5639128475218345

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5774944284772575

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5639098974454357

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5776041408842082

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5652464059838658

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6050424143319286

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5766590875947737

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6018545733310756

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5785274196368866

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5974932840575402

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5728346973708488

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5661753917485325

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5429065848037445

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5697624023817698

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5442123362605154

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5708869067118592

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5474189028347491

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5625283754989221

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5325964188182526

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5585851382555467

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5359513270895951

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5597985707522288

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5325673967581769

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.6095822737864908
scores: [0.6133803541623524, 0.6082447202402134, 0.6100346475290354, 0.6107418080026416, 0.6055098389982111]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1314, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.2, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.2, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.6216593262920163,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'gaussian_random_projection',
  'hidden_dim': [100],
  'dropout_rate': 0.2,
  'learning_rate': 0.001,
  'seed': 1234})

## Pretrained BERT

### Mean pooled

In [176]:
# Signature + FFN baseline search on the mean-pooled pretrained BERT
# embeddings, trying every method listed in `dim_reduce_methods`.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=pooled_mean_pretrained,
    y_data=y_data,
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6059266169937982

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5916409201963615

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6126597922353338

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5979535964435849

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6094295295931886

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5962118260453797

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5700237336307468

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5967046881819968

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5912676302293706

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6005072749799396

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5834512601010601

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.6020799957523746

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.46189778911866747

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.39296002908191185

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.505093948509024

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.44611248434949075

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4987762206780711

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4491998021630007

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.32182527843620756

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.35623541180687057

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5246790093883821

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4457918975623869

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5224177143003516

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.45481625975656376

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.31424164571611507

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.33225284121627063

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.4243725021698904

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3864230067706246

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4526150236461934

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3913084194838925

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.34502951240544166

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3479528809012056

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3708074186699335

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.39771648371426127

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4643584597188483

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4008938763017439

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.34529982932564

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3632003307728149

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3778232128855603

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3950820104314708

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3940974304721189

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4173985246875203

!!!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3473678794616082

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3484984385774934

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.36475869591385707

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37197342130199634

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.37208456898974873

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3886533503045601

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.33969330675904075

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.33326606628263933

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3706765733357391

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37421846311819096

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3815117870583924

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.37920964502008325

!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3541627013819682

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.33845061777171453

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3741801763841374

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3726973972735326

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.37429677168353953

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.37765587381113824

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.5999860996430869
scores: [0.6036432205787045, 0.5836248418849376, 0.6072993737351536, 0.6019078309977276, 0.6034552310189114]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1698, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.5, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
     (1): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
     (1): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.5, inplace=False)
     (1): Dropout(p=0.5, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.6139997667692132,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'gaussian_random_projection',
  'hidden_dim': [100, 100],
  'dropout_rate': 0.5,
  'learning_rate': 0.001,
  'seed': 1234})

### Max pooled

In [177]:
# Hyperparameter search for the signature + FFN baseline, fed with the
# max-pooled pretrained embeddings (`pooled_max_pretrained`).
# The call is the cell's last expression, so its return value is what the
# notebook displays; in the recorded run that is a
# (model, score, selected-settings dict) tuple.
# NOTE(review): the returned tuple is only displayed, never bound to a name.
signature_ffn_baseline_hyperparameter_search(
    # training budget
    num_epochs=num_epochs,
    # inputs
    path_specifics=path_specifics,
    embeddings=pooled_max_pretrained,
    y_data=y_data,
    # search grid
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    # objective and model-selection criterion
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.537881609626655

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5476732580075244

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.578170101715539

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5743697177854266

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5793770211598044

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5719237714509803

!!!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.49694253788019677

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5408754845210442

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5466507321424031

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5590071218330234

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5598419401841863

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5570704550345387

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3728665169526611

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3390998104446016

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.4510057938575665

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.41346487713240665

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.46020723527273705

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.40871536009293347

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.2976103822313012

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3320410887139608

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.4695359764778548

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.42853404984489785

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4985311189562206

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.45343742903941686

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.34150391303441374

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.35074092221860537

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.4024333477731933

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.39972289552732904

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4251205452428543

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.40740827545382813

!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3287768067135046

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3291778140922983

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3595984989626101

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.38358373676826696

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4185547371218744

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3948296592462111

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3500645170692823

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3550985813389981

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3815471560423149

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37595501626325617

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3760886091909143

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.39832037088739114

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3523750426159291

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.35344746644635705

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3702541099359415

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3789858264727477

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3720591344622594

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.38700510413665784

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3599723171707046

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.34199744060103954

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3712119401589658

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.36713237910075136

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3789008190706763

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.37502409610620263

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3539682956002347

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3365315907054634

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3660004777730571

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3717182564101741

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.37490006329326947

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3700704527667084

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.5705852185763642
scores: [0.5726232063262305, 0.5629806667828555, 0.5734961849572446, 0.5813265961143207, 0.5624994387011701]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1698, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.2, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
     (1): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
     (1): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.2, inplace=False)
     (1): Dropout(p=0.2, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.5855536975394757,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'gaussian_random_projection',
  'hidden_dim': [100, 100],
  'dropout_rate': 0.2,
  'learning_rate': 0.001,
  'seed': 1234})

### Sum pooled

In [178]:
# Hyperparameter search for the signature + FFN baseline, fed with the
# sum-pooled pretrained embeddings (`pooled_sum_pretrained`).
# The call is the cell's last expression, so its return value is what the
# notebook displays once the search finishes.
# NOTE(review): the return value is only displayed, never bound to a name.
signature_ffn_baseline_hyperparameter_search(
    # training budget
    num_epochs=num_epochs,
    # inputs
    path_specifics=path_specifics,
    embeddings=pooled_sum_pretrained,
    y_data=y_data,
    # search grid
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    # objective and model-selection criterion
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.36774152382340475

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3971767789263786

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.4219475662619624

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4854896688904159

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4442292724166476

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5045087993643476

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.42453730954466506

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.618180271980832

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6053001921863033

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6217352291147529

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6028902321113405

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.6198069463531829

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3688877366740567

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3981593230866595

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.4009467745717677

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4423230806199424

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.41527530804214974

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4518947601584033

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.4346614459648088

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.4219303810490603

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5737741196303093

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5902194706845415

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5800930957055724

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5850770907129953

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3643110884414613

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.353097090751244

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.38357557966897554

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3789837690132359

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.386217987904651

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.384203177060051

!!!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.32905635192612415

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3696486406324052

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5150826141417457

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4960742288226265

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5389206575044388

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5182848990609481

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3764616527822466

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.35998676003100083

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3852869682312602

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37730138837155713

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3945235368014211

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.37689476090344487

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.36175253446895117

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.37279054596312833

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.38899455953296325

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4332206558201543

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4054121187247445

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.46686667124376735

!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.37121305413029765

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3519180981899754

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3724967341410609

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3779562777614639

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.38184944303261537

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.36902478888687124

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.36878220547218454

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.37111505689776336

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.383585594036672

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.39922435541425283

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3929945491094727

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4236996985012421

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.6270438249947614
scores: [0.6291888863743132, 0.6205326294398065, 0.6289720183814411, 0.629191367928499, 0.6273342228497475]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1698, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.1, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
     (1): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
     (1): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.1, inplace=False)
     (1): Dropout(p=0.1, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.6265557838828765,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'umap',
  'hidden_dim': [100, 100],
  'dropout_rate': 0.1,
  'learning_rate': 0.0001,
  'seed': 1234})

### CLS

In [179]:
# Hyperparameter search for the signature + FFN baseline using the
# `pooled_cls_pretrained` embeddings. The call is the cell's last expression,
# so whatever it returns is displayed as the cell output.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=pooled_cls_pretrained,
    y_data=y_data,
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.6138978542358715

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5974296054488617

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.6192558156549862

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.6041804097407208

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.6137133290187207

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.6056471125547538

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.5774196415013894

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.5888988443398204

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5971065416329707

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.5926958854918859

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5880210084396732

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5934553241335979

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.47960890777320764

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.4056681018633778

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5399036499162327

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4752713847245527

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5311685832692311

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.498570771340548

!!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.29608707560249725

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.35665863979487994

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5434041622371317

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.45076109406854525

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5362282166744261

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.46642267379004254

!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3181109286824774

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.34260894254545216

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.43414746886625244

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.3981720106597106

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4641792284546418

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4026929733786192

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3238771658473422

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.32443278063777814

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3649306387488989

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37643996189069817

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4299021807880883

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.39318975426163705

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3540915744326994

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3669661591620589

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.37929190263724766

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.38357744819093503

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.4061476747794253

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.407189304148219

!!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3617102010465812

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3495906715545142

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3697793799767402

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.39681669225198246

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.38379550163525067

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.4057559290753835

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3495080242154929

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.34065685676827173

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3690376848206738

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.35886564450920544

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.3777826916673087

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.36154620144295946

!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.001
- average (validation) metric score: 0.3544287985361132

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.5 | learning_rate: 0.0001
- average (validation) metric score: 0.3384479621128692

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.3572887024524511

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.37936853502854884

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.37292284264184683

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
hidden_dim: [100] | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3920893752359226

!!!!!!!!!!

  0%|          | 0/13551 [00:00<?, ?it/s]

- Best model: average (test) metric score: 0.6156088174403453
scores: [0.6068369315171221, 0.6330327741191857, 0.6172676160011962, 0.5900818607086743, 0.630824904855549]


(FeedforwardNeuralNetModel(
   (input_layer): Linear(in_features=1698, out_features=100, bias=True)
   (relu): ReLU()
   (dropout): Dropout(p=0.2, inplace=False)
   (linear_layers): ModuleList(
     (0): Linear(in_features=100, out_features=100, bias=True)
   )
   (non_linear_layers): ModuleList(
     (0): ReLU()
   )
   (dropout_layers): ModuleList(
     (0): Dropout(p=0.2, inplace=False)
   )
   (final_layer): Linear(in_features=100, out_features=3, bias=True)
 ),
 0.6192558156549862,
 {'dimension': 30,
  'sig_depth': 2,
  'method': 'gaussian_random_projection',
  'hidden_dim': [100],
  'dropout_rate': 0.2,
  'learning_rate': 0.001,
  'seed': 1234})

### Fine-tuned BERT

### Mean pooled

In [None]:
# Hyperparameter search for the signature + FFN baseline using the
# `pooled_mean` embeddings. The call is the cell's last expression, so
# whatever it returns is displayed as the cell output.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=pooled_mean,
    y_data=y_data,
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

### Max pooled

In [None]:
# Hyperparameter search for the signature + FFN baseline using the
# `pooled_max` embeddings. The call is the cell's last expression, so
# whatever it returns is displayed as the cell output.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=pooled_max,
    y_data=y_data,
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

### Sum pooled

In [None]:
# Hyperparameter search for the signature + FFN baseline using the
# `pooled_sum` embeddings. The call is the cell's last expression, so
# whatever it returns is displayed as the cell output.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=pooled_sum,
    y_data=y_data,
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

### CLS

In [None]:
# Hyperparameter search for the signature + FFN baseline using the
# `pooled_cls` embeddings. The call is the cell's last expression, so
# whatever it returns is displayed as the cell output.
signature_ffn_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    path_specifics=path_specifics,
    embeddings=pooled_cls,
    y_data=y_data,
    dim_reduce_methods=dim_reduce_methods,
    dimension_and_sig_depths=dimensions_and_sig_depths,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    validation_metric=validation_metric,
)

# StackedDeepSigNet

## Obtaining path by looking at post history

We can obtain a path by looking at the history of each utterance. Here we look at the last 10 utterances, including the current one, and pad with vectors of zeros if there are fewer than 10.

We only want to consider paths that correspond to a client's utterance as we want to model a change in mood at that time. Their history will still contain the therapist's utterances too.

In [244]:
# Names of the time-derived columns that are attached to each path.
time_features = ["time_encoding", "timeline_index"]

# Keyword arguments later unpacked into the path-padding call
# (`paths.pad(**path_specifics)`): keep the last k=10 history items,
# zero-padded, built from the dimension-reduced embeddings.
path_specifics = dict(
    pad_by="history",
    zero_padding=True,
    method="k_last",
    k=10,
    time_feature=time_features,
    # one standardisation entry per item in `time_features`
    standardise_method=["minmax", None],
    embeddings="dim_reduced",
    include_current_embedding=True,
    pad_from_below=False,
)

In [288]:
# Sanity check on the embedding matrix size before building paths
# (presumably one row per utterance and 384 embedding columns -- confirm).
sbert_384_embeddings.shape

(13551, 384)

In [289]:
def obtain_SDSN_input(embeddings,
                      path_specifics,
                      method,
                      dimension):
    """
    Build the StackedDeepSigNet input from a set of embeddings.

    Relies on the notebook-level globals `anno_mi`, `client_index` and `seed`.

    Parameters
    ----------
    embeddings
        2D array of embeddings; `embeddings.shape[1]` is compared with
        `dimension` to decide whether dimension reduction is needed.
    path_specifics : dict
        Keyword arguments unpacked into `nlpsig.PrepareData.pad`.
    method
        Dimension reduction method name passed to `nlpsig.DimReduce`
        (unused when no reduction is performed).
    dimension : int
        Target number of components for the reduced embeddings.

    Returns
    -------
    The value returned by `PrepareData.get_torch_path_for_SDSN`
    (callers in this notebook unpack it as `x_data, input_channels`).
    """
    # skip the reduction entirely when the embeddings already have the target size
    needs_reduction = dimension != embeddings.shape[1]
    if needs_reduction:
        reducer = nlpsig.DimReduce(method=method, n_components=dimension)
        embeddings_reduced = reducer.fit_transform(embeddings, random_state=seed)
    else:
        embeddings_reduced = embeddings

    prepared = nlpsig.PrepareData(
        anno_mi,
        id_column="transcript_id",
        label_column="client_talk_type",
        embeddings=embeddings,
        embeddings_reduced=embeddings_reduced,
    )
    prepared.pad(**path_specifics)

    # keep only the rows selected by the global `client_index`
    for attribute in ("array_padded", "embeddings", "embeddings_reduced"):
        setattr(prepared, attribute, getattr(prepared, attribute)[client_index])

    sdsn_input_options = {
        "include_time_features_in_path": True,
        "include_time_features_in_input": True,
        "include_embedding_in_input": True,
        "reduced_embeddings": False,
    }
    return prepared.get_torch_path_for_SDSN(**sdsn_input_options)

In [258]:
def implement_sdsn(num_epochs,
                   x_data,
                   y_data,
                   sig_depth,
                   input_channels,
                   output_channels,
                   num_time_features,
                   embedding_dim,
                   output_dim,
                   lstm_hidden_dim,
                   ffn_hidden_dim,
                   BiLSTM,
                   dropout_rate,
                   learning_rate,
                   loss,
                   gamma=0,
                   augmentation_type="Conv1d",
                   comb_method="concatenation",
                   data_split_seed=0,
                   verbose=True,
                   verbose_shorten=True):
    """
    Train a StackedDeepSigNet on one train/validation/test split and evaluate it.

    Relies on the notebook-level global `seed` (passed to `set_seed` and
    `training_pytorch`) and on the notebook helper `print_test_scores`.

    Parameters
    ----------
    num_epochs : int
        Number of epochs passed to `training_pytorch`.
    x_data, y_data
        Model inputs and labels. Anything that is not already a `torch.Tensor`
        is converted with `torch.tensor`; `x_data` is cast to float before
        splitting.
    sig_depth, input_channels, output_channels, num_time_features,
    embedding_dim, output_dim, dropout_rate, augmentation_type, BiLSTM,
    comb_method
        Forwarded unchanged to the `StackedDeepSigNet` constructor.
    lstm_hidden_dim, ffn_hidden_dim
        Forwarded to `StackedDeepSigNet` as `hidden_dim_lstm` and
        `hidden_dim_ffn` respectively.
    learning_rate : float
        Learning rate of the Adam optimiser.
    loss : str
        Either "focal" (uses `FocalLoss(gamma=gamma)`) or "cross_entropy"
        (uses `torch.nn.CrossEntropyLoss()`).
    gamma
        Focal loss parameter; only used when `loss == "focal"`.
    data_split_seed : int
        Seed passed to `split_dataset`.
    verbose : bool
        If True, report test scores through `print_test_scores`.
    verbose_shorten : bool
        If False (and `verbose` is True), also print the validation metrics.
        It is additionally forwarded to `print_test_scores`.

    Returns
    -------
    tuple
        `(sdsn_model, (loss_v, acc_v, f1_v), (loss, acc, f1))`: the trained
        model, its validation metrics and its test metrics.

    Raises
    ------
    ValueError
        If `loss` is not one of the supported loss names.
    """
    # fail fast on an unsupported loss: otherwise `criterion` would never be
    # bound and the failure would only surface later, after the model is built
    supported_losses = ("focal", "cross_entropy")
    if loss not in supported_losses:
        raise ValueError(
            f"loss must be one of {supported_losses}, got {loss!r}"
        )

    # set seed
    set_seed(seed)

    # initialise SDSN
    SDSN_args = {
        "input_channels": input_channels,
        "output_channels": output_channels,
        "num_time_features": num_time_features,
        "embedding_dim": embedding_dim,
        "sig_depth": sig_depth,
        "hidden_dim_lstm": lstm_hidden_dim,
        "hidden_dim_ffn": ffn_hidden_dim,
        "output_dim": output_dim,
        "dropout_rate": dropout_rate,
        "augmentation_type": augmentation_type,
        "BiLSTM": BiLSTM,
        # honour the caller's choice (previously hard-coded to "concatenation")
        "comb_method": comb_method
    }
    sdsn_model = StackedDeepSigNet(**SDSN_args)

    # split dataset
    if not isinstance(x_data, torch.Tensor):
        x_data = torch.tensor(x_data)
    if not isinstance(y_data, torch.Tensor):
        y_data = torch.tensor(y_data)
    train, valid, test = split_dataset(x_data=x_data.float(),
                                       y_data=y_data,
                                       train_size=0.8,
                                       valid_size=0.2,
                                       shuffle=True,
                                       as_DataLoader=True,
                                       seed=data_split_seed)

    # define loss (the value of `loss` was validated at the top)
    if loss == "focal":
        criterion = FocalLoss(gamma=gamma)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    # define optimizer
    optimizer = torch.optim.Adam(sdsn_model.parameters(), lr=learning_rate)
    # define scheduler for adjusting the learning rate
    scheduler = ReduceLROnPlateau(optimizer, 'min')

    # train with early stopping, keeping the best model by validation F1
    sdsn_model = training_pytorch(model=sdsn_model,
                                  train_loader=train,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  num_epochs=num_epochs,
                                  scheduler=scheduler,
                                  valid_loader=valid,
                                  seed=seed,
                                  save_best=True,
                                  early_stopping=True,
                                  validation_metric="f1",
                                  patience=10,
                                  verbose=False)

    # evaluate on validation
    loss_v, acc_v, f1_v = validation_pytorch(model=sdsn_model,
                                             valid_loader=valid,
                                             criterion=criterion,
                                             epoch=0,
                                             verbose=False)

    # evaluate on test (same evaluation routine, applied to the test loader)
    loss, acc, f1 = validation_pytorch(model=sdsn_model,
                                       valid_loader=test,
                                       criterion=criterion,
                                       epoch=0,
                                       verbose=False)

    if verbose and not verbose_shorten:
        print("* validation performance:")
        print(f"- loss: {loss_v} | f1: {f1_v} | accuracy: {acc_v}")

    if verbose:
        # report scores for the model trained here (previously referenced the
        # unrelated name `ffn_model`)
        print_test_scores(sdsn_model, test, criterion, verbose_shorten)

    return sdsn_model, (loss_v, acc_v, f1_v), (loss, acc, f1)

In [295]:
def sdsn_hyperparameter_search(num_epochs,
                               path_specifics,
                               embeddings,
                               y_data,
                               dim_reduce_methods,
                               dimensions,
                               sig_depths,
                               conv_output_channels,
                               num_time_features,
                               embedding_dim,
                               output_dim,
                               lstm_hidden_dim_sizes,
                               ffn_hidden_dim_sizes,
                               dropout_rates,
                               learning_rates,
                               BiLSTM,
                               seeds,
                               loss,
                               gamma=0,
                               augmentation_type="Conv1d",
                               comb_method="concatenation",
                               validation_metric="f1",
                               output="best_sdsn_model.pkl",
                               verbose=True):
    """
    Grid search over StackedDeepSigNet hyperparameters, followed by a
    re-evaluation of the best configuration on the test set.

    For every (dimension, dimension-reduction method) pair the model input is
    built once with `obtain_SDSN_input`. Every combination of the remaining
    hyperparameters is then trained once per entry in `seeds` via
    `implement_sdsn`, and the chosen validation metric is averaged over those
    runs. The averaged score is handed to `SaveBestModel`, which writes the
    checkpoint to `output`. Finally the checkpoint is read back, the input is
    rebuilt for the *best* (method, dimension) pair, and the best configuration
    is re-trained once per seed to report the averaged test metric.

    Parameters
    ----------
    num_epochs, y_data, num_time_features, embedding_dim, output_dim, BiLSTM,
    loss, gamma, augmentation_type, comb_method
        Forwarded unchanged to `implement_sdsn`.
    path_specifics, embeddings
        Forwarded unchanged to `obtain_SDSN_input`.
    dim_reduce_methods, dimensions
        Candidate values for the `method` / `dimension` arguments of
        `obtain_SDSN_input`.
    sig_depths, conv_output_channels, lstm_hidden_dim_sizes,
    ffn_hidden_dim_sizes, dropout_rates, learning_rates
        Candidate values for the corresponding `implement_sdsn` arguments.
    seeds
        Non-empty collection; one training run is performed per entry.
    validation_metric
        One of "loss", "accuracy" or "f1"; selects which entry of the
        (loss, accuracy, f1) tuples returned by `implement_sdsn` is averaged.
    output
        Path handed to `SaveBestModel` and read back with `torch.load`.
    verbose
        If True, print the best configuration and the test scores.

    Returns
    -------
    tuple
        (model from the last seed of the final re-training,
         `save_best_model.best_valid_metric`,
         the `extra_info` dict stored in the checkpoint).

    Raises
    ------
    ValueError
        If `validation_metric` is not supported or `seeds` is empty.
    """
    from itertools import product

    # initialise SaveBestModel class
    save_best_model = SaveBestModel(metric=validation_metric,
                                    output=output,
                                    verbose=True)

    # position of each metric in the (loss, accuracy, f1) tuples
    metric_index = {"loss": 0, "accuracy": 1, "f1": 2}
    if validation_metric not in metric_index:
        raise ValueError(
            f"validation_metric must be one of {list(metric_index)}, "
            f"got {validation_metric!r}"
        )
    score_idx = metric_index[validation_metric]

    seeds = list(seeds)
    if len(seeds) == 0:
        raise ValueError("seeds must contain at least one seed")

    # arguments to implement_sdsn that never change during the search
    fixed_kwargs = dict(num_epochs=num_epochs,
                        y_data=y_data,
                        num_time_features=num_time_features,
                        embedding_dim=embedding_dim,
                        output_dim=output_dim,
                        verbose=False)

    # same iteration order as nesting the six loops in this order
    model_grid = list(product(lstm_hidden_dim_sizes,
                              ffn_hidden_dim_sizes,
                              sig_depths,
                              conv_output_channels,
                              dropout_rates,
                              learning_rates))

    # find model parameters that has the best validation
    for dimension in dimensions:
        for method in dim_reduce_methods:
            print("\n" + "#" * 50)
            print(f"dimension: {dimension} | "
                  f"method: {method}")
            # the input only depends on (method, dimension), so build it once
            x_data, input_channels = obtain_SDSN_input(embeddings=embeddings,
                                                       path_specifics=path_specifics,
                                                       method=method,
                                                       dimension=dimension)

            for (lstm_hidden_dim, ffn_hidden_dim, sig_depth,
                 output_channels, dropout, lr) in model_grid:
                print("\n" + "!" * 50)
                print(f"lstm_hidden_dim: {lstm_hidden_dim} | "
                      f"ffn_hidden_dim: {ffn_hidden_dim} | "
                      f"sig_depth: {sig_depth} | "
                      f"output_channels: {output_channels} | "
                      f"dropout: {dropout} | "
                      f"learning_rate: {lr}")
                scores = []
                # NOTE(review): `seed` is not forwarded to implement_sdsn, so
                # whether the repetitions actually differ depends on how
                # implement_sdsn obtains its seed -- verify against its signature.
                for seed in seeds:
                    model, valid_metrics, _ = implement_sdsn(
                        x_data=x_data,
                        sig_depth=sig_depth,
                        input_channels=input_channels,
                        output_channels=output_channels,
                        lstm_hidden_dim=lstm_hidden_dim,
                        ffn_hidden_dim=ffn_hidden_dim,
                        BiLSTM=BiLSTM,
                        dropout_rate=dropout,
                        learning_rate=lr,
                        loss=loss,
                        gamma=gamma,
                        augmentation_type=augmentation_type,
                        comb_method=comb_method,
                        **fixed_kwargs,
                    )
                    # save metric that we want to use on validation set
                    scores.append(valid_metrics[score_idx])

                scores_mean = sum(scores)/len(scores)
                print(f"- average (validation) metric score: {scores_mean}")
                # save best model according to averaged metric over the different seeds
                # (`model` and "seed" refer to the last seed that was run)
                save_best_model(current_valid_metric=scores_mean,
                                model=model,
                                extra_info={"dimensions": dimension,
                                            "sig_depth": sig_depth,
                                            "method": method,
                                            "input_channels": input_channels,
                                            "output_channels": output_channels,
                                            "num_time_features": num_time_features,
                                            "embedding_dim": embedding_dim,
                                            "lstm_hidden_dim": lstm_hidden_dim,
                                            "ffn_hidden_dim": ffn_hidden_dim,
                                            "dropout_rate": dropout,
                                            "learning_rate": lr,
                                            "seed": seed,
                                            "BiLSTM": BiLSTM,
                                            "loss": loss,
                                            "gamma": gamma,
                                            "augmentation_type": augmentation_type,
                                            "comb_method": comb_method})

    checkpoint = torch.load(f=output)
    best = checkpoint["extra_info"]
    if verbose:
        print("*" * 50)
        print("The best model had the following parameters:")
        print(best)

    # Rebuild the input for the *best* configuration. Using the loop variables
    # here would silently pick whichever (method, dimension) was tried last.
    x_data, _ = obtain_SDSN_input(embeddings=embeddings,
                                  path_specifics=path_specifics,
                                  method=best["method"],
                                  dimension=best["dimensions"])

    test_scores = []
    for seed in seeds:
        model, _, test_metrics = implement_sdsn(
            x_data=x_data,
            sig_depth=best["sig_depth"],
            input_channels=best["input_channels"],
            output_channels=best["output_channels"],
            lstm_hidden_dim=best["lstm_hidden_dim"],
            ffn_hidden_dim=best["ffn_hidden_dim"],
            BiLSTM=best["BiLSTM"],
            dropout_rate=best["dropout_rate"],
            learning_rate=best["learning_rate"],
            loss=best["loss"],
            gamma=best["gamma"],
            augmentation_type=best["augmentation_type"],
            comb_method=best["comb_method"],
            **fixed_kwargs,
        )
        # collect the chosen metric on the test set for every seed
        test_scores.append(test_metrics[score_idx])

    test_scores_mean = sum(test_scores)/len(test_scores)
    if verbose:
        print(f"- Best model: average (test) metric score: {test_scores_mean}")
        print(f"scores: {test_scores}")

    return model, save_best_model.best_valid_metric, checkpoint["extra_info"]

In [298]:
# Candidate values for the SDSN grid search.
# `sig_depths` is passed as the `sig_depths` argument of sdsn_hyperparameter_search.
# NOTE(review): the search calls visible in this section pass literal lists for
# `dim_reduce_methods` instead of this variable -- confirm whether it is still used.
sig_depths = [2,3]
dim_reduce_methods = ["gaussian_random_projection", "umap"]

In [297]:
# Shared settings for the SDSN experiments below.
# One model output per entry in `label_to_id`.
output_dim = len(label_to_id)
# Candidate LSTM hidden-size lists (passed as `lstm_hidden_dim_sizes` to the search).
lstm_hidden_dims = [[8,8], [12,12,8]]
# One time feature per entry in `time_features`.
num_time_features = len(time_features)
# Candidate values for the `output_channels` argument of implement_sdsn.
conv_output_channels = [20, 10, 5]
# Single learning rate; the grid-search calls use the separate `learning_rates` list.
learning_rate = 1e-4

## SBERT 768

In [272]:
# Disabled cell: a single implement_sdsn run with fixed hyperparameters
# (gaussian random projection to 30 dimensions). Kept commented out; it does
# nothing when executed.
# x_data, input_channels = obtain_SDSN_input(embeddings=sbert_768_embeddings,
#                                            path_specifics=path_specifics,
#                                            method="gaussian_random_projection",
#                                            dimension=30)
# implement_sdsn(num_epochs=num_epochs,
#                    x_data=x_data,
#                    y_data=y_data,
#                    sig_depth=2,
#                    input_channels=input_channels,
#                    output_channels=20,
#                    num_time_features=num_time_features,
#                    embedding_dim=embedding_dim,
#                    output_dim=output_dim,
#                    lstm_hidden_dim=[8,8],
#                    ffn_hidden_dim=[100],
#                    BiLSTM=True,
#                    dropout_rate=0.1,
#                    learning_rate=0.0001,
#                    loss="cross_entropy")

In [None]:
# SDSN grid search on `sbert_768_embeddings` with UMAP as the only
# dimension-reduction method and BiLSTM=True. The call is the last expression
# of the cell, so its return value is displayed.
# This export has no execution count or output recorded for this cell.
sdsn_hyperparameter_search(num_epochs=num_epochs,
                           path_specifics=path_specifics,
                           embeddings=sbert_768_embeddings,
                           y_data=y_data,
                           dim_reduce_methods=["umap"],
                           dimensions=dimensions,
                           sig_depths=sig_depths,
                           conv_output_channels=conv_output_channels,
                           num_time_features=num_time_features,
                           embedding_dim=embedding_dim,
                           output_dim=output_dim,
                           lstm_hidden_dim_sizes=lstm_hidden_dims,
                           ffn_hidden_dim_sizes=hidden_dim_sizes,
                           dropout_rates=dropout_rates,
                           learning_rates=learning_rates,
                           BiLSTM=True,
                           seeds=seeds,
                           loss=loss,
                           validation_metric=validation_metric)

In [277]:
# SDSN grid search on `sbert_768_embeddings` with Gaussian random projection
# as the only dimension-reduction method and BiLSTM=True. The call is the last
# expression of the cell, so the returned tuple is displayed below the logs.
# NOTE(review): the recorded log lines have a different format from the one the
# current sdsn_hyperparameter_search prints, so the output below appears to come
# from an earlier version of that function -- re-run to refresh.
sdsn_hyperparameter_search(num_epochs=num_epochs,
                           path_specifics=path_specifics,
                           embeddings=sbert_768_embeddings,
                           y_data=y_data,
                           dim_reduce_methods=["gaussian_random_projection"],
                           dimensions=dimensions,
                           sig_depths=sig_depths,
                           conv_output_channels=conv_output_channels,
                           num_time_features=num_time_features,
                           embedding_dim=embedding_dim,
                           output_dim=len(label_to_id),
                           lstm_hidden_dim_sizes=lstm_hidden_dims,
                           ffn_hidden_dim_sizes=hidden_dim_sizes,
                           dropout_rates=dropout_rates,
                           learning_rates=learning_rates,
                           BiLSTM=True,
                           seeds=seeds,
                           loss=loss,
                           validation_metric=validation_metric)


##################################################
dimension: 30 | sig_depth: 2 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: 100 | output_channels: 20 | dropout: 0.1 | learning_rate: 0.01
- average (validation) metric score: 0.5119341369623539
New best validation metric: 0.5119341369623539

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: 100 | output_channels: 20 | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5917815379052254
New best validation metric: 0.5917815379052254
**************************************************
The best model had the following parameters:
{'dimensions': 30, 'sig_depth': 2, 'method': 'gaussian_random_projection', 'input_channels': 32, 'output_channels': 20, 'num_time_features': 2, 'embedding_dim': 768, 'lstm_hidden_dim': [8, 8], 'ffn

(StackedDeepSigNet(
   (conv): Conv1d(32, 20, kernel_size=(3,), stride=(1,), padding=(1,))
   (augment): Augment(
     activation=<function relu at 0x138c16430>, include_original=False, include_time=False
     (convs): ModuleList(
       (0): Conv1d(32, 20, kernel_size=(3,), stride=(1,), padding=(1,))
     )
   )
   (tanh1): Tanh()
   (signature_layers): ModuleList(
     (0): LogSignature(
       depth=2, stream=True, inverse=False, mode='words'
       (_signature_to_logsignature_instance): SignatureToLogSignature(channels=20, depth=2, stream=True, mode='words')
     )
     (1): LogSignature(
       depth=2, stream=True, inverse=False, mode='words'
       (_signature_to_logsignature_instance): SignatureToLogSignature(channels=8, depth=2, stream=True, mode='words')
     )
   )
   (lstm_layers): ModuleList(
     (0): LSTM(210, 8, batch_first=True)
     (1): LSTM(36, 8, batch_first=True, bidirectional=True)
   )
   (signature2): LogSignature(
     depth=2, stream=False, inverse=False, mod

## SBERT 384

In [299]:
# Candidate dropout rates for the grid searches (passed as `dropout_rates`).
dropout_rates = [0.2, 0.1]

In [300]:
# Settings for the 384-dimensional SBERT embeddings.
embedding_dim = 384
# Candidate target dimensions passed to the search; the first entry equals
# `embedding_dim` itself.
dimensions = [embedding_dim, 100, 50, 30]

UMAP with BiLSTM at end of SWNU unit

In [301]:
# SDSN grid search on `sbert_384_embeddings` with UMAP and BiLSTM=True.
# The recorded output below ends in a KeyboardInterrupt, so this run was
# stopped before the best-model summary and test evaluation were reached.
sdsn_hyperparameter_search(num_epochs=num_epochs,
                           path_specifics=path_specifics,
                           embeddings=sbert_384_embeddings,
                           y_data=y_data,
                           dim_reduce_methods=["umap"],
                           dimensions=dimensions,
                           sig_depths=sig_depths,
                           conv_output_channels=conv_output_channels,
                           num_time_features=num_time_features,
                           embedding_dim=embedding_dim,
                           output_dim=len(label_to_id),
                           lstm_hidden_dim_sizes=lstm_hidden_dims,
                           ffn_hidden_dim_sizes=hidden_dim_sizes,
                           dropout_rates=dropout_rates,
                           learning_rates=learning_rates,
                           BiLSTM=True,
                           seeds=seeds,
                           loss=loss,
                           validation_metric=validation_metric)


##################################################
dimension: 384 | method: umap
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.532582409081268
New best validation metric: 0.532582409081268

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.2863580735921162

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 20 | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5306912137376146

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8]

- average (validation) metric score: 0.5611065054429275

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 2 | output_channels: 5 | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.3126157782254004

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 3 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5298529643565035

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 3 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.4292547714889075

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 3 | output_channels: 20 | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.571162163598209


- average (validation) metric score: 0.41751286558218864

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100, 100] | sig_depth: 3 | output_channels: 5 | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5448804708441536

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100, 100] | sig_depth: 3 | output_channels: 5 | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.43078955899833976

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100, 100, 100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.42041900885184463

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100, 100, 100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.0001
- average (validation) 

- average (validation) metric score: 0.4479878553529969

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [12, 12, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 5 | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.556485337126941

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [12, 12, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 5 | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.26016260162601623

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [12, 12, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 5 | dropout: 0.1 | learning_rate: 0.001
- average (validation) metric score: 0.5457947061204053

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [12, 12, 8] | ffn_hidden_dim: [100] | sig_depth: 2 | output_channels: 5 | dropout: 0.1 | learning_rate: 0.0001
- average (validation) metric score: 0.5146503106366996

!

KeyboardInterrupt: 

UMAP without BiLSTM at end of SWNU unit

In [None]:
# Same search as the UMAP / BiLSTM=True run on `sbert_384_embeddings`, but with
# BiLSTM=False. This export has no execution count or output for this cell.
sdsn_hyperparameter_search(num_epochs=num_epochs,
                           path_specifics=path_specifics,
                           embeddings=sbert_384_embeddings,
                           y_data=y_data,
                           dim_reduce_methods=["umap"],
                           dimensions=dimensions,
                           sig_depths=sig_depths,
                           conv_output_channels=conv_output_channels,
                           num_time_features=num_time_features,
                           embedding_dim=embedding_dim,
                           output_dim=len(label_to_id),
                           lstm_hidden_dim_sizes=lstm_hidden_dims,
                           ffn_hidden_dim_sizes=hidden_dim_sizes,
                           dropout_rates=dropout_rates,
                           learning_rates=learning_rates,
                           BiLSTM=False,
                           seeds=seeds,
                           loss=loss,
                           validation_metric=validation_metric)

Random projections with BiLSTM at end of SWNU unit

In [309]:
# Grid for the random-projection runs. These reassign notebook-level names that
# other search cells also read, so results depend on cell execution order.
dimensions = [100, 50, 30]
# Evaluates to [[100], [100, 100], [100, 100, 100]].
hidden_dim_sizes = [[100]*i for i in range(1, 4)]
learning_rates = [0.005, 0.001, 0.0001]

In [308]:
# Display the current learning-rate grid.
# NOTE(review): the recorded output ([0.001, 0.0001]) does not match the
# three-value list assigned in the cell above; the execution counts (308 here,
# 309 there) suggest this cell ran before that assignment -- re-run to refresh.
learning_rates

[0.001, 0.0001]

In [306]:
# Display the current FFN hidden-size grid.
# NOTE(review): the recorded output has two entries, whereas the comprehension
# over range(1, 4) in the earlier cell yields three; the execution count (306)
# suggests this output is stale -- re-run to refresh.
hidden_dim_sizes

[[100, 100], [100, 100, 100]]

In [310]:
# SDSN grid search on `sbert_384_embeddings` with Gaussian random projection
# and BiLSTM=True. The recorded output below ends in a KeyboardInterrupt, so
# this run was stopped before the best-model summary and test evaluation.
sdsn_hyperparameter_search(num_epochs=num_epochs,
                           path_specifics=path_specifics,
                           embeddings=sbert_384_embeddings,
                           y_data=y_data,
                           dim_reduce_methods=["gaussian_random_projection"],
                           dimensions=dimensions,
                           sig_depths=sig_depths,
                           conv_output_channels=conv_output_channels,
                           num_time_features=num_time_features,
                           embedding_dim=embedding_dim,
                           output_dim=len(label_to_id),
                           lstm_hidden_dim_sizes=lstm_hidden_dims,
                           ffn_hidden_dim_sizes=hidden_dim_sizes,
                           dropout_rates=dropout_rates,
                           learning_rates=learning_rates,
                           BiLSTM=True,
                           seeds=seeds,
                           loss=loss,
                           validation_metric=validation_metric)


##################################################
dimension: 100 | method: gaussian_random_projection
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.005
- average (validation) metric score: 0.5314331939309107
New best validation metric: 0.5314331939309107

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.001
- average (validation) metric score: 0.5550117941016246
New best validation metric: 0.5550117941016246

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
lstm_hidden_dim: [8, 8] | ffn_hidden_dim: [100, 100] | sig_depth: 2 | output_channels: 20 | dropout: 0.2 | learning_rate: 0.0001
- average (validation) metric score: 0.39997380227265283

!!!!!!!!!

KeyboardInterrupt: 

Random projections without BiLSTM at end of SWNU unit

In [None]:
# Same search as the random-projection / BiLSTM=True run on
# `sbert_384_embeddings`, but with BiLSTM=False. This export has no execution
# count or output for this cell.
sdsn_hyperparameter_search(num_epochs=num_epochs,
                           path_specifics=path_specifics,
                           embeddings=sbert_384_embeddings,
                           y_data=y_data,
                           dim_reduce_methods=["gaussian_random_projection"],
                           dimensions=dimensions,
                           sig_depths=sig_depths,
                           conv_output_channels=conv_output_channels,
                           num_time_features=num_time_features,
                           embedding_dim=embedding_dim,
                           output_dim=len(label_to_id),
                           lstm_hidden_dim_sizes=lstm_hidden_dims,
                           ffn_hidden_dim_sizes=hidden_dim_sizes,
                           dropout_rates=dropout_rates,
                           learning_rates=learning_rates,
                           BiLSTM=False,
                           seeds=seeds,
                           loss=loss,
                           validation_metric=validation_metric)

## Pretrained BERT

### Mean pooled

In [219]:
# Sweep over LSTM and FFN hidden sizes for the mean-pooled pretrained BERT
# embeddings; the value returned by implement_sdsn is discarded, so results are
# only visible through whatever implement_sdsn prints.
# NOTE(review): obtain_SDSN_input is called here without `method`/`dimension`,
# and implement_sdsn without num_epochs, num_time_features, embedding_dim,
# output_dim or dropout_rate, unlike the calls made inside
# sdsn_hyperparameter_search. This cell (In [219]) may predate the current
# signatures -- confirm before re-running.
x_data, input_channels = obtain_SDSN_input(pooled_mean_pretrained, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
              f"|| ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(x_data=x_data,
                       y_data=y_data,
                       sig_depth=sig_depth,
                       input_channels=input_channels,
                       output_channels=output_channels,
                       lstm_hidden_dim=lstm_hidden_dim,
                       ffn_hidden_dim=ffn_hidden_dim,
                       BiLSTM=BiLSTM,
                       learning_rate=learning_rate,
                       loss=loss)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1083263158798218
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9703396558761597
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8777659806338224 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6071421504020691
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.29399019479751587
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8207249262116172 || Accuracy: 0.679049015045166 || F1-score: 0.53183287934843
Early stopping at epoch 119!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.7458651282570579
proportion of labels in prediction: [tensor(0.7128), tensor(0.1920), tensor(0.0952)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81514477 0.53763441 0.43113772]
- f1 (average): 0.5946389664

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1249394416809082
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9522772431373596
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.943194947459481 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7068532705307007
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.12456218898296356
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8211802244186401 || Accuracy: 0.6864784359931946 || F1-score: 0.5544798456325873
Early stopping at epoch 124!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.7713598229668357
proportion of labels in prediction: [tensor(0.6935), tensor(0.2098), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80677966 0.50859107 0.39285714]
- f1 (average): 0.56940928

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0913692712783813
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9906197786331177
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9287018830125983 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8286048769950867
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.20334593951702118
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8711824471300299 || Accuracy: 0.6523030996322632 || F1-score: 0.5035916812232601
Early stopping at epoch 122!
Accuracy on dataset of size 672: 68.45237731933594 %.
Average loss: 0.7822817184708335
proportion of labels in prediction: [tensor(0.6949), tensor(0.2128), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80361174 0.49829352 0.37575758]
- f1 (average): 0.5592209

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0686894655227661
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9680382013320923
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9435630494898016 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7456050515174866
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7053532004356384
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8338342850858514 || Accuracy: 0.6641901731491089 || F1-score: 0.42409309897265546
Early stopping at epoch 126!
Accuracy on dataset of size 672: 65.77381134033203 %.
Average loss: 0.8107903816483237
proportion of labels in prediction: [tensor(0.7545), tensor(0.2455), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80561555 0.43809524 0.        ]
- f1 (average): 0.41457026295

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.207420825958252
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9080084562301636
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8924839334054426 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.721686601638794
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7638161182403564
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8360991965640675 || Accuracy: 0.6701337099075317 || F1-score: 0.5284795665230447
Early stopping at epoch 131!
Accuracy on dataset of size 672: 67.85713958740234 %.
Average loss: 0.8351644006642428
proportion of labels in prediction: [tensor(0.7054), tensor(0.2068), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79507279 0.4982699  0.35802469]
- f1 (average): 0.5504557919

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1019978523254395
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9218642115592957
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9295949014750394 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6193119287490845
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.1335456371307373
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.824391554702412 || Accuracy: 0.6745913624763489 || F1-score: 0.5329199735449736
Early stopping at epoch 132!
Accuracy on dataset of size 672: 68.00595092773438 %.
Average loss: 0.8039264787327159
proportion of labels in prediction: [tensor(0.7098), tensor(0.1994), tensor(0.0908)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79910714 0.48591549 0.36585366]
- f1 (average): 0.550292098

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1113650798797607
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9515938758850098
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.932028591632843 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7567929029464722
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.13789458572864532
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8116248683495955 || Accuracy: 0.6805348992347717 || F1-score: 0.5538295414894473
Epoch: 201/10000 || Item: 0/85 || Loss: 0.636617124080658
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.47726404666900635
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.8049460432746194 || Accuracy: 0.6805348992347717 || F1-score: 0.5538295414894473
Epoch: 301/10000 |

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.110866665840149
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0323796272277832
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9433342760259454 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6905802488327026
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.6991264224052429
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8670690059661865 || Accuracy: 0.6448736786842346 || F1-score: 0.41140734684814256
Early stopping at epoch 122!
Accuracy on dataset of size 672: 64.28571319580078 %.
Average loss: 0.852442280812697
proportion of labels in prediction: [tensor(0.6979), tensor(0.3021), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7972973  0.44192635 0.        ]
- f1 (average): 0.4130745476354

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1104532480239868
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8665093183517456
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.893160110170191 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6507341861724854
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.34680408239364624
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8093088648535989 || Accuracy: 0.6656760573387146 || F1-score: 0.5295474762916609
Early stopping at epoch 119!
Accuracy on dataset of size 672: 68.60118865966797 %.
Average loss: 0.805629854852503
proportion of labels in prediction: [tensor(0.7113), tensor(0.1964), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80267559 0.4964539  0.37575758]
- f1 (average): 0.558295687

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1527830362319946
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9344223141670227
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9437594793059609 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6916497945785522
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.12838536500930786
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8135914260690863 || Accuracy: 0.668647825717926 || F1-score: 0.522173657439358
Early stopping at epoch 124!
Accuracy on dataset of size 672: 68.89881134033203 %.
Average loss: 0.7816357937726107
proportion of labels in prediction: [tensor(0.7009), tensor(0.1979), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81123596 0.50176678 0.3625731 ]
- f1 (average): 0.558525279

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0989179611206055
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.936793863773346
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9397207390178334 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7860616445541382
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.27014172077178955
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8227900916879828 || Accuracy: 0.6567607522010803 || F1-score: 0.5141564309003757
Early stopping at epoch 122!
Accuracy on dataset of size 672: 68.45237731933594 %.
Average loss: 0.7630500956015154
proportion of labels in prediction: [tensor(0.7158), tensor(0.1920), tensor(0.0923)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81111111 0.46594982 0.36363636]
- f1 (average): 0.54689909

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1213955879211426
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0499165058135986
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9505978280847723 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6903181672096252
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7371382117271423
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8040579394860701 || Accuracy: 0.6627042889595032 || F1-score: 0.4252061721417711
Early stopping at epoch 137!
Accuracy on dataset of size 672: 65.0297622680664 %.
Average loss: 0.8316478512503884
proportion of labels in prediction: [tensor(0.7173), tensor(0.2827), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79689234 0.45882353 0.        ]
- f1 (average): 0.4185719570847

### Max pooled

In [220]:
# Sweep every (lstm_hidden_dim, ffn_hidden_dim) pair from the two trial lists,
# using the input built from `pooled_max_pretrained`.
# `obtain_SDSN_input` and `implement_sdsn` are defined outside this cell, and
# all remaining right-hand-side names are read from the notebook's globals.
x_data, input_channels = obtain_SDSN_input(
    pooled_max_pretrained, path_specifics
)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Banner so the log that follows can be matched to this configuration.
        print(
            f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
            f"|| ffnhidden_dim: {ffn_hidden_dim}"
        )
        # Anything implement_sdsn returns is discarded here, so only what it
        # prints is kept in the notebook.
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0611822605133057
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.906973659992218
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9062382687221874 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7280529141426086
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.11842703819274902
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7983393940058622 || Accuracy: 0.679049015045166 || F1-score: 0.5531351637546328
Early stopping at epoch 141!
Accuracy on dataset of size 672: 66.36904907226562 %.
Average loss: 0.8046743111176924
proportion of labels in prediction: [tensor(0.7113), tensor(0.2054), tensor(0.0833)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7826087  0.47222222 0.33962264]
- f1 (average): 0.531484519

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0875803232192993
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9498467445373535
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.940553605556488 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6147798895835876
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.4620286226272583
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8429263559254733 || Accuracy: 0.6879643201828003 || F1-score: 0.5607682417229186
Early stopping at epoch 138!
Accuracy on dataset of size 672: 67.26190185546875 %.
Average loss: 0.894775005904111
proportion of labels in prediction: [tensor(0.6935), tensor(0.2173), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.77740113 0.50675676 0.40490798]
- f1 (average): 0.5630219540

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0729649066925049
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9746654033660889
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9413475285876881 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8841552734375
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3010278046131134
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8812506090510975 || Accuracy: 0.6300148367881775 || F1-score: 0.3900167628981188
Early stopping at epoch 117!
Accuracy on dataset of size 672: 62.7976188659668 %.
Average loss: 0.8637093468145891
proportion of labels in prediction: [tensor(0.7292), tensor(0.2708), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78327833 0.39759036 0.        ]
- f1 (average): 0.3936228964261887

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0652843713760376
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.020917534828186
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9387365471233021 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7338456511497498
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7304773926734924
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.819216397675601 || Accuracy: 0.6404160261154175 || F1-score: 0.40319943709774214
Early stopping at epoch 131!
Accuracy on dataset of size 672: 65.17857360839844 %.
Average loss: 0.8098824078386481
proportion of labels in prediction: [tensor(0.7143), tensor(0.2857), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80088988 0.45614035 0.        ]
- f1 (average): 0.4190100761730

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.043005108833313
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9269269704818726
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9087737690318715 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8126065731048584
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3956730365753174
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8617757721380754 || Accuracy: 0.6433877944946289 || F1-score: 0.4230753239530409
Early stopping at epoch 131!
Accuracy on dataset of size 672: 64.88095092773438 %.
Average loss: 0.8578461842103438
proportion of labels in prediction: [tensor(0.7292), tensor(0.2530), tensor(0.0179)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78547855 0.4375     0.15652174]
- f1 (average): 0.459833428

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1013391017913818
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9390515089035034
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9443628517064181 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6210097074508667
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.08356453478336334
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8513612042773854 || Accuracy: 0.6627042889595032 || F1-score: 0.5111893471108547
Early stopping at epoch 138!
Accuracy on dataset of size 672: 66.36904907226562 %.
Average loss: 0.8813020912083712
proportion of labels in prediction: [tensor(0.7158), tensor(0.2098), tensor(0.0744)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78666667 0.45360825 0.33986928]
- f1 (average): 0.5267147

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1034269332885742
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0152961015701294
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9484180916439403 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.879435122013092
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.20181603729724884
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8428544510494579 || Accuracy: 0.6523030996322632 || F1-score: 0.4039750957854406
Epoch: 201/10000 || Item: 0/85 || Loss: 0.6834601759910583
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.8086645603179932
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.8389423977244984 || Accuracy: 0.6523030996322632 || F1-score: 0.4039750957854406
Epoch: 301/10000 |

Early stopping at epoch 2446!
Accuracy on dataset of size 672: 64.43452453613281 %.
Average loss: 0.8683453310619701
proportion of labels in prediction: [tensor(0.7351), tensor(0.2649), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79079956 0.43902439 0.        ]
- f1 (average): 0.40994131737593387
- accuracy: 0.644345223903656

********** lstm_hidden_dim: [12, 12, 8] || ffnhidden_dim: [100, 100, 100, 100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.074674129486084
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9654678702354431
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9404152740131725 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7875795364379883
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7538393139839172
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.871634450825778 || Accuracy: 0.6419019103050232 || F1-score: 0.40243998252665786
Early stopping at epoch 127!
Accuracy on dataset of size 672: 62.7976188659668 %.
Average loss: 0.8363772413947366
proportion of labels in prediction: [tensor(0.7336), tensor(0.2664), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78289474 0.39513678 0.        ]
- f1 (average): 0.39267717165253

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.062619924545288
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0131422281265259
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9251405325802889 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7082000374794006
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.18760985136032104
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7862089655616067 || Accuracy: 0.6745913624763489 || F1-score: 0.5419391430208126
Early stopping at epoch 129!
Accuracy on dataset of size 672: 68.60118865966797 %.
Average loss: 0.7925098592584784
proportion of labels in prediction: [tensor(0.7545), tensor(0.1875), tensor(0.0580)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80561555 0.47101449 0.32394366]
- f1 (average): 0.53352456

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1187163591384888
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9843406081199646
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9430152882229198 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6623855829238892
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.196440652012825
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8494816801764749 || Accuracy: 0.6285289525985718 || F1-score: 0.4420697964387284
Early stopping at epoch 132!
Accuracy on dataset of size 672: 63.69047546386719 %.
Average loss: 0.8374193473295732
proportion of labels in prediction: [tensor(0.7217), tensor(0.2604), tensor(0.0179)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78761062 0.39384615 0.13913043]
- f1 (average): 0.440195736

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.125571846961975
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0724183320999146
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9471527934074402 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 1.0156971216201782
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.2595859467983246
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8206297105008905 || Accuracy: 0.6508172154426575 || F1-score: 0.39949537150815106
Early stopping at epoch 124!
Accuracy on dataset of size 672: 64.73213958740234 %.
Average loss: 0.814558663151481
proportion of labels in prediction: [tensor(0.7396), tensor(0.2604), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79475983 0.43692308 0.        ]
- f1 (average): 0.4105609674168

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0989123582839966
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.032710075378418
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9369079145518217 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7517858743667603
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7406988143920898
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8357667110183022 || Accuracy: 0.6270430684089661 || F1-score: 0.3898856990962254
Early stopping at epoch 120!
Accuracy on dataset of size 672: 63.83928680419922 %.
Average loss: 0.7939372441985391
proportion of labels in prediction: [tensor(0.7262), tensor(0.2738), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7938258  0.41317365 0.        ]
- f1 (average): 0.4023331506776

### Sum pooled

In [221]:
# Sweep every (lstm_hidden_dim, ffn_hidden_dim) pair from the two trial lists,
# using the input built from `pooled_sum_pretrained`.
# `obtain_SDSN_input` and `implement_sdsn` are defined outside this cell, and
# all remaining right-hand-side names are read from the notebook's globals.
x_data, input_channels = obtain_SDSN_input(
    pooled_sum_pretrained, path_specifics
)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Banner so the log that follows can be matched to this configuration.
        print(
            f"\n********** lstm_hidden_dim: {lstm_hidden_dim} "
            f"|| ffnhidden_dim: {ffn_hidden_dim}"
        )
        # Anything implement_sdsn returns is discarded here, so only what it
        # prints is kept in the notebook.
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.3013468980789185
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8354977965354919
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8586074764078314 || Accuracy: 0.6404160261154175 || F1-score: 0.3696874014654228
Epoch: 101/10000 || Item: 0/85 || Loss: 0.5119310021400452
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.18847788870334625
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7998412143100392 || Accuracy: 0.7057949304580688 || F1-score: 0.5972343295973432
Early stopping at epoch 119!
Accuracy on dataset of size 672: 70.98213958740234 %.
Average loss: 0.8521335016597401
proportion of labels in prediction: [tensor(0.7247), tensor(0.1860), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81456954 0.50909091 0.46625767]
- f1 (average): 0.59663937

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1109387874603271
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8544552326202393
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9038361419330944 || Accuracy: 0.6433877944946289 || F1-score: 0.36858300812226386
Epoch: 101/10000 || Item: 0/85 || Loss: 0.38765737414360046
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.033970266580581665
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9083285440098156 || Accuracy: 0.6924219727516174 || F1-score: 0.5911978988564527
Early stopping at epoch 132!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.8130029873414473
proportion of labels in prediction: [tensor(0.6830), tensor(0.2277), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79726651 0.52145215 0.42944785]
- f1 (average): 0.58272

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0930824279785156
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9830728769302368
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9443454850803722 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.4144074022769928
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.15897485613822937
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9295072176239707 || Accuracy: 0.6968796253204346 || F1-score: 0.5974639910123781
Early stopping at epoch 127!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.9161134578964927
proportion of labels in prediction: [tensor(0.6682), tensor(0.2247), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79032258 0.52491694 0.48      ]
- f1 (average): 0.5984131

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1586970090866089
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.0439859628677368
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9710449901494113 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.3956676721572876
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.23449645936489105
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8783430619673296 || Accuracy: 0.6939078569412231 || F1-score: 0.5786841232219383
Early stopping at epoch 123!
Accuracy on dataset of size 672: 69.64286041259766 %.
Average loss: 0.8908329497684132
proportion of labels in prediction: [tensor(0.6786), tensor(0.2188), tensor(0.1027)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.8        0.53198653 0.45348837]
- f1 (average): 0.5951583

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.042363166809082
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7563664317131042
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8164049712094393 || Accuracy: 0.6567607522010803 || F1-score: 0.47700207906536257
Epoch: 101/10000 || Item: 0/85 || Loss: 0.4392238259315491
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.059755731374025345
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.857847273349762 || Accuracy: 0.689450204372406 || F1-score: 0.5943976893780728
Early stopping at epoch 119!
Accuracy on dataset of size 672: 70.38690185546875 %.
Average loss: 0.8413175073536959
proportion of labels in prediction: [tensor(0.6845), tensor(0.2098), tensor(0.1057)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80546075 0.52920962 0.48275862]
- f1 (average): 0.605809664

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1025233268737793
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9335659146308899
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9173165451396595 || Accuracy: 0.6433877944946289 || F1-score: 0.3906025239031299
Epoch: 101/10000 || Item: 0/85 || Loss: 0.3760767877101898
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.003959278576076031
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9313850998878479 || Accuracy: 0.6805348992347717 || F1-score: 0.5916768076746531
Early stopping at epoch 127!
Accuracy on dataset of size 672: 69.3452377319336 %.
Average loss: 0.9569546471942555
proportion of labels in prediction: [tensor(0.6652), tensor(0.2351), tensor(0.0997)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79445727 0.54545455 0.44705882]
- f1 (average): 0.59565688

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.088029146194458
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9533932209014893
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9125539443709634 || Accuracy: 0.6210995316505432 || F1-score: 0.2705276590251193
Epoch: 101/10000 || Item: 0/85 || Loss: 0.49797555804252625
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.21526889503002167
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9374861717224121 || Accuracy: 0.6953937411308289 || F1-score: 0.5977988382852487
Early stopping at epoch 127!
Accuracy on dataset of size 672: 68.30357360839844 %.
Average loss: 0.9290091612122275
proportion of labels in prediction: [tensor(0.6652), tensor(0.2307), tensor(0.1042)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78290993 0.49836066 0.50867052]
- f1 (average): 0.59664703

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0724536180496216
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9943448901176453
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.941481660712849 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.2705845832824707
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.24389423429965973
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9391739585182883 || Accuracy: 0.6775631308555603 || F1-score: 0.5907092821166552
Early stopping at epoch 126!
Accuracy on dataset of size 672: 69.04762268066406 %.
Average loss: 0.9894102215766907
proportion of labels in prediction: [tensor(0.6741), tensor(0.2158), tensor(0.1101)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.7912844  0.50169492 0.50847458]
- f1 (average): 0.60048463

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.174203872680664
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.7820780277252197
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8391301848671653 || Accuracy: 0.6537889838218689 || F1-score: 0.43733071671183416
Epoch: 101/10000 || Item: 0/85 || Loss: 0.3944697678089142
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.03525356575846672
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0298169417814775 || Accuracy: 0.689450204372406 || F1-score: 0.5992995500071298
Early stopping at epoch 116!
Accuracy on dataset of size 672: 70.53571319580078 %.
Average loss: 1.0018632168119603
proportion of labels in prediction: [tensor(0.6726), tensor(0.2202), tensor(0.1071)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80367394 0.54362416 0.49142857]
- f1 (average): 0.612908890

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.101671814918518
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.8468042612075806
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8828405141830444 || Accuracy: 0.6508172154426575 || F1-score: 0.4551098408913714
Epoch: 101/10000 || Item: 0/85 || Loss: 0.42085975408554077
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.00869707390666008
--------------------------------------------------
Validation || Epoch: 101 || Loss: 1.0599372766234658 || Accuracy: 0.6627042889595032 || F1-score: 0.5727171197627999
Early stopping at epoch 121!
Accuracy on dataset of size 672: 68.89881134033203 %.
Average loss: 1.06849773905494
proportion of labels in prediction: [tensor(0.6443), tensor(0.2426), tensor(0.1131)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78873239 0.50479233 0.53631285]
- f1 (average): 0.6099458585

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.042513132095337
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.930135190486908
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8825143304738131 || Accuracy: 0.6344724893569946 || F1-score: 0.30546387972130545
Epoch: 101/10000 || Item: 0/85 || Loss: 0.5460414886474609
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.0366196371614933
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9511366432363336 || Accuracy: 0.6716195940971375 || F1-score: 0.5669770948601981
Early stopping at epoch 115!
Accuracy on dataset of size 672: 68.60118865966797 %.
Average loss: 0.9354486844756387
proportion of labels in prediction: [tensor(0.6741), tensor(0.2292), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79357798 0.48684211 0.48809524]
- f1 (average): 0.5895051083

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1079468727111816
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9050264954566956
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9055643948641691 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.2618783116340637
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.24361050128936768
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.9748089367693121 || Accuracy: 0.679049015045166 || F1-score: 0.5868828704502329
Early stopping at epoch 123!
Accuracy on dataset of size 672: 67.85713958740234 %.
Average loss: 1.0673792145468972
proportion of labels in prediction: [tensor(0.6577), tensor(0.2307), tensor(0.1116)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78513357 0.47868852 0.50561798]
- f1 (average): 0.58981335

### CLS

In [222]:
# Build the model input from the `pooled_cls_pretrained` embeddings, then call
# `implement_sdsn` once for every (lstm_hidden_dim, ffn_hidden_dim) pair drawn
# from the two trial lists. The helpers and every other setting used here
# (y_data, sig_depth, output_channels, BiLSTM, learning_rate, loss, ...) are
# defined outside this cell.
x_data, input_channels = obtain_SDSN_input(pooled_cls_pretrained, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Header line separating the runs of each configuration in the output.
        # NOTE(review): the label is spelled "ffnhidden_dim" (no underscore);
        # it is kept as-is so the printed text is unchanged.
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} || ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/13551 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.

********** lstm_hidden_dim: [8, 8] || ffnhidden_dim: [100, 100]


  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.2040929794311523
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9543325304985046
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8827005624771118 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7267993688583374
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.19608074426651
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7830820517106489 || Accuracy: 0.6805348992347717 || F1-score: 0.5370926091179745
Early stopping at epoch 125!
Accuracy on dataset of size 672: 70.08928680419922 %.
Average loss: 0.7530356483025984
proportion of labels in prediction: [tensor(0.7426), tensor(0.1696), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.81481481 0.46969697 0.43209877]
- f1 (average): 0.5722035166

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1437132358551025
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.001387596130371
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9409243518655951 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.733620285987854
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.146580770611763
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7862045493992892 || Accuracy: 0.679049015045166 || F1-score: 0.5387477369664612
Early stopping at epoch 123!
Accuracy on dataset of size 672: 69.49404907226562 %.
Average loss: 0.7522918527776544
proportion of labels in prediction: [tensor(0.7232), tensor(0.1801), tensor(0.0967)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80441989 0.49446494 0.42857143]
- f1 (average): 0.575818754241

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1192790269851685
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9309695959091187
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9031798947941173 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7530666589736938
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.5289303064346313
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8341609022834084 || Accuracy: 0.679049015045166 || F1-score: 0.5430116182079968
Early stopping at epoch 135!
Accuracy on dataset of size 672: 68.75 %.
Average loss: 0.7924448576840487
proportion of labels in prediction: [tensor(0.7232), tensor(0.1756), tensor(0.1012)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79558011 0.46268657 0.46783626]
- f1 (average): 0.5753676449904527
- ac

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1225972175598145
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 1.020470380783081
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9420102292841132 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.578769862651825
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.3644253611564636
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7749437364664945 || Accuracy: 0.6924219727516174 || F1-score: 0.5651662690808598
Early stopping at epoch 133!
Accuracy on dataset of size 672: 71.875 %.
Average loss: 0.7509793043136597
proportion of labels in prediction: [tensor(0.7173), tensor(0.1845), tensor(0.0982)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.82574917 0.52554745 0.46153846]
- f1 (average): 0.6042783581285004
- ac

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1425316333770752
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9537950754165649
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.885266201062636 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6237946152687073
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.1699737012386322
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8081171404231678 || Accuracy: 0.6671619415283203 || F1-score: 0.5278475681152801
Early stopping at epoch 125!
Accuracy on dataset of size 672: 68.30357360839844 %.
Average loss: 0.7727127833799883
proportion of labels in prediction: [tensor(0.7336), tensor(0.1786), tensor(0.0878)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79605263 0.45185185 0.43209877]
- f1 (average): 0.560001082

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1180133819580078
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9255759716033936
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9132819771766663 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7313652634620667
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.0869215875864029
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8132577646862377 || Accuracy: 0.658246636390686 || F1-score: 0.4984643819573214
Early stopping at epoch 123!
Accuracy on dataset of size 672: 68.1547622680664 %.
Average loss: 0.7469774267890237
proportion of labels in prediction: [tensor(0.7173), tensor(0.1875), tensor(0.0952)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.80133185 0.44202899 0.43113772]
- f1 (average): 0.5581661878

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0959380865097046
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9692442417144775
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9381837411360308 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8282528519630432
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.18298083543777466
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8390961018475619 || Accuracy: 0.6671619415283203 || F1-score: 0.5085976184094277
Epoch: 201/10000 || Item: 0/85 || Loss: 0.6801908016204834
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 0.9419746398925781
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.819741119037975 || Accuracy: 0.6671619415283203 || F1-score: 0.5085976184094277
Epoch: 301/10000 |

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.127925157546997
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9608778953552246
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9378372539173473 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8151942491531372
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7873227000236511
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8786657940257679 || Accuracy: 0.6344724893569946 || F1-score: 0.4162058177243729
Early stopping at epoch 139!
Accuracy on dataset of size 672: 64.58333587646484 %.
Average loss: 0.8206196373159235
proportion of labels in prediction: [tensor(0.6935), tensor(0.2902), tensor(0.0164)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79548023 0.44057971 0.10526316]
- f1 (average): 0.447107698

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.0581080913543701
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.965984046459198
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.8825081424279646 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.7237440943717957
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.2629973292350769
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7669510462067344 || Accuracy: 0.6879643201828003 || F1-score: 0.5468867367093668
Epoch: 201/10000 || Item: 0/85 || Loss: 0.8005213737487793
--------------------------------------------------
##### Epoch: 201/10000 || Loss: 1.259475827217102
--------------------------------------------------
Validation || Epoch: 201 || Loss: 0.7732626741582697 || Accuracy: 0.6879643201828003 || F1-score: 0.5468867367093668
Epoch: 301/10000 || 

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.1116864681243896
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9596397280693054
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9104283506220038 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6912787556648254
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.1996433138847351
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.7782505317167803 || Accuracy: 0.6939078569412231 || F1-score: 0.56118227629019
Early stopping at epoch 123!
Accuracy on dataset of size 672: 68.30357360839844 %.
Average loss: 0.7525844194672324
proportion of labels in prediction: [tensor(0.7202), tensor(0.1905), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79512735 0.48201439 0.40490798]
- f1 (average): 0.5606832390

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.096859335899353
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9114543795585632
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.907750584862449 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.8409744501113892
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.5794983506202698
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8129292293028398 || Accuracy: 0.6671619415283203 || F1-score: 0.5217717128873098
Early stopping at epoch 129!
Accuracy on dataset of size 672: 67.70833587646484 %.
Average loss: 0.8091904737732627
proportion of labels in prediction: [tensor(0.7455), tensor(0.1652), tensor(0.0893)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.79565217 0.45210728 0.36809816]
- f1 (average): 0.5386192043

  train, valid, test = split_dataset(x_data=torch.tensor(x_data).float(),


  0%|          | 0/10000 [00:00<?, ?it/s]

Epoch: 1/10000 || Item: 0/85 || Loss: 1.093720555305481
--------------------------------------------------
##### Epoch: 1/10000 || Loss: 0.9559621214866638
--------------------------------------------------
Validation || Epoch: 1 || Loss: 0.9292432503266768 || Accuracy: 0.6225854158401489 || F1-score: 0.25579975579975583
Epoch: 101/10000 || Item: 0/85 || Loss: 0.6506471633911133
--------------------------------------------------
##### Epoch: 101/10000 || Loss: 0.7666477560997009
--------------------------------------------------
Validation || Epoch: 101 || Loss: 0.8619428331201727 || Accuracy: 0.6359583735466003 || F1-score: 0.40043296039233195
Early stopping at epoch 120!
Accuracy on dataset of size 672: 63.39285659790039 %.
Average loss: 0.849154607816176
proportion of labels in prediction: [tensor(0.7202), tensor(0.2798), tensor(0.)]
proportion of labels in data: [tensor(0.6235), tensor(0.2232), tensor(0.1533)]
- f1: [0.78848283 0.41420118 0.        ]
- f1 (average): 0.4008946728088

## Fine-tuned BERT

### Mean pooled

In [None]:
# Build the model input from the `pooled_mean` embeddings, then call
# `implement_sdsn` once for every (lstm_hidden_dim, ffn_hidden_dim) pair drawn
# from the two trial lists. The helpers and every other setting used here
# (y_data, sig_depth, output_channels, BiLSTM, learning_rate, loss, ...) are
# defined outside this cell.
x_data, input_channels = obtain_SDSN_input(pooled_mean, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Header line separating the runs of each configuration in the output.
        # NOTE(review): the label is spelled "ffnhidden_dim" (no underscore);
        # it is kept as-is so the printed text is unchanged.
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} || ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

### Max pooled

In [None]:
# Build the model input from the `pooled_max` embeddings, then call
# `implement_sdsn` once for every (lstm_hidden_dim, ffn_hidden_dim) pair drawn
# from the two trial lists. The helpers and every other setting used here
# (y_data, sig_depth, output_channels, BiLSTM, learning_rate, loss, ...) are
# defined outside this cell.
x_data, input_channels = obtain_SDSN_input(pooled_max, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Header line separating the runs of each configuration in the output.
        # NOTE(review): the label is spelled "ffnhidden_dim" (no underscore);
        # it is kept as-is so the printed text is unchanged.
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} || ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

### Sum pooled

In [None]:
# Build the model input from the `pooled_sum` embeddings, then call
# `implement_sdsn` once for every (lstm_hidden_dim, ffn_hidden_dim) pair drawn
# from the two trial lists. The helpers and every other setting used here
# (y_data, sig_depth, output_channels, BiLSTM, learning_rate, loss, ...) are
# defined outside this cell.
x_data, input_channels = obtain_SDSN_input(pooled_sum, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Header line separating the runs of each configuration in the output.
        # NOTE(review): the label is spelled "ffnhidden_dim" (no underscore);
        # it is kept as-is so the printed text is unchanged.
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} || ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

### CLS

In [None]:
# Build the model input from the `pooled_cls` embeddings, then call
# `implement_sdsn` once for every (lstm_hidden_dim, ffn_hidden_dim) pair drawn
# from the two trial lists. The helpers and every other setting used here
# (y_data, sig_depth, output_channels, BiLSTM, learning_rate, loss, ...) are
# defined outside this cell.
x_data, input_channels = obtain_SDSN_input(pooled_cls, path_specifics)
for lstm_hidden_dim in lstm_hidden_dim_trial:
    for ffn_hidden_dim in ffn_hidden_dim_trial:
        # Header line separating the runs of each configuration in the output.
        # NOTE(review): the label is spelled "ffnhidden_dim" (no underscore);
        # it is kept as-is so the printed text is unchanged.
        print(f"\n********** lstm_hidden_dim: {lstm_hidden_dim} || ffnhidden_dim: {ffn_hidden_dim}")
        implement_sdsn(
            x_data=x_data,
            y_data=y_data,
            sig_depth=sig_depth,
            input_channels=input_channels,
            output_channels=output_channels,
            lstm_hidden_dim=lstm_hidden_dim,
            ffn_hidden_dim=ffn_hidden_dim,
            BiLSTM=BiLSTM,
            learning_rate=learning_rate,
            loss=loss,
        )

Baselines:
   - just looking at the sentence embeddings (this encodes nothing about the history of the post)
       - highlights importance of looking at the sequence
   - averaging history
   - comparing the cosine similarity between the previous post and the current post to see if there is a switch
   
Test for:
- How many posts do you need to look back?