In [1]:
import numpy as np
import pickle
import os

# NOTE(review): the hyperparameter search is given the `seeds` list through
# `kwargs`, not this value — confirm `seed` is still needed or remove it.
seed = 2023

In [2]:
import torch

# Use the GPU when PyTorch reports that CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
from nlpsig_networks.scripts.seqsignet_full_attention_functions import (
    seqsignet_full_attention_hyperparameter_search,
)

In [4]:
# Directory that receives the CSV files written by the hyperparameter searches
# below (it is the prefix of every `results_output` path).
output_dir = "client_talk_type_output"
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the check-then-create race of testing os.path.isdir() first.
os.makedirs(output_dir, exist_ok=True)

## AnnoMI

In [5]:
# Run the AnnoMI loading script inside this kernel. Later cells use names that
# are not assigned in any cell above (anno_mi, y_data_client, output_dim_client,
# client_index, client_transcript_id) — presumably this script defines them;
# confirm against ../load_anno_mi.py.
%run ../load_anno_mi.py

In [6]:
anno_mi.head()

Unnamed: 0,mi_quality,transcript_id,topic,utterance_id,interlocutor,timestamp,utterance_text,annotator_id,therapist_input_exists,therapist_input_subtype,reflection_exists,reflection_subtype,question_exists,question_subtype,main_therapist_behaviour,client_talk_type,datetime,speaker
0,high,0,reducing alcohol consumption,0,therapist,00:00:13,Thanks for filling it out. We give this form t...,3,False,,False,,True,open,question,,2023-10-02 00:00:13,-1
1,high,0,reducing alcohol consumption,1,client,00:00:24,Sure.,3,,,,,,,,neutral,2023-10-02 00:00:24,1
2,high,0,reducing alcohol consumption,2,therapist,00:00:25,"So, let's see. It looks that you put-- You dri...",3,True,information,False,,False,,therapist_input,,2023-10-02 00:00:25,-1
3,high,0,reducing alcohol consumption,3,client,00:00:34,Mm-hmm.,3,,,,,,,,neutral,2023-10-02 00:00:34,1
4,high,0,reducing alcohol consumption,4,therapist,00:00:34,-and you usually have three to four drinks whe...,3,True,information,False,,False,,therapist_input,,2023-10-02 00:00:34,-1


In [7]:
# Load the pickled SBERT embeddings (the saved output below shows shape (9699, 384)).
# NOTE: pickle.load can execute arbitrary code while unpickling — only point
# this at a file produced by a trusted source.
with open("../anno_mi_sbert.pkl", "rb") as f:
    sbert_embeddings = pickle.load(f)

sbert_embeddings.shape

(9699, 384)

In [8]:
# Extra feature columns handed to the search through `kwargs`.
features = ["timeline_index", "speaker"]
# One standardisation entry per feature; None for each of them.
standardise_method = [None] * len(features)
# Flags forwarded unchanged to the search function: features go into the path
# but not into the input.
include_features_in_path, include_features_in_input = True, False

In [9]:
# Training settings forwarded to the search through `kwargs`.
# `patience` is presumably the early-stopping patience — confirm in nlpsig_networks.
num_epochs, patience = 100, 5
validation_metric = "f1"

# Loss configuration; `gamma` is also embedded in the results file names.
loss, gamma = "focal", 2

# Candidate values for the grid (one list per hyperparameter).
dimensions = [15]
swmhau_parameters = [(12, 3, 10), (8, 4, 6)]
num_layers = [1]
ffn_hidden_dim_sizes = [[256, 256], [512, 512]]
dropout_rates = [0.1, 0.2]
learning_rates = [1e-3, 1e-4, 5e-4]

# Seeds passed to the search; the results tables show rows for each of them.
seeds = [1, 12, 123]

In [10]:
# Arguments shared by every hyperparameter-search call in this notebook; each
# call adds its own shift / window_size / n, dim_reduce_methods and
# results_output on top of these.
# NOTE(review): anno_mi, y_data_client, output_dim_client, client_index and
# client_transcript_id are not assigned in the cells above — presumably they
# come from `%run ../load_anno_mi.py`; confirm.
kwargs = dict(
    num_epochs=num_epochs,
    # data, labels and embeddings
    df=anno_mi,
    id_column="transcript_id",
    label_column="client_talk_type",
    embeddings=sbert_embeddings,
    y_data=y_data_client,
    output_dim=output_dim_client,
    # model / optimisation grid
    dimensions=dimensions,
    log_signature=True,
    swmhau_parameters=swmhau_parameters,
    num_layers=num_layers,
    ffn_hidden_dim_sizes=ffn_hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    device=device,
    # feature handling
    features=features,
    standardise_method=standardise_method,
    include_features_in_path=include_features_in_path,
    include_features_in_input=include_features_in_input,
    # data selection and splitting
    path_indices=client_index,
    split_ids=client_transcript_id,
    k_fold=True,
    # training control and logging
    patience=patience,
    validation_metric=validation_metric,
    verbose=False,
)

# history_length=11

In [11]:
# Settings for this section. The search logs "history length = 11" for these
# values, which matches window_size + shift * (n - 1) — TODO confirm the formula.
shift, window_size, n = 3, 5, 3

In [12]:
# Hyperparameter search with UMAP-reduced embeddings for the current
# shift / window_size / n. The first two return values are kept: the results
# table and the best-model results table that the following cells summarise.
# Per the function's log output, both are also written to CSV at
# `results_output` and a `..._best_model.csv` companion file.
# The remaining two return values are not needed here. They are bound to
# explicit throwaway names instead of `_` / `__`: those are IPython's
# output-history shortcuts, and rebinding them stops IPython from updating them.
(
    seqsignet_attention_mha_umap_kfold_11,
    best_seqsignet_attention_mha_umap_kfold_11,
    _unused_result_3,
    _unused_result_4,
) = seqsignet_full_attention_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["umap"],
    results_output=f"{output_dir}/seqsignet_attention_mha_umap_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: umap
given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_umap_focal_2_3_5_3_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_umap_focal_2_3_5_3_kfold_best_model.csv


In [13]:
seqsignet_attention_mha_umap_kfold_11

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size,model_id
0,,0.645594,0.515260,"[0.7709431751611013, 0.4511840465309514, 0.323...",0.511037,"[0.7810089020771513, 0.45439330543933054, 0.29...",0.521233,"[0.7611336032388664, 0.44801980198019803, 0.35...",,0.658478,...,1,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.641188,0.517020,"[0.7687997640813918, 0.4445414847161572, 0.337...",0.512954,"[0.7842960288808665, 0.47217068645640076, 0.28...",0.531290,"[0.753903990746096, 0.41996699669966997, 0.42]",,0.667148,...,12,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.649808,0.517789,"[0.775892334698654, 0.453377538334024, 0.32409...",0.513875,"[0.7850799289520426, 0.45545378850957535, 0.30...",0.523049,"[0.7669172932330827, 0.4513201320132013, 0.350...",,0.669236,...,123,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.620881,0.442699,"[0.7602836879432624, 0.3857949959644875, 0.182...",0.450984,"[0.7461024498886414, 0.37756714060031593, 0.22...",0.440104,"[0.7750144592249856, 0.3943894389438944, 0.150...",,0.644348,...,1,focal,2,True,5,Conv1d,,concatenation,64,1
0,,0.602682,0.444819,"[0.7475183426845058, 0.34862385321100914, 0.23...",0.445505,"[0.743773260807329, 0.3524451939291737, 0.2402...",0.444183,"[0.7513013302486987, 0.3448844884488449, 0.236...",,0.628934,...,12,focal,2,True,5,Conv1d,,concatenation,64,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,,0.588123,0.453424,"[0.7287290312830588, 0.38890970400899216, 0.24...",0.450405,"[0.7632162076606521, 0.3562113932738504, 0.231...",0.459996,"[0.6972238288027762, 0.4282178217821782, 0.254...",,0.621066,...,12,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.616858,0.458677,"[0.7562018819503848, 0.35914065666801775, 0.26...",0.468131,"[0.7457817772778402, 0.3529880478087649, 0.305...",0.453234,"[0.7669172932330827, 0.3655115511551155, 0.227...",,0.642100,...,123,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.642146,0.504475,"[0.7761925349473987, 0.40241075567918405, 0.33...",0.505421,"[0.7736282677391554, 0.45925925925925926, 0.28...",0.515317,"[0.7787738577212261, 0.3580858085808581, 0.409...",,0.669236,...,1,focal,2,True,5,Conv1d,,concatenation,64,23
0,,0.646552,0.521250,"[0.7721592560302238, 0.4274120829576195, 0.364...",0.518676,"[0.7759929906542056, 0.4711729622266402, 0.308...",0.534363,"[0.7683632157316368, 0.3910891089108911, 0.443...",,0.662653,...,12,focal,2,True,5,Conv1d,,concatenation,64,23


In [14]:
best_seqsignet_attention_mha_umap_kfold_11["f1"].mean()

0.5011317343319162

In [15]:
best_seqsignet_attention_mha_umap_kfold_11["precision"].mean()

0.501423817705441

In [16]:
best_seqsignet_attention_mha_umap_kfold_11["recall"].mean()

0.5073380601989252

In [17]:
np.stack(best_seqsignet_attention_mha_umap_kfold_11["f1_scores"]).mean(axis=0)

array([0.76704038, 0.41825766, 0.31809716])

In [18]:
np.stack(best_seqsignet_attention_mha_umap_kfold_11["precision_scores"]).mean(axis=0)

array([0.76868738, 0.44003164, 0.29555244])

In [19]:
np.stack(best_seqsignet_attention_mha_umap_kfold_11["recall_scores"]).mean(axis=0)

array([0.76576056, 0.39988999, 0.35636364])

## GRP (Gaussian random projection)

In [20]:
# Hyperparameter search with Gaussian-random-projection-reduced embeddings for
# the current shift / window_size / n. The first two return values are kept:
# the results table and the best-model results table that the following cells
# summarise. Per the function's log output, both are also written to CSV at
# `results_output` and a `..._best_model.csv` companion file.
# The remaining two return values are not needed here. They are bound to
# explicit throwaway names instead of `_` / `__`: those are IPython's
# output-history shortcuts, and rebinding them stops IPython from updating them.
(
    seqsignet_attention_mha_grp_kfold_11,
    best_seqsignet_attention_mha_grp_kfold_11,
    _unused_result_3,
    _unused_result_4,
) = seqsignet_full_attention_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_mha_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 3: history length = 11
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_grp_focal_2_3_5_3_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_grp_focal_2_3_5_3_kfold_best_model.csv


In [21]:
seqsignet_attention_mha_grp_kfold_11

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size,model_id
0,,0.635057,0.518282,"[0.7630833457581632, 0.437609841827768, 0.3541...",0.513375,"[0.7876269621421976, 0.4680451127819549, 0.284...",0.540002,"[0.7400231347599768, 0.41089108910891087, 0.46...",,0.655588,...,1,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.633908,0.514896,"[0.7631931024230713, 0.4353680430879712, 0.346...",0.512495,"[0.7852554297950444, 0.47736220472440943, 0.27...",0.536591,"[0.7423366107576634, 0.40016501650165015, 0.46...",,0.659441,...,12,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.640805,0.514914,"[0.7685103892303191, 0.43027888446215135, 0.34...",0.511458,"[0.7778436018957346, 0.46418338108882523, 0.29...",0.528008,"[0.7593984962406015, 0.400990099009901, 0.4236...",,0.667148,...,123,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.624904,0.457209,"[0.7601874201334659, 0.38722554890219557, 0.22...",0.471406,"[0.7467224546722455, 0.3750966744006187, 0.292...",0.452043,"[0.7741469057258531, 0.40016501650165015, 0.18...",,0.657514,...,1,focal,2,True,5,Conv1d,,concatenation,64,1
0,,0.631801,0.480894,"[0.7631238314396663, 0.4079522862823061, 0.271...",0.488529,"[0.7590844062947067, 0.39370683039140447, 0.31...",0.476825,"[0.7672064777327935, 0.42326732673267325, 0.24]",,0.665061,...,12,focal,2,True,5,Conv1d,,concatenation,64,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,,0.628352,0.479312,"[0.7651869158878505, 0.4085603112840467, 0.264...",0.481825,"[0.7728613569321534, 0.3865979381443299, 0.286...",0.478762,"[0.7576633892423366, 0.43316831683168316, 0.24...",,0.655267,...,12,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.600192,0.438723,"[0.7411627227578147, 0.37676842889054357, 0.19...",0.447833,"[0.7488193624557261, 0.34328358208955223, 0.25...",0.438263,"[0.7336610757663389, 0.4174917491749175, 0.163...",,0.651252,...,123,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.634674,0.520512,"[0.7608662729062547, 0.45924132364810333, 0.34...",0.511650,"[0.7987281399046104, 0.44944707740916273, 0.28...",0.539241,"[0.7264314632735686, 0.46947194719471946, 0.42...",,0.655267,...,1,focal,2,True,5,Conv1d,,concatenation,64,23
0,,0.645019,0.518685,"[0.7718890663569045, 0.4218678815489749, 0.362...",0.516872,"[0.7751531058617673, 0.47100712105798576, 0.30...",0.532646,"[0.7686524002313476, 0.382013201320132, 0.4472...",,0.665864,...,12,focal,2,True,5,Conv1d,,concatenation,64,23


In [22]:
best_seqsignet_attention_mha_grp_kfold_11["f1"].mean()

0.5132267886107895

In [23]:
best_seqsignet_attention_mha_grp_kfold_11["precision"].mean()

0.5087124344063694

In [24]:
best_seqsignet_attention_mha_grp_kfold_11["recall"].mean()

0.5236764343486594

In [25]:
np.stack(best_seqsignet_attention_mha_grp_kfold_11["f1_scores"]).mean(axis=0)

array([0.76442744, 0.44165738, 0.33359554])

In [26]:
np.stack(best_seqsignet_attention_mha_grp_kfold_11["precision_scores"]).mean(axis=0)

array([0.77840999, 0.4535798 , 0.29414751])

In [27]:
np.stack(best_seqsignet_attention_mha_grp_kfold_11["recall_scores"]).mean(axis=0)

array([0.75130133, 0.43124312, 0.38848485])

# history_length=20

In [28]:
# Settings for this section. The search logs "history length = 20" for these
# values, which matches window_size + shift * (n - 1) — TODO confirm the formula.
shift, window_size, n = 3, 5, 6

In [29]:
# Hyperparameter search with UMAP-reduced embeddings for the current
# shift / window_size / n. The first two return values are kept: the results
# table and the best-model results table that the following cells summarise.
# Per the function's log output, both are also written to CSV at
# `results_output` and a `..._best_model.csv` companion file.
# The remaining two return values are not needed here. They are bound to
# explicit throwaway names instead of `_` / `__`: those are IPython's
# output-history shortcuts, and rebinding them stops IPython from updating them.
(
    seqsignet_attention_mha_umap_kfold_20,
    best_seqsignet_attention_mha_umap_kfold_20,
    _unused_result_3,
    _unused_result_4,
) = seqsignet_full_attention_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["umap"],
    results_output=f"{output_dir}/seqsignet_attention_mha_umap_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: umap
given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_umap_focal_2_3_5_6_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_umap_focal_2_3_5_6_kfold_best_model.csv


In [30]:
seqsignet_attention_mha_umap_kfold_20

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size,model_id
0,,0.637356,0.516129,"[0.7654650645304851, 0.44117647058823534, 0.34...",0.510886,"[0.7858665854401462, 0.4636363636363636, 0.283...",0.532599,"[0.746096009253904, 0.4207920792079208, 0.4309...",,0.668433,...,1,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.636782,0.513819,"[0.7676232523914643, 0.4186691312384473, 0.355...",0.513291,"[0.7815403056637699, 0.47584033613445376, 0.28...",0.535379,"[0.7541931752458069, 0.37376237623762376, 0.47...",,0.660405,...,12,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.637356,0.515270,"[0.7667109340416113, 0.4293429342934293, 0.349...",0.512624,"[0.7827658933413679, 0.47227722772277225, 0.28...",0.534349,"[0.7513013302486987, 0.3935643564356436, 0.458...",,0.661207,...,123,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.597126,0.432631,"[0.7443189938545091, 0.33028286189683853, 0.22...",0.435358,"[0.7358010737496468, 0.33305369127516776, 0.23...",0.430501,"[0.7530364372469636, 0.32755775577557755, 0.21...",,0.635838,...,1,focal,2,True,5,Conv1d,,concatenation,64,1
0,,0.597701,0.451603,"[0.7373244641537325, 0.3819628647214854, 0.235...",0.452792,"[0.7541578469912307, 0.35318850735809393, 0.25...",0.452962,"[0.7212261422787739, 0.4158415841584158, 0.221...",,0.640013,...,12,focal,2,True,5,Conv1d,,concatenation,64,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,,0.615709,0.460318,"[0.7609350351355227, 0.35045317220543803, 0.26...",0.460173,"[0.7547652916073969, 0.36742081447963804, 0.25...",0.461336,"[0.7672064777327935, 0.334983498349835, 0.2818...",,0.664258,...,12,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.624138,0.454414,"[0.7642140468227424, 0.34387527839643656, 0.25...",0.462783,"[0.7374932759548144, 0.3736689254598258, 0.277...",0.449263,"[0.7929438982070561, 0.3184818481848185, 0.236...",,0.662813,...,123,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.653831,0.508982,"[0.7829787234042553, 0.41834862385321103, 0.32...",0.512644,"[0.7683741648106904, 0.47107438016528924, 0.29...",0.510856,"[0.7981492192018508, 0.37623762376237624, 0.35...",,0.664740,...,1,focal,2,True,5,Conv1d,,concatenation,64,23
0,,0.641379,0.520587,"[0.7706666666666666, 0.4329718004338395, 0.358...",0.514548,"[0.790097205346294, 0.4565416285452882, 0.2970...",0.538265,"[0.7521688837478311, 0.41171617161716173, 0.45...",,0.663134,...,12,focal,2,True,5,Conv1d,,concatenation,64,23


In [31]:
best_seqsignet_attention_mha_umap_kfold_20["f1"].mean()

0.5043913067188163

In [32]:
best_seqsignet_attention_mha_umap_kfold_20["precision"].mean()

0.4963152847177106

In [33]:
best_seqsignet_attention_mha_umap_kfold_20["recall"].mean()

0.5235104559231708

In [34]:
np.stack(best_seqsignet_attention_mha_umap_kfold_20["f1_scores"]).mean(axis=0)

array([0.74594372, 0.44552708, 0.32170312])

In [35]:
np.stack(best_seqsignet_attention_mha_umap_kfold_20["precision_scores"]).mean(axis=0)

array([0.79084425, 0.42861956, 0.26948205])

In [36]:
np.stack(best_seqsignet_attention_mha_umap_kfold_20["recall_scores"]).mean(axis=0)

array([0.70628494, 0.46424642, 0.4       ])

## GRP (Gaussian random projection)

In [37]:
# Hyperparameter search with Gaussian-random-projection-reduced embeddings for
# the current shift / window_size / n. The first two return values are kept:
# the results table and the best-model results table that the following cells
# summarise. Per the function's log output, both are also written to CSV at
# `results_output` and a `..._best_model.csv` companion file.
# The remaining two return values are not needed here. They are bound to
# explicit throwaway names instead of `_` / `__`: those are IPython's
# output-history shortcuts, and rebinding them stops IPython from updating them.
(
    seqsignet_attention_mha_grp_kfold_20,
    best_seqsignet_attention_mha_grp_kfold_20,
    _unused_result_3,
    _unused_result_4,
) = seqsignet_full_attention_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_mha_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: gaussian_random_projection
given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 6: history length = 20
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.
saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_grp_focal_2_3_5_6_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/seqsignet_attention_mha_grp_focal_2_3_5_6_kfold_best_model.csv


In [38]:
seqsignet_attention_mha_grp_kfold_20

Unnamed: 0,loss,accuracy,f1,f1_scores,precision,precision_scores,recall,recall_scores,valid_loss,valid_accuracy,...,seed,loss_function,gamma,k_fold,n_splits,augmentation_type,hidden_dim_aug,comb_method,batch_size,model_id
0,,0.643295,0.518670,"[0.7702484198147875, 0.4408834993503681, 0.344...",0.513865,"[0.7832585949177877, 0.4639927073837739, 0.294...",0.531331,"[0.7576633892423366, 0.41996699669966997, 0.41...",,0.667951,...,1,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.637548,0.507900,"[0.7676974937710686, 0.4297520661157025, 0.326...",0.504239,"[0.7783060921248143, 0.45446182152713893, 0.27...",0.518625,"[0.7573742047426258, 0.4075907590759076, 0.390...",,0.664258,...,12,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.638697,0.525275,"[0.760179640718563, 0.46192259675405745, 0.353...",0.517138,"[0.7880198634388579, 0.4659949622166247, 0.297...",0.542841,"[0.7342394447657605, 0.45792079207920794, 0.43...",,0.661529,...,123,focal,2,True,5,Conv1d,,concatenation,64,0
0,,0.615326,0.471926,"[0.7521994134897361, 0.3912037037037037, 0.272...",0.474406,"[0.762938726948245, 0.3673913043478261, 0.2928...",0.471540,"[0.7417582417582418, 0.4183168316831683, 0.254...",,0.664098,...,1,focal,2,True,5,Conv1d,,concatenation,64,1
0,,0.629119,0.489164,"[0.7648866130212143, 0.4071515644047135, 0.295...",0.486564,"[0.7740598164050932, 0.4011208967173739, 0.284...",0.492189,"[0.7559282822440717, 0.41336633663366334, 0.30...",,0.667791,...,12,focal,2,True,5,Conv1d,,concatenation,64,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,,0.634674,0.486503,"[0.7690763052208835, 0.39797211660329534, 0.29...",0.487645,"[0.7629482071713147, 0.4077922077922078, 0.292...",0.485548,"[0.7753036437246964, 0.3886138613861386, 0.292...",,0.679994,...,12,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.603831,0.483467,"[0.7378141083863153, 0.4019870080244554, 0.310...",0.477293,"[0.7741423125794155, 0.37437722419928826, 0.28...",0.494124,"[0.7047426257952574, 0.43399339933993397, 0.34...",,0.656872,...,123,focal,2,True,5,Conv1d,,concatenation,64,22
0,,0.633716,0.519062,"[0.7587659894657637, 0.44899598393574297, 0.34...",0.510138,"[0.79102604330091, 0.43740219092331767, 0.3019...",0.534934,"[0.7290341237709659, 0.4612211221122112, 0.414...",,0.663455,...,1,focal,2,True,5,Conv1d,,concatenation,64,23
0,,0.605556,0.501166,"[0.7358693978130294, 0.4101346001583532, 0.357...",0.490995,"[0.787149917627677, 0.3942161339421613, 0.2916...",0.526691,"[0.6908617698091383, 0.4273927392739274, 0.461...",,0.643385,...,12,focal,2,True,5,Conv1d,,concatenation,64,23


In [39]:
best_seqsignet_attention_mha_grp_kfold_20["f1"].mean()

0.5033058602955406

In [40]:
best_seqsignet_attention_mha_grp_kfold_20["precision"].mean()

0.4993054531774514

In [41]:
best_seqsignet_attention_mha_grp_kfold_20["recall"].mean()

0.5093960878640399

In [42]:
np.stack(best_seqsignet_attention_mha_grp_kfold_20["f1_scores"]).mean(axis=0)

array([0.76246417, 0.42082179, 0.32663162])

In [43]:
np.stack(best_seqsignet_attention_mha_grp_kfold_20["precision_scores"]).mean(axis=0)

array([0.77152897, 0.42556551, 0.30082188])

In [44]:
np.stack(best_seqsignet_attention_mha_grp_kfold_20["recall_scores"]).mean(axis=0)

array([0.75361481, 0.41639164, 0.35818182])

# history_length=35

In [45]:
# Settings for this section. The search logs "history length = 35" for these
# values, which matches window_size + shift * (n - 1) — TODO confirm the formula.
shift, window_size, n = 3, 5, 11

In [None]:
# Hyperparameter search with UMAP-reduced embeddings for the current
# shift / window_size / n. The first two return values are kept: the results
# table and the best-model results table that the following cells summarise.
# `results_output` is the CSV path handed to the search for its results.
# The remaining two return values are not needed here. They are bound to
# explicit throwaway names instead of `_` / `__`: those are IPython's
# output-history shortcuts, and rebinding them stops IPython from updating them.
(
    seqsignet_attention_mha_umap_kfold_35,
    best_seqsignet_attention_mha_umap_kfold_35,
    _unused_result_3,
    _unused_result_4,
) = seqsignet_full_attention_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["umap"],
    results_output=f"{output_dir}/seqsignet_attention_mha_umap_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]


##################################################
dimension: 15 | method: umap
given shift 3, window size 5 and n 11: history length = 35
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

given shift 3, window size 5 and n 11: history length = 35
[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] - columns beginning with 'd' denote the dimension reduced embeddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' feature...
[INFO] Adding 'time_diff' feature...
[INFO] Adding 'timeline_index' feature...
[INFO] Padding ids and storing in `.df_padded` and `.array_padded` attributes.


  0%|          | 0/9699 [00:00<?, ?it/s]

[INFO] The path was created for each item in the dataframe, by looking at its history, so to include embeddings in the FFN input, we concatenate the embeddings for each sentence / text.


In [None]:
seqsignet_attention_mha_umap_kfold_35

In [None]:
best_seqsignet_attention_mha_umap_kfold_35["f1"].mean()

In [None]:
best_seqsignet_attention_mha_umap_kfold_35["precision"].mean()

In [None]:
best_seqsignet_attention_mha_umap_kfold_35["recall"].mean()

In [None]:
np.stack(best_seqsignet_attention_mha_umap_kfold_35["f1_scores"]).mean(axis=0)

In [None]:
np.stack(best_seqsignet_attention_mha_umap_kfold_35["precision_scores"]).mean(axis=0)

In [None]:
np.stack(best_seqsignet_attention_mha_umap_kfold_35["recall_scores"]).mean(axis=0)

## GRP (Gaussian random projection)

In [None]:
# Hyperparameter search with Gaussian-random-projection-reduced embeddings for
# the current shift / window_size / n. The first two return values are kept:
# the results table and the best-model results table that the following cells
# summarise. `results_output` is the CSV path handed to the search for its results.
# The remaining two return values are not needed here. They are bound to
# explicit throwaway names instead of `_` / `__`: those are IPython's
# output-history shortcuts, and rebinding them stops IPython from updating them.
(
    seqsignet_attention_mha_grp_kfold_35,
    best_seqsignet_attention_mha_grp_kfold_35,
    _unused_result_3,
    _unused_result_4,
) = seqsignet_full_attention_hyperparameter_search(
    shift=shift,
    window_size=window_size,
    n=n,
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=f"{output_dir}/seqsignet_attention_mha_grp_focal_{gamma}_{shift}_{window_size}_{n}_kfold.csv",
    **kwargs,
)

In [None]:
# Display the first object returned by the GRP search in the cell above
# (presumably the table of results for every hyperparameter setting — confirm).
seqsignet_attention_mha_grp_kfold_35

In [None]:
# Average of the "f1" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_35["f1"].mean()

In [None]:
# Average of the "precision" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_35["precision"].mean()

In [None]:
# Average of the "recall" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_35["recall"].mean()

In [None]:
# Stack the "f1_scores" entries into one array and average along axis 0
# (presumably per-class F1 averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_35["f1_scores"]),
    axis=0,
)

In [None]:
# Stack the "precision_scores" entries into one array and average along axis 0
# (presumably per-class precision averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_35["precision_scores"]),
    axis=0,
)

In [None]:
# Stack the "recall_scores" entries into one array and average along axis 0
# (presumably per-class recall averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_35["recall_scores"]),
    axis=0,
)

# history_length=80

In [None]:
# Path settings for this section; the heading above gives the resulting
# history length as 80 (presumably shift * n + (window_size - shift) — confirm).
shift, window_size, n = 3, 5, 26

In [None]:
# Hyperparameter search with `dim_reduce_methods` restricted to UMAP, using the
# current `shift`, `window_size` and `n`.
# `results_output` points the search at a CSV under `output_dir`; the third and
# fourth returned values are bound to the throwaway names `_` and `__`.
(
    seqsignet_attention_mha_umap_kfold_80,
    best_seqsignet_attention_mha_umap_kfold_80,
    _,
    __,
) = seqsignet_full_attention_hyperparameter_search(
    dim_reduce_methods=["umap"],
    results_output=(
        f"{output_dir}/seqsignet_attention_mha_umap_focal_"
        f"{gamma}_{shift}_{window_size}_{n}_kfold.csv"
    ),
    shift=shift,
    window_size=window_size,
    n=n,
    **kwargs,
)

In [None]:
# Display the first object returned by the UMAP search in the cell above
# (presumably the table of results for every hyperparameter setting — confirm).
seqsignet_attention_mha_umap_kfold_80

In [None]:
# Average of the "f1" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_umap_kfold_80["f1"].mean()

In [None]:
# Average of the "precision" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_umap_kfold_80["precision"].mean()

In [None]:
# Average of the "recall" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_umap_kfold_80["recall"].mean()

In [None]:
# Stack the "f1_scores" entries into one array and average along axis 0
# (presumably per-class F1 averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_umap_kfold_80["f1_scores"]),
    axis=0,
)

In [None]:
# Stack the "precision_scores" entries into one array and average along axis 0
# (presumably per-class precision averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_umap_kfold_80["precision_scores"]),
    axis=0,
)

In [None]:
# Stack the "recall_scores" entries into one array and average along axis 0
# (presumably per-class recall averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_umap_kfold_80["recall_scores"]),
    axis=0,
)

## GRP

In [None]:
# Hyperparameter search with `dim_reduce_methods` restricted to Gaussian random
# projection, using the current `shift`, `window_size` and `n`.
# `results_output` points the search at a CSV under `output_dir`; the third and
# fourth returned values are bound to the throwaway names `_` and `__`.
(
    seqsignet_attention_mha_grp_kfold_80,
    best_seqsignet_attention_mha_grp_kfold_80,
    _,
    __,
) = seqsignet_full_attention_hyperparameter_search(
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=(
        f"{output_dir}/seqsignet_attention_mha_grp_focal_"
        f"{gamma}_{shift}_{window_size}_{n}_kfold.csv"
    ),
    shift=shift,
    window_size=window_size,
    n=n,
    **kwargs,
)

In [None]:
# Display the first object returned by the GRP search in the cell above
# (presumably the table of results for every hyperparameter setting — confirm).
seqsignet_attention_mha_grp_kfold_80

In [None]:
# Average of the "f1" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_80["f1"].mean()

In [None]:
# Average of the "precision" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_80["precision"].mean()

In [None]:
# Average of the "recall" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_80["recall"].mean()

In [None]:
# Stack the "f1_scores" entries into one array and average along axis 0
# (presumably per-class F1 averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_80["f1_scores"]),
    axis=0,
)

In [None]:
# Stack the "precision_scores" entries into one array and average along axis 0
# (presumably per-class precision averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_80["precision_scores"]),
    axis=0,
)

In [None]:
# Stack the "recall_scores" entries into one array and average along axis 0
# (presumably per-class recall averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_80["recall_scores"]),
    axis=0,
)

# history_length=110

In [None]:
# Path settings for this section; the heading above gives the resulting
# history length as 110 (presumably shift * n + (window_size - shift) — confirm).
shift, window_size, n = 3, 5, 36

In [None]:
# Hyperparameter search with `dim_reduce_methods` restricted to UMAP, using the
# current `shift`, `window_size` and `n`.
# `results_output` points the search at a CSV under `output_dir`; the third and
# fourth returned values are bound to the throwaway names `_` and `__`.
(
    seqsignet_attention_mha_umap_kfold_110,
    best_seqsignet_attention_mha_umap_kfold_110,
    _,
    __,
) = seqsignet_full_attention_hyperparameter_search(
    dim_reduce_methods=["umap"],
    results_output=(
        f"{output_dir}/seqsignet_attention_mha_umap_focal_"
        f"{gamma}_{shift}_{window_size}_{n}_kfold.csv"
    ),
    shift=shift,
    window_size=window_size,
    n=n,
    **kwargs,
)

In [None]:
# Display the first object returned by the UMAP search in the cell above
# (presumably the table of results for every hyperparameter setting — confirm).
seqsignet_attention_mha_umap_kfold_110

In [None]:
# Average of the "f1" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_umap_kfold_110["f1"].mean()

In [None]:
# Average of the "precision" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_umap_kfold_110["precision"].mean()

In [None]:
# Average of the "recall" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_umap_kfold_110["recall"].mean()

In [None]:
# Stack the "f1_scores" entries into one array and average along axis 0
# (presumably per-class F1 averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_umap_kfold_110["f1_scores"]),
    axis=0,
)

In [None]:
# Stack the "precision_scores" entries into one array and average along axis 0
# (presumably per-class precision averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_umap_kfold_110["precision_scores"]),
    axis=0,
)

In [None]:
# Stack the "recall_scores" entries into one array and average along axis 0
# (presumably per-class recall averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_umap_kfold_110["recall_scores"]),
    axis=0,
)

## GRP

In [None]:
# Hyperparameter search with `dim_reduce_methods` restricted to Gaussian random
# projection, using the current `shift`, `window_size` and `n`.
# `results_output` points the search at a CSV under `output_dir`; the third and
# fourth returned values are bound to the throwaway names `_` and `__`.
(
    seqsignet_attention_mha_grp_kfold_110,
    best_seqsignet_attention_mha_grp_kfold_110,
    _,
    __,
) = seqsignet_full_attention_hyperparameter_search(
    dim_reduce_methods=["gaussian_random_projection"],
    results_output=(
        f"{output_dir}/seqsignet_attention_mha_grp_focal_"
        f"{gamma}_{shift}_{window_size}_{n}_kfold.csv"
    ),
    shift=shift,
    window_size=window_size,
    n=n,
    **kwargs,
)

In [None]:
# Display the first object returned by the GRP search in the cell above
# (presumably the table of results for every hyperparameter setting — confirm).
seqsignet_attention_mha_grp_kfold_110

In [None]:
# Average of the "f1" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_110["f1"].mean()

In [None]:
# Average of the "precision" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_110["precision"].mean()

In [None]:
# Average of the "recall" values in the best-configuration results
# (presumably one value per fold/seed — confirm).
best_seqsignet_attention_mha_grp_kfold_110["recall"].mean()

In [None]:
# Stack the "f1_scores" entries into one array and average along axis 0
# (presumably per-class F1 averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_110["f1_scores"]),
    axis=0,
)

In [None]:
# Stack the "precision_scores" entries into one array and average along axis 0
# (presumably per-class precision averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_110["precision_scores"]),
    axis=0,
)

In [None]:
# Stack the "recall_scores" entries into one array and average along axis 0
# (presumably per-class recall averaged over runs — confirm).
np.mean(
    np.stack(best_seqsignet_attention_mha_grp_kfold_110["recall_scores"]),
    axis=0,
)