In [1]:
import numpy as np
import pickle
import os

seed = 2023

In [2]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    histories_baseline_hyperparameter_search
)

In [3]:
output_dir = "client_talk_type_output"
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

## AnnoMI

In [4]:
%run ../load_anno_mi.py

In [5]:
anno_mi.head()

Unnamed: 0,mi_quality,transcript_id,topic,utterance_id,interlocutor,timestamp,utterance_text,annotator_id,therapist_input_exists,therapist_input_subtype,reflection_exists,reflection_subtype,question_exists,question_subtype,main_therapist_behaviour,client_talk_type,datetime
0,high,0,reducing alcohol consumption,0,therapist,00:00:13,Thanks for filling it out. We give this form t...,3,False,,False,,True,open,question,,2023-06-27 00:00:13
1,high,0,reducing alcohol consumption,1,client,00:00:24,Sure.,3,,,,,,,,neutral,2023-06-27 00:00:24
2,high,0,reducing alcohol consumption,2,therapist,00:00:25,"So, let's see. It looks that you put-- You dri...",3,True,information,False,,False,,therapist_input,,2023-06-27 00:00:25
3,high,0,reducing alcohol consumption,3,client,00:00:34,Mm-hmm.,3,,,,,,,,neutral,2023-06-27 00:00:34
4,high,0,reducing alcohol consumption,4,therapist,00:00:34,-and you usually have three to four drinks whe...,3,True,information,False,,False,,therapist_input,,2023-06-27 00:00:34


In [6]:
with open("../anno_mi_sbert.pkl", "rb") as f:
    sbert_embeddings = pickle.load(f)
    
sbert_embeddings.shape

(13551, 384)

# Baseline: Averaging history and use FFN

Here, we average the full history of a path and concatenate it the current embedding (the total number of features that are passed into the FFN is `2 * sbert_embeddings.shape[0]`).

Here, we will run the hyperparameter search to implement the FFN with the same parameters as the standard FFN baseline on the sentence embeddings. Going to try out some variations (1 hidden layer, 2 hidden layers and 3 hidden layers - all of size 100).

In [7]:
num_epochs = 100
hidden_dim_sizes = [32, 64, 128, 256, 512]
dropout_rates = [0.5, 0.2, 0.1]
learning_rates = [1e-3, 1e-4, 5e-4]
seeds = [0, 1, 12, 123, 1234]
loss = "focal"
gamma = 2
validation_metric = "f1"

In [8]:
hidden_dim_sizes

[32, 64, 128, 256, 512]

In [9]:
learning_rates

[0.001, 0.0001, 0.0005]

In [10]:
ffn_mean_history, best_ffn_mean_history, _, __ = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=anno_mi,
    id_column="transcript_id",
    label_column="client_talk_type",
    embeddings=sbert_embeddings,
    y_data=y_data_client,
    output_dim=output_dim_client,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=False,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    path_indices=client_index,
    k_fold=False,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_mean_history_focal_{gamma}.csv",
    verbose=False
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2_best_model.csv


In [11]:
ffn_mean_history

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,model_id,input_dim
0,focal,0.704833,0.632095,"[0.7988059701492537, 0.5533980582524272, 0.544...",0.486807,0.731413,0.647535,"[0.8205499276410999, 0.593320235756385, 0.5287...",32,0.5,0.0010,0,2,False,0.00,768
0,focal,0.718959,0.647219,"[0.8099467140319715, 0.5639344262295082, 0.567...",0.487212,0.729554,0.638528,"[0.8251249107780158, 0.5661157024793388, 0.524...",32,0.5,0.0010,1,2,False,0.00,768
0,focal,0.710037,0.633351,"[0.8054567022538554, 0.5569620253164558, 0.537...",0.489970,0.728625,0.639864,"[0.8216606498194946, 0.5843137254901961, 0.513...",32,0.5,0.0010,12,2,False,0.00,768
0,focal,0.708550,0.642357,"[0.797583081570997, 0.5701219512195121, 0.5593...",0.482732,0.730483,0.644781,"[0.8222384784198975, 0.5984848484848484, 0.513...",32,0.5,0.0010,123,2,False,0.00,768
0,focal,0.699628,0.610388,"[0.8043854587420657, 0.4937833037300178, 0.532...",0.513050,0.732342,0.635193,"[0.8290960451977402, 0.5648535564853556, 0.511...",32,0.5,0.0010,1234,2,False,0.00,768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,focal,0.711524,0.631924,"[0.8049499116087212, 0.5657492354740061, 0.525...",0.480001,0.742565,0.655806,"[0.8263988522238165, 0.6142322097378277, 0.526...",512,0.1,0.0005,0,2,False,0.44,768
0,focal,0.721933,0.655556,"[0.8067429259482239, 0.599078341013825, 0.5608...",0.536933,0.741636,0.668289,"[0.8201754385964912, 0.6295585412667946, 0.555...",512,0.1,0.0005,1,2,False,0.44,768
0,focal,0.721933,0.644857,"[0.8107142857142858, 0.6006006006006006, 0.523...",0.531904,0.744424,0.666992,"[0.8209606986899564, 0.6405959031657356, 0.539...",512,0.1,0.0005,12,2,False,0.44,768
0,focal,0.706320,0.633880,"[0.7995198079231692, 0.5660377358490565, 0.536...",0.477562,0.735130,0.655032,"[0.821897810218978, 0.6073500967117988, 0.5358...",512,0.1,0.0005,123,2,False,0.44,768


In [21]:
ffn_mean_history.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean()

  ffn_mean_history.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,f1,valid_loss,valid_accuracy,valid_f1,seed,gamma,k_fold,model_id,input_dim
hidden_dim,dropout_rate,learning_rate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
32,0.1,0.0001,0.695613,0.613244,0.509624,0.721561,0.627511,274.0,2.0,0.0,0.7,768.0
32,0.1,0.0005,0.69368,0.614482,0.503929,0.719888,0.629605,274.0,2.0,0.0,0.8,768.0
32,0.1,0.001,0.70855,0.636394,0.512932,0.734572,0.649357,274.0,2.0,0.0,0.6,768.0
32,0.2,0.0001,0.695762,0.612208,0.511101,0.722119,0.629324,274.0,2.0,0.0,0.4,768.0
32,0.2,0.0005,0.693532,0.615951,0.506872,0.716171,0.626653,274.0,2.0,0.0,0.5,768.0
32,0.2,0.001,0.708104,0.637757,0.506828,0.7329,0.64899,274.0,2.0,0.0,0.3,768.0
32,0.5,0.0001,0.695465,0.608918,0.525303,0.724164,0.629018,274.0,2.0,0.0,0.1,768.0
32,0.5,0.0005,0.700223,0.617436,0.509732,0.727509,0.633274,274.0,2.0,0.0,0.2,768.0
32,0.5,0.001,0.708401,0.633082,0.491954,0.730483,0.64118,274.0,2.0,0.0,0.0,768.0
64,0.1,0.0001,0.695911,0.61356,0.511539,0.719888,0.6268,274.0,2.0,0.0,0.16,768.0


In [12]:
best_ffn_mean_history

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,input_dim
0,focal,0.727138,0.64998,"[0.8173302107728336, 0.5843071786310519, 0.548...",0.496158,0.75,0.660522,"[0.8348820586132951, 0.6330645161290323, 0.513...",256,0.5,0.001,0,2,False,768
0,focal,0.729368,0.652664,"[0.8166373755125951, 0.5993485342019544, 0.542...",0.517954,0.747212,0.665545,"[0.8289191123836793, 0.6193293885601577, 0.548...",256,0.5,0.001,1,2,False,768
0,focal,0.72119,0.645401,"[0.8117647058823529, 0.5760517799352751, 0.548...",0.489424,0.751859,0.675014,"[0.8311688311688312, 0.6313725490196079, 0.5625]",256,0.5,0.001,12,2,False,768
0,focal,0.719703,0.652824,"[0.808743169398907, 0.5903614457831324, 0.5593...",0.526963,0.745353,0.675889,"[0.8227474150664698, 0.6356877323420075, 0.569...",256,0.5,0.001,123,2,False,768
0,focal,0.709294,0.634209,"[0.8033273915626856, 0.5596184419713832, 0.539...",0.473197,0.744424,0.66471,"[0.8287769784172663, 0.6062992125984252, 0.559...",256,0.5,0.001,1234,2,False,768


In [13]:
best_ffn_mean_history["f1"].mean()

0.6470156611228788

In [14]:
np.stack(best_ffn_mean_history["f1_scores"]).mean(axis=0)

array([0.81156057, 0.58193748, 0.54754894])

In [15]:
ffn_mean_history_kfold, best_ffn_mean_history_kfold, _, __ = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=anno_mi,
    id_column="transcript_id",
    label_column="client_talk_type",
    embeddings=sbert_embeddings,
    y_data=y_data_client,
    output_dim=output_dim_client,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=False,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    path_indices=client_index,
    k_fold=True,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_mean_history_focal_{gamma}_kfold.csv",
    verbose=False
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2_kfold_best_model.csv


In [16]:
ffn_mean_history_kfold

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,model_id,input_dim
0,focal,0.690260,0.609282,"[0.7887729399064412, 0.5581395348837209, 0.480...",,0.690260,0.609282,"[0.7887729399064412, 0.5581395348837209, 0.480...",32,0.5,0.0010,0,2,True,0.00,768
0,focal,0.696506,0.610989,"[0.7958894401133947, 0.5550208533846648, 0.482...",,0.696506,0.610989,"[0.7958894401133947, 0.5550208533846648, 0.482...",32,0.5,0.0010,1,2,True,0.00,768
0,focal,0.692491,0.607981,"[0.7926496739774748, 0.5502407704654895, 0.481...",,0.692491,0.607981,"[0.7926496739774748, 0.5502407704654895, 0.481...",32,0.5,0.0010,12,2,True,0.00,768
0,focal,0.689814,0.610035,"[0.7881406793902292, 0.5567398119122258, 0.485...",,0.689814,0.610035,"[0.7881406793902292, 0.5567398119122258, 0.485...",32,0.5,0.0010,123,2,True,0.00,768
0,focal,0.685948,0.605933,"[0.7852405643313638, 0.5532824427480916, 0.479...",,0.685948,0.605933,"[0.7852405643313638, 0.5532824427480916, 0.479...",32,0.5,0.0010,1234,2,True,0.00,768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,focal,0.696357,0.610321,"[0.7953795379537955, 0.5540626999360204, 0.481...",,0.696357,0.610321,"[0.7953795379537955, 0.5540626999360204, 0.481...",512,0.1,0.0005,0,2,True,0.44,768
0,focal,0.689219,0.604244,"[0.7899940440738535, 0.5505617977528091, 0.472...",,0.689219,0.604244,"[0.7899940440738535, 0.5505617977528091, 0.472...",512,0.1,0.0005,1,2,True,0.44,768
0,focal,0.688178,0.608798,"[0.786265060240964, 0.5607476635514019, 0.4793...",,0.688178,0.608798,"[0.786265060240964, 0.5607476635514019, 0.4793...",512,0.1,0.0005,12,2,True,0.44,768
0,focal,0.687584,0.604925,"[0.7859782997496125, 0.5498619208346118, 0.478...",,0.687584,0.604925,"[0.7859782997496125, 0.5498619208346118, 0.478...",512,0.1,0.0005,123,2,True,0.44,768


In [20]:
ffn_mean_history_kfold.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean()

  ffn_mean_history_kfold.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,f1,valid_accuracy,valid_f1,seed,gamma,k_fold,model_id,input_dim
hidden_dim,dropout_rate,learning_rate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
32,0.1,0.0001,0.690796,0.604337,0.690796,0.604337,274.0,2.0,1.0,0.7,768.0
32,0.1,0.0005,0.68797,0.605161,0.68797,0.605161,274.0,2.0,1.0,0.8,768.0
32,0.1,0.001,0.687019,0.606028,0.687019,0.606028,274.0,2.0,1.0,0.6,768.0
32,0.2,0.0001,0.69029,0.603632,0.69029,0.603632,274.0,2.0,1.0,0.4,768.0
32,0.2,0.0005,0.688297,0.605943,0.688297,0.605943,274.0,2.0,1.0,0.5,768.0
32,0.2,0.001,0.6869,0.606118,0.6869,0.606118,274.0,2.0,1.0,0.3,768.0
32,0.5,0.0001,0.694989,0.606623,0.694989,0.606623,274.0,2.0,1.0,0.1,768.0
32,0.5,0.0005,0.691688,0.609917,0.691688,0.609917,274.0,2.0,1.0,0.2,768.0
32,0.5,0.001,0.691004,0.608844,0.691004,0.608844,274.0,2.0,1.0,0.0,768.0
64,0.1,0.0001,0.69032,0.606764,0.69032,0.606764,274.0,2.0,1.0,0.16,768.0


In [17]:
best_ffn_mean_history_kfold

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,input_dim
0,focal,0.692045,0.611699,"[0.789056875449964, 0.5618525289457648, 0.4841...",,0.692045,0.611699,"[0.789056875449964, 0.5618525289457648, 0.4841...",512,0.5,0.001,0,2,True,768
0,focal,0.693383,0.60813,"[0.7931526390870186, 0.55572803477181, 0.47550...",,0.693383,0.60813,"[0.7931526390870186, 0.55572803477181, 0.47550...",512,0.5,0.001,1,2,True,768
0,focal,0.687435,0.611553,"[0.7829869502174964, 0.5614143920595532, 0.490...",,0.687435,0.611553,"[0.7829869502174964, 0.5614143920595532, 0.490...",512,0.5,0.001,12,2,True,768
0,focal,0.695613,0.613517,"[0.7933000712758376, 0.5556600815302603, 0.491...",,0.695613,0.613517,"[0.7933000712758376, 0.5556600815302603, 0.491...",512,0.5,0.001,123,2,True,768
0,focal,0.689963,0.613183,"[0.7852672123254695, 0.5613601236476042, 0.492...",,0.689963,0.613183,"[0.7852672123254695, 0.5613601236476042, 0.492...",512,0.5,0.001,1234,2,True,768


In [18]:
best_ffn_mean_history_kfold["f1"].mean()

0.6116161723937321

In [19]:
np.stack(best_ffn_mean_history_kfold["f1_scores"]).mean(axis=0)

array([0.78875275, 0.55920303, 0.48689274])