In [1]:
import numpy as np
import pickle
import os

# Global seed constant.
# NOTE(review): no visible cell reads `seed`; the hyperparameter search below is given the
# separate `seeds` list instead — confirm whether this value is still needed.
seed = 2023

In [3]:
from nlpsig_networks.scripts.ffn_baseline_functions import (
    histories_baseline_hyperparameter_search
)

In [4]:
# Directory that receives the CSV files written by the hyperparameter searches below
# (it is interpolated into their `results_output` paths).
output_dir = "client_talk_type_output"
# `exist_ok=True` makes the cell idempotent and avoids the check-then-create race of
# testing `os.path.isdir` first.
os.makedirs(output_dir, exist_ok=True)

## AnnoMI

In [5]:
# Execute the AnnoMI loading script so that the names it defines become available here.
# NOTE(review): later cells use `anno_mi`, `y_data_client`, `output_dim_client` and
# `client_index`, none of which is defined in a visible cell — presumably this script
# provides them; confirm.
%run ../load_anno_mi.py

In [6]:
# Preview the first rows of the AnnoMI dataframe as a quick sanity check of the load.
anno_mi.head()

Unnamed: 0,mi_quality,transcript_id,topic,utterance_id,interlocutor,timestamp,utterance_text,annotator_id,therapist_input_exists,therapist_input_subtype,reflection_exists,reflection_subtype,question_exists,question_subtype,main_therapist_behaviour,client_talk_type,datetime
0,high,0,reducing alcohol consumption,0,therapist,00:00:13,Thanks for filling it out. We give this form t...,3,False,,False,,True,open,question,,2023-07-05 00:00:13
1,high,0,reducing alcohol consumption,1,client,00:00:24,Sure.,3,,,,,,,,neutral,2023-07-05 00:00:24
2,high,0,reducing alcohol consumption,2,therapist,00:00:25,"So, let's see. It looks that you put-- You dri...",3,True,information,False,,False,,therapist_input,,2023-07-05 00:00:25
3,high,0,reducing alcohol consumption,3,client,00:00:34,Mm-hmm.,3,,,,,,,,neutral,2023-07-05 00:00:34
4,high,0,reducing alcohol consumption,4,therapist,00:00:34,-and you usually have three to four drinks whe...,3,True,information,False,,False,,therapist_input,,2023-07-05 00:00:34


In [7]:
# Load the pre-computed SBERT sentence embeddings.
# NOTE: unpickling executes arbitrary code, so this file must come from a trusted source.
with open("../anno_mi_sbert.pkl", mode="rb") as embeddings_file:
    sbert_embeddings = pickle.load(embeddings_file)

# Display the array dimensions (rows, embedding size) as the cell output.
sbert_embeddings.shape

(13551, 384)

# Baseline: Averaging the history and using an FFN

Here, we average the full history of a path and concatenate it with the current embedding, so the total number of features passed into the FFN is `2 * sbert_embeddings.shape[1]` (i.e. $2 \times 384 = 768$).

Here, we run the hyperparameter search for this FFN using the same parameters as the standard FFN baseline on the sentence embeddings. We try several network widths, each with two hidden layers of equal size (see `hidden_dim_sizes` below), together with a range of dropout rates and learning rates.

In [16]:
# Training budget and the metric passed to the search as `validation_metric`.
num_epochs = 100
validation_metric = "f1"

# Loss name and its `gamma` value, both forwarded to the search.
loss, gamma = "focal", 2

# Search grid: two hidden layers of equal width, crossed with dropout and learning rates.
hidden_dim_sizes = [[width, width] for width in (32, 64, 128, 256, 512)]
dropout_rates = [0.5, 0.2, 0.1]
learning_rates = [1e-3, 1e-4, 5e-4]

# Seeds forwarded to the search; the results tables below hold one row per seed.
seeds = [0, 1, 12, 123, 1234]

In [17]:
# Echo the hidden-layer grid that will be searched.
hidden_dim_sizes

[[32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]

In [18]:
# Echo the learning-rate grid that will be searched.
learning_rates

[0.001, 0.0001, 0.0005]

In [10]:
# Hyperparameter search for the mean-history FFN baseline with `k_fold=False`.
# The first return value is the table of all runs, the second the runs of the best setting;
# both are also written to CSV under `output_dir` (see `results_output`).
#
# The last two return values are not used in this notebook. They are bound to explicit
# throwaway names instead of `_` / `__`, because IPython reserves those names for its
# output history and stops refreshing them once user code assigns to them.
#
# NOTE(review): `y_data_client`, `output_dim_client` and `client_index` are not defined in
# any visible cell — presumably they come from `%run ../load_anno_mi.py`; confirm.
# NOTE(review): the recorded output of this cell has a lower execution count than the
# configuration cell and its first nested progress bar shows 4 steps although
# `hidden_dim_sizes` has 5 entries — the output looks stale; re-run after Restart & Run All.
ffn_mean_history, best_ffn_mean_history, _unused_third, _unused_fourth = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=anno_mi,
    id_column="transcript_id",
    label_column="client_talk_type",
    embeddings=sbert_embeddings,
    y_data=y_data_client,
    output_dim=output_dim_client,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=False,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    path_indices=client_index,
    k_fold=False,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_mean_history_focal_{gamma}.csv",
    verbose=False
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2_best_model.csv


In [11]:
# Display the full results table of the search (the recorded output shows one row per
# hyperparameter setting and seed).
ffn_mean_history

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,n_splits,batch_size,model_id,input_dim
0,focal,0.707807,0.637682,"[0.8004822182037371, 0.5656877897990726, 0.546...",0.557258,0.723978,0.643533,"[0.8156920799407845, 0.5921787709497207, 0.522...","(32, 32)",0.5,0.0010,0,2,False,,64,0.00,768
0,focal,0.707063,0.635630,"[0.7983343248066627, 0.5575364667747164, 0.551...",0.516384,0.724907,0.637553,"[0.8231884057971014, 0.5604838709677419, 0.528...","(32, 32)",0.5,0.0010,1,2,False,,64,0.00,768
0,focal,0.715985,0.651742,"[0.8033877797943134, 0.5789473684210525, 0.572...",0.517636,0.721190,0.634793,"[0.8210059171597633, 0.5780346820809249, 0.505...","(32, 32)",0.5,0.0010,12,2,False,,64,0.00,768
0,focal,0.724907,0.656570,"[0.8109069353882632, 0.5828025477707006, 0.576]",0.539345,0.722119,0.631979,"[0.8208846990572878, 0.5636007827788649, 0.511...","(32, 32)",0.5,0.0010,123,2,False,,64,0.00,768
0,focal,0.711524,0.655552,"[0.7927363807138385, 0.6055555555555555, 0.568...",0.523868,0.723048,0.647640,"[0.8115501519756839, 0.6116838487972508, 0.519...","(32, 32)",0.5,0.0010,1234,2,False,,64,0.00,768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,focal,0.689963,0.635935,"[0.7703134996801024, 0.5936305732484077, 0.543...",0.538941,0.719331,0.649549,"[0.7987616099071206, 0.6277602523659306, 0.522...","(256, 256)",0.1,0.0005,0,2,False,,64,0.35,768
0,focal,0.736059,0.656205,"[0.8225157955198162, 0.5882352941176471, 0.557...",0.648197,0.749071,0.657204,"[0.8362863217576186, 0.60990099009901, 0.52542...","(256, 256)",0.1,0.0005,1,2,False,,64,0.35,768
0,focal,0.706320,0.652919,"[0.7872204472843449, 0.608695652173913, 0.5628...",0.496021,0.710967,0.641594,"[0.796887159533074, 0.6096774193548388, 0.5182...","(256, 256)",0.1,0.0005,12,2,False,,64,0.35,768
0,focal,0.718216,0.648541,"[0.8059880239520958, 0.5867895545314901, 0.552...",0.501778,0.734201,0.645116,"[0.825674690007294, 0.605607476635514, 0.50406...","(256, 256)",0.1,0.0005,123,2,False,,64,0.35,768


In [12]:
# Average the numeric metrics over the seeds for every hyperparameter setting.
# `numeric_only=True` is required because the table also holds non-numeric columns
# (the loss name and the per-class F1 lists): without it pandas < 2.0 emits a
# FutureWarning while silently dropping them, and pandas >= 2.0 raises a TypeError.
ffn_mean_history.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean(numeric_only=True)

  ffn_mean_history.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,f1,valid_loss,valid_accuracy,valid_f1,seed,gamma,k_fold,batch_size,model_id,input_dim
hidden_dim,dropout_rate,learning_rate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"(32, 32)",0.1,0.0001,0.705279,0.631973,0.50558,0.709665,0.616578,274.0,2.0,0.0,64.0,0.7,768.0
"(32, 32)",0.1,0.0005,0.709294,0.645135,0.495598,0.717844,0.634849,274.0,2.0,0.0,64.0,0.8,768.0
"(32, 32)",0.1,0.001,0.715093,0.653036,0.519766,0.720446,0.642409,274.0,2.0,0.0,64.0,0.6,768.0
"(32, 32)",0.2,0.0001,0.707658,0.635425,0.502401,0.713011,0.619403,274.0,2.0,0.0,64.0,0.4,768.0
"(32, 32)",0.2,0.0005,0.712862,0.647593,0.518318,0.723978,0.642511,274.0,2.0,0.0,64.0,0.5,768.0
"(32, 32)",0.2,0.001,0.717621,0.651459,0.545165,0.724721,0.640122,274.0,2.0,0.0,64.0,0.3,768.0
"(32, 32)",0.5,0.0001,0.708401,0.633239,0.517414,0.7171,0.621545,274.0,2.0,0.0,64.0,0.1,768.0
"(32, 32)",0.5,0.0005,0.716877,0.651253,0.500579,0.725465,0.64302,274.0,2.0,0.0,64.0,0.2,768.0
"(32, 32)",0.5,0.001,0.713457,0.647435,0.530898,0.723048,0.639099,274.0,2.0,0.0,64.0,0.0,768.0
"(64, 64)",0.1,0.0001,0.707807,0.639748,0.500443,0.711152,0.623154,274.0,2.0,0.0,64.0,0.16,768.0


In [13]:
# Display the runs of the selected best hyperparameter setting (one row per seed in the
# recorded output).
best_ffn_mean_history

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,n_splits,batch_size,input_dim
0,focal,0.720446,0.648575,"[0.8047337278106509, 0.5933734939759038, 0.547...",0.650181,0.746283,0.662751,"[0.8295866569978245, 0.6229508196721313, 0.535...","(256, 256)",0.5,0.0005,0,2,False,,64,768
0,focal,0.724907,0.65391,"[0.8079951544518473, 0.6268221574344024, 0.526...",0.602107,0.752788,0.67453,"[0.8309037900874635, 0.6424682395644283, 0.550...","(256, 256)",0.5,0.0005,1,2,False,,64,768
0,focal,0.736803,0.667662,"[0.822262118491921, 0.6101190476190476, 0.5706...",0.520988,0.748141,0.663923,"[0.8329686360320934, 0.6288848263254113, 0.529...","(256, 256)",0.5,0.0005,12,2,False,,64,768
0,focal,0.717472,0.650839,"[0.8052884615384615, 0.5812807881773399, 0.565...",0.51145,0.73513,0.655305,"[0.8275862068965517, 0.594704684317719, 0.5436...","(256, 256)",0.5,0.0005,123,2,False,,64,768
0,focal,0.730112,0.65883,"[0.8172043010752688, 0.6049382716049382, 0.554...",0.507676,0.743494,0.656029,"[0.831755280407866, 0.6196660482374767, 0.5166...","(256, 256)",0.5,0.0005,1234,2,False,,64,768


In [14]:
# Mean of the `f1` column over the rows (seeds) of the best setting.
best_ffn_mean_history["f1"].mean()

0.6559632671143347

In [15]:
# Stack the per-row lists of class-wise F1 scores into one 2-D array (rows = seeds),
# then average down the rows to get one mean F1 per class.
per_seed_f1_scores = np.stack(best_ffn_mean_history["f1_scores"])
per_seed_f1_scores.mean(axis=0)

array([0.81149675, 0.60330675, 0.5530863 ])

## KFold

We can repeat this but use K-Fold evaluation instead - by default, we have $K=5$ folds.

In [10]:
# Same hyperparameter search as the non-k-fold run, but with `k_fold=True`.
# The first return value is the table of all runs, the second the runs of the best setting;
# both are also written to CSV under `output_dir` (see `results_output`).
#
# The last two return values are not used in this notebook. They are bound to explicit
# throwaway names instead of `_` / `__`, because IPython reserves those names for its
# output history and stops refreshing them once user code assigns to them.
#
# NOTE(review): `y_data_client`, `output_dim_client` and `client_index` are not defined in
# any visible cell — presumably they come from `%run ../load_anno_mi.py`; confirm.
# NOTE(review): the first nested progress bar in the recorded output shows 4 steps although
# `hidden_dim_sizes` has 5 entries — the output looks stale; re-run after Restart & Run All.
ffn_mean_history_kfold, best_ffn_mean_history_kfold, _unused_third_kfold, _unused_fourth_kfold = histories_baseline_hyperparameter_search(
    num_epochs=num_epochs,
    df=anno_mi,
    id_column="transcript_id",
    label_column="client_talk_type",
    embeddings=sbert_embeddings,
    y_data=y_data_client,
    output_dim=output_dim_client,
    hidden_dim_sizes=hidden_dim_sizes,
    dropout_rates=dropout_rates,
    learning_rates=learning_rates,
    use_signatures=False,
    seeds=seeds,
    loss=loss,
    gamma=gamma,
    path_indices=client_index,
    k_fold=True,
    validation_metric=validation_metric,
    results_output=f"{output_dir}/ffn_mean_history_focal_{gamma}_kfold.csv",
    verbose=False
)

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[INFO] Concatenating the embeddings to the dataframe...
[INFO] - columns beginning with 'e' denote the full embddings.
[INFO] Adding time feature columns into dataframe in `.df`.
[INFO] Adding 'time_encoding' and feature...
[INFO] Adding 'time_diff' and feature...
[INFO] Adding 'timeline_index' feature...
Computing the mean history for each item in the dataframe


  0%|          | 0/13551 [00:00<?, ?it/s]

saving results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2_kfold.csv
saving the best model results dataframe to CSV for this hyperparameter search in client_talk_type_output/ffn_mean_history_focal_2_kfold_best_model.csv


In [11]:
# Display the full results table of the k-fold search.
# NOTE(review): in the recorded output `valid_loss` is empty and the `valid_*` columns are
# identical to the test columns — confirm that model selection under `k_fold=True` is not
# effectively choosing hyperparameters on the evaluation folds.
ffn_mean_history_kfold

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,n_splits,batch_size,model_id,input_dim
0,focal,0.695167,0.616388,"[0.7938021454112038, 0.5463446475195822, 0.509...",,0.695167,0.616388,"[0.7938021454112038, 0.5463446475195822, 0.509...","(32, 32)",0.5,0.0010,0,2,True,5,64,0.00,768
0,focal,0.696952,0.619038,"[0.7922589893680565, 0.5625, 0.5023547880690739]",,0.696952,0.619038,"[0.7922589893680565, 0.5625, 0.5023547880690739]","(32, 32)",0.5,0.0010,1,2,True,5,64,0.00,768
0,focal,0.698885,0.624826,"[0.792836398838335, 0.5724748245346353, 0.5091...",,0.698885,0.624826,"[0.792836398838335, 0.5724748245346353, 0.5091...","(32, 32)",0.5,0.0010,12,2,True,5,64,0.00,768
0,focal,0.693383,0.610100,"[0.7936695405692689, 0.5430597771023303, 0.493...",,0.693383,0.610100,"[0.7936695405692689, 0.5430597771023303, 0.493...","(32, 32)",0.5,0.0010,123,2,True,5,64,0.00,768
0,focal,0.696059,0.621972,"[0.7898955550157882, 0.5755003032140692, 0.500...",,0.696059,0.621972,"[0.7898955550157882, 0.5755003032140692, 0.500...","(32, 32)",0.5,0.0010,1234,2,True,5,64,0.00,768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,focal,0.698439,0.624761,"[0.7915407854984895, 0.5745274248528044, 0.508...",,0.698439,0.624761,"[0.7915407854984895, 0.5745274248528044, 0.508...","(256, 256)",0.1,0.0005,0,2,True,5,64,0.35,768
0,focal,0.689219,0.617279,"[0.781838047832949, 0.5653365823941517, 0.5046...",,0.689219,0.617279,"[0.781838047832949, 0.5653365823941517, 0.5046...","(256, 256)",0.1,0.0005,1,2,True,5,64,0.35,768
0,focal,0.694126,0.619711,"[0.7874168179068362, 0.5732718894009217, 0.498...",,0.694126,0.619711,"[0.7874168179068362, 0.5732718894009217, 0.498...","(256, 256)",0.1,0.0005,12,2,True,5,64,0.35,768
0,focal,0.690855,0.617581,"[0.7853569249542403, 0.5709978896593307, 0.496...",,0.690855,0.617581,"[0.7853569249542403, 0.5709978896593307, 0.496...","(256, 256)",0.1,0.0005,123,2,True,5,64,0.35,768


In [12]:
# Average the numeric metrics over the seeds for every hyperparameter setting.
# `numeric_only=True` is required because the table also holds non-numeric columns
# (the loss name and the per-class F1 lists): without it pandas < 2.0 emits a
# FutureWarning while silently dropping them, and pandas >= 2.0 raises a TypeError.
ffn_mean_history_kfold.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean(numeric_only=True)

  ffn_mean_history_kfold.groupby(["hidden_dim", "dropout_rate", "learning_rate"]).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,accuracy,f1,valid_accuracy,valid_f1,seed,gamma,k_fold,n_splits,batch_size,model_id,input_dim
hidden_dim,dropout_rate,learning_rate,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
"(32, 32)",0.1,0.0001,0.69032,0.612854,0.69032,0.612854,274.0,2.0,1.0,5.0,64.0,0.7,768.0
"(32, 32)",0.1,0.0005,0.688952,0.614493,0.688952,0.614493,274.0,2.0,1.0,5.0,64.0,0.8,768.0
"(32, 32)",0.1,0.001,0.687851,0.616291,0.687851,0.616291,274.0,2.0,1.0,5.0,64.0,0.6,768.0
"(32, 32)",0.2,0.0001,0.691955,0.614918,0.691955,0.614918,274.0,2.0,1.0,5.0,64.0,0.4,768.0
"(32, 32)",0.2,0.0005,0.690736,0.616807,0.690736,0.616807,274.0,2.0,1.0,5.0,64.0,0.5,768.0
"(32, 32)",0.2,0.001,0.691955,0.618927,0.691955,0.618927,274.0,2.0,1.0,5.0,64.0,0.3,768.0
"(32, 32)",0.5,0.0001,0.695524,0.615719,0.695524,0.615719,274.0,2.0,1.0,5.0,64.0,0.1,768.0
"(32, 32)",0.5,0.0005,0.696892,0.620274,0.696892,0.620274,274.0,2.0,1.0,5.0,64.0,0.2,768.0
"(32, 32)",0.5,0.001,0.696089,0.618465,0.696089,0.618465,274.0,2.0,1.0,5.0,64.0,0.0,768.0
"(64, 64)",0.1,0.0001,0.691004,0.613723,0.691004,0.613723,274.0,2.0,1.0,5.0,64.0,0.16,768.0


In [13]:
# Display the runs of the best hyperparameter setting found by the k-fold search (one row
# per seed in the recorded output).
best_ffn_mean_history_kfold

Unnamed: 0,loss,accuracy,f1,f1_scores,valid_loss,valid_accuracy,valid_f1,valid_f1_scores,hidden_dim,dropout_rate,learning_rate,seed,gamma,k_fold,n_splits,batch_size,input_dim
0,focal,0.691152,0.623333,"[0.7843962848297213, 0.5783277995774223, 0.507...",,0.691152,0.623333,"[0.7843962848297213, 0.5783277995774223, 0.507...","(128, 128)",0.5,0.001,0,2,True,5,64,768
0,focal,0.697844,0.621761,"[0.794698795180723, 0.5646085295989816, 0.5059...",,0.697844,0.621761,"[0.794698795180723, 0.5646085295989816, 0.5059...","(128, 128)",0.5,0.001,1,2,True,5,64,768
0,focal,0.699628,0.626092,"[0.7914503079338244, 0.5781729000613122, 0.508...",,0.699628,0.626092,"[0.7914503079338244, 0.5781729000613122, 0.508...","(128, 128)",0.5,0.001,12,2,True,5,64,768
0,focal,0.700074,0.621861,"[0.7954680977936793, 0.5654819084213897, 0.504...",,0.700074,0.621861,"[0.7954680977936793, 0.5654819084213897, 0.504...","(128, 128)",0.5,0.001,123,2,True,5,64,768
0,focal,0.699777,0.625108,"[0.7943159922928709, 0.5693568726355612, 0.511...",,0.699777,0.625108,"[0.7943159922928709, 0.5693568726355612, 0.511...","(128, 128)",0.5,0.001,1234,2,True,5,64,768


In [14]:
# Mean of the `f1` column over the rows (seeds) of the best k-fold setting.
best_ffn_mean_history_kfold["f1"].mean()

0.6236310849908926

In [15]:
# Stack the per-row lists of class-wise F1 scores into one 2-D array (rows = seeds),
# then average down the rows to get one mean F1 per class.
per_seed_f1_scores_kfold = np.stack(best_ffn_mean_history_kfold["f1_scores"])
per_seed_f1_scores_kfold.mean(axis=0)

array([0.7920659 , 0.5711896 , 0.50763776])