# Questioning the Effect of Physiological Heartbeat Synchrony in Romantic Dyads. A Preregistered Deep Learning Analysis.

In [1]:
# For Google Colab / local machine
import tensorflow.keras
ver = tensorflow.version.VERSION

# Compare (major, minor) as integers. Slicing the first three characters and
# casting to float would read "2.10.0" as 2.1 and wrongly treat it as older
# than 2.7.
tf_major_minor = tuple(int(part) for part in ver.split(".")[:2])

if tf_major_minor > (2, 7):
  print("Latest TensorFlow version detected -> Prepare Google Colab usage\n")
  google_colab = 1
  from google.colab import drive
  drive.mount('/content/drive/')
  colab_path = "/content/drive/MyDrive/Masterarbeit/Code/two-hearts/"
  import sys
  # make the project-local modules stored on the mounted drive importable
  sys.path.append(colab_path)
else:
  # Local machine: define the flag here as well so that later cells testing
  # `google_colab` do not fail with a NameError.
  google_colab = 0
  colab_path = ""

In [2]:
# Import libraries
import os
import datetime
import random
import pickle
import IPython
import IPython.display
import numpy as np
from numpy import array, hstack
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import deserialize, serialize
from tensorflow.python.keras.saving import saving_utils
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Input, BatchNormalization, multiply, concatenate, Flatten, Activation, dot
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
import pydot as pyd
from tensorflow.keras.utils import plot_model, model_to_dot
# Attach the imported pydot module to `tensorflow.keras.utils` under the name `pydot`
# (presumably so that plot_model / model_to_dot can find it -- TODO confirm this is still required).
tensorflow.keras.utils.pydot = pyd

# Names from the project-local `lists` module; `num_participants` and `num_dyads`
# are used by the configuration and sample-preparation cells.
from lists import dyads, num_dyads, participants, num_participants

# Report the TensorFlow version the notebook is running with.
print("TensorFlow version:",tensorflow.version.VERSION)

TensorFlow version: 2.6.0


## Deep Learning

In [3]:
# --- Notebook configuration -------------------------------------------------

# Sampling rate; also used as the stride between consecutive sample windows
sampling_rate = 50

# Window lengths in time steps: input window = 5 x sampling rate, output window = 2 x sampling rate
n_steps_in = 5 * sampling_rate
n_steps_out = 2 * sampling_rate

# Conditions (each one selects a data file data_<condition>.npy)
condition = ["sit", "gaze", "gaze_swap"]

# Trial identifiers (one training run per trial and condition)
trial = ["01", "02", "03"]

# --- Report the configuration -----------------------------------------------
print("Sampling rate:", sampling_rate)
print("Time steps:", n_steps_in, n_steps_out)
print("Conditions:", condition)
print("Participants:", num_participants)
print("Dyads:", num_dyads)
print("Number of training trials:",len(trial))

Sampling rate: 50
Time steps: 250 100
Conditions: ['sit', 'gaze', 'gaze_swap']
Participants: 4
Dyads: 2
Number of training trials: 3


In [34]:
# Prepare sample data
def sample_preperation(condition):
    """Load one condition's recordings and build train/validation/test windows.

    Reads ``{colab_path}data/data_{condition}.npy`` and treats consecutive
    rows ``(0, 1), (2, 3), ...`` of the loaded array as the two members of a
    dyad. For every dyad the two series are stacked column-wise, cut into
    input/output windows of ``n_steps_in`` / ``n_steps_out`` time steps (a new
    window starts every ``sampling_rate`` steps) and split, in window order,
    into the first 60 % (training), the next 20 % (validation) and the
    remaining windows (test). The per-dyad splits are concatenated over dyads.

    Relies on the notebook-level names ``colab_path``, ``n_steps_in``,
    ``n_steps_out``, ``sampling_rate`` and ``num_participants``.

    Parameters
    ----------
    condition : str
        Condition name inserted into the data file name.

    Returns
    -------
    dict
        Arrays under the keys ``X_input_train``, ``y_output_train``,
        ``X_input_vali``, ``y_output_vali``, ``X_input_test`` and
        ``y_output_test``. Inputs have shape ``(n, n_steps_in, 2)``, outputs
        ``(n, n_steps_out, 2)``.
    """
    # Load data
    data = np.load(f"{colab_path}data/data_{condition}.npy")
    print(f"Loaded data with shape {data.shape} and type {data.dtype}")

    # Split a multivariate sequence into samples (modified from Brownlee 2018, p.156)
    def split_sequences(sequences, n_steps_in, n_steps_out):
        n_features = sequences.shape[1]
        window = n_steps_in + n_steps_out
        X, y = [], []
        # stepping by sampling_rate removes redundancy between samples; the
        # upper bound keeps every window fully inside the sequence
        for start in range(0, len(sequences) - window + 1, sampling_rate):
            end_ix = start + n_steps_in
            X.append(sequences[start:end_ix, :])
            y.append(sequences[end_ix:end_ix + n_steps_out, :])
        if not X:
            # sequence shorter than one window: return correctly shaped empty
            # arrays so the concatenation below still works
            return (np.empty((0, n_steps_in, n_features)),
                    np.empty((0, n_steps_out, n_features)))
        return array(X), array(y)

    # Collect the per-dyad chunks and concatenate once at the end. Each list
    # starts with an empty float64 block so the result keeps the same shape and
    # dtype as repeated np.append onto np.empty would give, even with no dyads.
    chunks = {
        "X_input_train": [np.empty((0, n_steps_in, 2))],
        "y_output_train": [np.empty((0, n_steps_out, 2))],
        "X_input_vali": [np.empty((0, n_steps_in, 2))],
        "y_output_vali": [np.empty((0, n_steps_out, 2))],
        "X_input_test": [np.empty((0, n_steps_in, 2))],
        "y_output_test": [np.empty((0, n_steps_out, 2))],
    }

    for idx in range(0, num_participants, 2): # index for dyads
        # define input sequences of both partners
        in_seq1 = data[idx]
        in_seq2 = data[idx+1]
        # convert to [rows, columns] structure
        in_seq1 = in_seq1.reshape((len(in_seq1), 1))
        in_seq2 = in_seq2.reshape((len(in_seq2), 1))
        # horizontally stack columns
        dataset = hstack((in_seq1, in_seq2))
        # convert into input/output
        X, y = split_sequences(dataset, n_steps_in, n_steps_out)
        # data split boundaries (60 % / 20 % / rest)
        train_end = int(0.6 * len(X))
        vali_end = train_end + int(0.2 * len(X))
        # collect data for multiple dyads
        chunks["X_input_train"].append(X[:train_end])
        chunks["y_output_train"].append(y[:train_end])
        chunks["X_input_vali"].append(X[train_end:vali_end])
        chunks["y_output_vali"].append(y[train_end:vali_end])
        chunks["X_input_test"].append(X[vali_end:])
        chunks["y_output_test"].append(y[vali_end:])

    # Create dictionary
    samples = {name: np.concatenate(parts, axis=0) for name, parts in chunks.items()}

    print("Length of samples for each set:", len(samples["X_input_train"]),
          len(samples["X_input_vali"]), len(samples["X_input_test"]))

    return samples

In [68]:
# Define simple seq2seq model 
# Modified from Wieniawska 2020 
# (https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb)

def lstm_decoder_encoder(samples, n_hidden = 100):
    """Build and compile the plain LSTM encoder-decoder (seq2seq) model.

    The input and output window shapes are taken from
    ``samples["X_input_train"]`` and ``samples["y_output_train"]``.

    Parameters
    ----------
    samples : dict
        Sample dictionary; only the shapes of the two training arrays are read.
    n_hidden : int, optional
        Number of units in the encoder and the decoder LSTM.

    Returns
    -------
    Model
        Keras model compiled with Adam (learning rate 0.001, clipnorm 1),
        mean squared error loss and the ``mae`` metric.
    """
    x_shape = samples["X_input_train"].shape
    y_shape = samples["y_output_train"].shape

    # Input layer; the second Input is not part of the model graph and only
    # provides the output window length and feature count.
    encoder_inputs = Input(shape=(x_shape[1], x_shape[2]))
    target_spec = Input(shape=(y_shape[1], y_shape[2]))
    horizon = target_spec.shape[1]
    n_targets = target_spec.shape[2]

    # Encoder LSTM: returns its output plus state_h and state_c
    # (the separately returned state_h is not used further).
    encoder_lstm = LSTM(
        n_hidden, activation='elu', dropout=0.2, recurrent_dropout=0.2,
        return_sequences=False, return_state=True)
    encoder_output, _encoder_state_h, encoder_state_c = encoder_lstm(encoder_inputs)

    # Batch normalisation to avoid gradient explosion
    hidden_normed = BatchNormalization(momentum=0.6)(encoder_output)
    cell_normed = BatchNormalization(momentum=0.6)(encoder_state_c)

    # Decoder LSTM: the normalised encoder output is repeated for every output
    # step and also used, with the normalised cell state, as initial state.
    repeated_hidden = RepeatVector(horizon)(hidden_normed)
    decoder_lstm = LSTM(
        n_hidden, activation='elu', dropout=0.2, recurrent_dropout=0.2,
        return_state=False, return_sequences=True)
    decoder_sequence = decoder_lstm(
        repeated_hidden, initial_state=[hidden_normed, cell_normed])

    # Dense layer with repeated weights (same Dense applied at every time step)
    out = TimeDistributed(Dense(n_targets))(decoder_sequence)

    # Compile model
    model = Model(inputs=encoder_inputs, outputs=out)
    model.compile(
        optimizer=Adam(learning_rate=0.001, clipnorm=1),
        loss='mean_squared_error',
        metrics=['mae'])
    print(model)

    return model

In [69]:
# Define seq2seq model with Luong attention
# Modified from Wieniawska 2020 
# (https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb)

def lstm_decoder_encoder_luong_attention(samples, n_hidden = 100):
    """Build and compile the LSTM encoder-decoder model with Luong (dot) attention.

    The input and output window shapes are taken from
    ``samples["X_input_train"]`` and ``samples["y_output_train"]``.

    Parameters
    ----------
    samples : dict
        Sample dictionary; only the shapes of the two training arrays are read.
    n_hidden : int, optional
        Number of units in the encoder and the decoder LSTM.

    Returns
    -------
    Model
        Keras model compiled with Adam (learning rate 0.001, clipnorm 1),
        mean squared error loss and the ``mae`` metric.
    """
    # Input layer.
    # The shape must be read from the arrays stored under "X_input_train" /
    # "y_output_train"; putting ".shape[...]" inside the key string looks up a
    # key that does not exist and raises KeyError.
    input_train = Input(shape=(samples["X_input_train"].shape[1], samples["X_input_train"].shape[2]))
    # Not part of the model graph; only used to read the output window shape.
    output_train = Input(shape=(samples["y_output_train"].shape[1], samples["y_output_train"].shape[2]))

    # Encoder LSTM: full sequence of hidden states plus last state_h and state_c
    encoder_stack_h, encoder_last_h, encoder_last_c = LSTM(
        n_hidden, activation='elu', dropout=0.2, recurrent_dropout=0.2,
        return_state=True, return_sequences=True)(input_train)

    # Batch normalisation to avoid gradient explosion
    encoder_last_h = BatchNormalization(momentum=0.6)(encoder_last_h)
    encoder_last_c = BatchNormalization(momentum=0.6)(encoder_last_c)

    # Decoder LSTM: last encoder hidden state repeated for every output step
    decoder_input = RepeatVector(output_train.shape[1])(encoder_last_h)

    decoder_stack_h = LSTM(n_hidden, activation='elu', dropout=0.2, recurrent_dropout=0.2,
        return_state=False, return_sequences=True)(
        decoder_input, initial_state=[encoder_last_h, encoder_last_c])

    # Attention layer: dot product of every decoder state with every encoder
    # state over the hidden dimension, normalised with softmax
    attention = dot([decoder_stack_h, encoder_stack_h], axes=[2, 2])
    attention = Activation('softmax')(attention)

    # Calculate context vector (attention-weighted sum of encoder states) with batch normalisation
    context = dot([attention, encoder_stack_h], axes=[2,1])
    context = BatchNormalization(momentum=0.6)(context)

    # Combine context vector with stacked hidden states of decoder for input to the last dense layer
    decoder_combined_context = concatenate([context, decoder_stack_h])

    # Dense layer with repeated weights
    out = TimeDistributed(Dense(output_train.shape[2]))(decoder_combined_context)

    # Compile model
    model = Model(inputs=input_train, outputs=out)
    opt = Adam(learning_rate=0.001, clipnorm=1)
    model.compile(loss='mean_squared_error', optimizer=opt, metrics=['mae'])
    print(model)

    return model

In [70]:
# Fit model
def fit_model(model, samples, epochs=500, patience=50, batch_size=64):
    """Train ``model`` on the training split with early stopping on validation loss.

    Parameters
    ----------
    model : compiled Keras model
        Model to train; it is fitted in place.
    samples : dict
        Must provide ``X_input_train``, ``y_output_train``, ``X_input_vali``
        and ``y_output_vali``.
    epochs : int, optional
        Maximum number of training epochs.
    patience : int, optional
        Number of epochs without ``val_loss`` improvement before training
        stops; the best weights are restored afterwards.
    batch_size : int, optional
        Mini-batch size passed to ``model.fit``.

    Returns
    -------
    tuple
        ``(model, history)`` where ``history`` is the object returned by
        ``model.fit``.
    """
    es = EarlyStopping(monitor='val_loss', mode='min', patience=patience, restore_best_weights=True)
    # shuffle=False keeps the training samples in their stored order
    history = model.fit(
        samples["X_input_train"], samples["y_output_train"],
        validation_data=(samples["X_input_vali"], samples["y_output_vali"]),
        epochs=epochs, verbose=1, callbacks=[es],
        batch_size=batch_size, shuffle=False)

    return model, history

In [71]:
# Execute over all conditions for n trials
samples_all = {}
model_all = {}
history_all = {}
results_all = {}
data_all = {trial_id: {} for trial_id in trial} # prepare nested dictionary

# iterate over trials
for j, trial_id in enumerate(trial):
    # set random number generator for each trial (seed = trial index)
    # NOTE(review): assigning PYTHONHASHSEED at runtime does not change hash
    # randomisation of the already running interpreter; it is only read at startup.
    os.environ['PYTHONHASHSEED']=str(j)
    random.seed(j)
    np.random.seed(j)
    tensorflow.random.set_seed(j)
    # iterate over conditions
    for cond in condition:
        samples = sample_preperation(cond)
        samples_all[cond] = samples
        model = lstm_decoder_encoder(samples) # use lstm_decoder_encoder_luong_attention() for model with attention
        model, history = fit_model(model, samples)
        model_path = f"model/model_{trial_id}_{cond}.h5"
        # store only the file path: models aren't pickable in tensorflow < 2.7
        # and can't be imported via pickle -> using model.save() instead
        model_all[cond] = model_path
        model.save(model_path)
        history_all[cond] = history.history
        # evaluate() returns [loss, mae] for the model compiled above
        results = model.evaluate(samples["X_input_test"], samples["y_output_test"], batch_size=64)
        results_all[cond] = results
    # The four dictionaries are shared across trials, so store deep copies in
    # the nested dictionary to keep every trial's entries independent.
    data_all[trial_id].update(
        samples_all=deepcopy(samples_all),
        model_all=deepcopy(model_all),
        history_all=deepcopy(history_all),
        results_all=deepcopy(results_all),
    )

Loaded data with shape (4, 14800) and type float32
Number of dyads: 2
num_train_samples per dyad: 174
num_val_samples per dyad: 58
num_test_samples per dyad: 58
Length of samples for each set: 348 116 116
Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_19 (InputLayer)          [(None, 250, 2)]     0           []                               
                                                                                                  
 lstm_18 (LSTM)                 [(None, 100),        41200       ['input_19[0][0]']               
                                 (None, 100),                                                     
                                 (None, 100)]                                                     
                                                                                     

In [72]:
# Show the test-set evaluation results of every condition, one line per trial
for trial_id in trial:
    print(data_all[trial_id]["results_all"])

{'sit': [0.04362959787249565, 0.14726123213768005], 'gaze': [0.030924485996365547, 0.12523052096366882], 'gaze_swap': [0.034299299120903015, 0.13102079927921295]}
{'sit': [0.04553426429629326, 0.15072990953922272], 'gaze': [0.03611404448747635, 0.13419637084007263], 'gaze_swap': [0.03472951799631119, 0.13194513320922852]}


In [None]:
# Serialise the nested results dictionary to 'data_all.pickle' in the working directory
with open('data_all.pickle', 'wb') as pickle_file:
    pickle.dump(data_all, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [78]:
# `google_colab` is only assigned when the Colab branch of the setup cell ran.
# Look it up defensively so a local run skips the download instead of raising
# a NameError.
if globals().get("google_colab", 0) == 1:
  from google.colab import files
  files.download('data_all.pickle')
  # download every saved model file referenced in the nested results dictionary
  for trial_data in data_all.values():
    for saved_model_path in trial_data["model_all"].values():
      files.download(saved_model_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>