# Does the Physiological Synchrony of Heartbeats in Romantic Dyads really exist? A Deep Learning Analysis.

In [6]:
# Environment bootstrap: resolve the project root for Google Colab or a local machine
if 'google.colab' not in str(get_ipython()):
    print('Not running on CoLab')
    # Local run: an empty prefix makes every project path relative to the working directory
    colab_path = ""
else:
    print('Running on CoLab')
    import sys
    from google.colab import drive
    # Mount Google Drive so the project folder becomes visible in the file system
    drive.mount('/content/drive/')
    colab_path = "/content/drive/MyDrive/Masterarbeit/Code/two-hearts/"
    # Put the project folder on the import path so modules stored there can be imported
    sys.path.append(colab_path)

Running on CoLab
Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [7]:
# Import libraries
import os
import datetime
import random
import pickle
import IPython
import IPython.display
import numpy as np
from numpy import array, hstack
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow
from tensorflow.python.keras.layers import deserialize, serialize
from tensorflow.python.keras.saving import saving_utils
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Dense, RepeatVector,\
    TimeDistributed, Input, BatchNormalization, multiply,\
    concatenate, Flatten, Activation, dot
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
import pydot as pyd
from tensorflow.keras.utils import plot_model, model_to_dot
tensorflow.keras.utils.pydot = pyd

# Import lists
from lists import dyads, num_dyads, participants, num_participants

print("TensorFlow version:", tensorflow.version.VERSION)

TensorFlow version: 2.8.2


In [8]:
def seed_value(seed):
    """Seed every random number generator this notebook relies on.

    Writes ``seed`` to the ``PYTHONHASHSEED`` environment variable and then
    seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator with the same value.

    Args:
        seed: Seed value shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Seed the stdlib, NumPy and TensorFlow generators in turn
    for seeder in (random.seed, np.random.seed, tensorflow.random.set_seed):
        seeder(seed)

## Deep Learning

In [9]:
# --- Experiment configuration ---

# Sampling rate of the recordings (presumably in Hz - TODO confirm)
sampling_rate = 50
print("Sampling rate:", sampling_rate)

# Window lengths in time steps: 5x the sampling rate as model input,
# 2x the sampling rate as forecast horizon
n_steps_in = 5 * sampling_rate
n_steps_out = 2 * sampling_rate
print("Time steps:", n_steps_in, n_steps_out)

# Conditions; each one has its own data file and gets its own model
conditions = ["sit", "gaze", "gaze_swap"]
print("Conditions:", conditions)

# Cohort sizes are imported from the project's `lists` module
print("Participants:", num_participants)
print("Dyads:", num_dyads)

# Zero-padded identifiers "01" .. "10", one per training run
runs = [f"{run_no:02d}" for run_no in range(1, 11)]

print("Number of training runs:", len(runs))

Sampling rate: 50
Time steps: 250 100
Conditions: ['sit', 'gaze', 'gaze_swap']
Participants: 8
Dyads: 4
Number of training runs: 5


In [10]:
# Prepare sample data
def sample_preparation(condition):
    """Load one condition's recordings and cut them into train/validation/test windows.

    The array stored at ``{colab_path}data/data_{condition}.npy`` is indexed by
    participant along its first axis. For every second entry ``x`` of the
    global ``participants`` list, rows ``x`` and ``x + 1`` are paired as one
    dyad, stacked as two columns and cut into (input, target) windows. Each
    dyad's windows are split in order into 60 % training, 20 % validation and
    the remainder test; the splits of all dyads are concatenated.

    Relies on the notebook globals ``colab_path``, ``participants``,
    ``sampling_rate``, ``n_steps_in`` and ``n_steps_out``. Prints the loaded
    array's shape/dtype and the size of each split.

    NOTE(review): consecutive windows start ``sampling_rate`` steps apart, so
    whenever that stride is shorter than a full window, neighbouring windows
    overlap, including the ones on either side of a split boundary. Confirm
    this is acceptable for the evaluation.

    Args:
        condition: Name of the condition; used to build the data file name.

    Returns:
        dict with the keys ``X_input_train``, ``y_output_train``,
        ``X_input_vali``, ``y_output_vali``, ``X_input_test`` and
        ``y_output_test``. ``X_*`` arrays have shape
        ``(n_samples, n_steps_in, 2)``, ``y_*`` arrays have shape
        ``(n_samples, n_steps_out, 2)``; all are float64.
    """
    # Load data
    data = np.load(f"{colab_path}data/data_{condition}.npy")
    print(f"Loaded data with shape {data.shape} and type {data.dtype}")

    # Split a multivariate sequence into samples (modified from Brownlee 2018, p.156)
    def split_sequences(
            sequences,
            n_steps_in,
            n_steps_out):
        """Cut ``sequences`` (rows = time steps) into input/target window pairs.

        A new window starts every ``sampling_rate`` rows to reduce redundancy
        between samples. Only windows that fit completely are kept; a
        sequence shorter than one full window yields empty arrays that still
        have three dimensions.
        """
        window = n_steps_in + n_steps_out
        n_features = sequences.shape[1]
        windows_in, windows_out = [], []
        # Last valid start index is len(sequences) - window
        for start in range(0, len(sequences) - window + 1, sampling_rate):
            end_ix = start + n_steps_in
            windows_in.append(sequences[start:end_ix, :])
            windows_out.append(sequences[end_ix:end_ix + n_steps_out, :])
        # Explicit reshape keeps the 3-D layout even when no window fits
        X = array(windows_in).reshape((len(windows_in), n_steps_in, n_features))
        y = array(windows_out).reshape((len(windows_out), n_steps_out, n_features))
        return X, y

    # Per-split chunks, collected per dyad and concatenated once at the end
    # (avoids re-copying the growing arrays on every iteration)
    chunks = {
        "X_input_train": [],
        "y_output_train": [],
        "X_input_vali": [],
        "y_output_vali": [],
        "X_input_test": [],
        "y_output_test": []
    }

    # index for dyads
    for x in participants[::2]:
        # define input sequence
        in_seq1 = data[x]
        in_seq2 = data[x+1]
        # convert to [rows, columns] structure
        in_seq1 = in_seq1.reshape((len(in_seq1), 1))
        in_seq2 = in_seq2.reshape((len(in_seq2), 1))
        # horizontally stack columns
        dataset = hstack((in_seq1, in_seq2))
        # convert into input/output
        X, y = split_sequences(
            dataset,
            n_steps_in,
            n_steps_out)
        # data split: 60 % train, 20 % validation, remainder test
        num_train_samples = int(0.6 * len(X))
        num_val_samples = int(0.2 * len(X))
        val_end = num_train_samples + num_val_samples
        # collect data for multiple dyads
        chunks["X_input_train"].append(X[:num_train_samples])
        chunks["y_output_train"].append(y[:num_train_samples])
        chunks["X_input_vali"].append(X[num_train_samples:val_end])
        chunks["y_output_vali"].append(y[num_train_samples:val_end])
        chunks["X_input_test"].append(X[val_end:])
        chunks["y_output_test"].append(y[val_end:])

    # Create dictionary. The leading empty float64 array fixes the result
    # dtype and shape even when there are no dyads at all.
    samples = {}
    for key, parts in chunks.items():
        n_steps = n_steps_in if key.startswith("X_") else n_steps_out
        samples[key] = np.concatenate(
            [np.empty((0, n_steps, 2))] + parts,
            axis=0)

    print("Length of samples for each set:", len(
        samples["X_input_train"]), len(samples["X_input_vali"]), len(samples["X_input_test"]))

    return samples


In [11]:
# Define simple seq2seq model
# Modified from Wieniawska 2020
# (https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb)

def lstm_decoder_encoder(samples, units=100):
    """Build and compile an LSTM encoder-decoder (seq2seq) model.

    The encoder LSTM reads the input window and hands its final output and
    cell state (both batch-normalised) to the decoder LSTM as initial state.
    The decoder unrolls over the forecast horizon and a time-distributed
    dense layer maps every decoder step to the target features.

    Args:
        samples: dict providing ``"X_input_train"`` and ``"y_output_train"``;
            only their shapes (axes 1 and 2) are read here.
        units: Number of units of both LSTM layers.

    Returns:
        Model compiled with Adam (learning rate 0.001, clipnorm 1), MSE loss
        and MAE as metric.
    """
    # Per-sample shapes (time steps, features) of the input and target windows
    in_shape = samples["X_input_train"].shape[1:3]
    out_shape = samples["y_output_train"].shape[1:3]

    # Input layer
    input_train = Input(shape=in_shape)
    # Only used to read the target's shape below; it is not wired into the model
    output_train = Input(shape=out_shape)
    horizon, n_targets = output_train.shape[1], output_train.shape[2]

    # Encoder LSTM returning its last output plus the final hidden and cell state
    encoder = LSTM(
        units,
        activation='elu',
        recurrent_dropout=0.1,
        return_sequences=False,
        return_state=True)
    enc_out, _enc_hidden, enc_cell = encoder(input_train)

    # Batch normalisation of the states passed to the decoder (the author's
    # stated intent: guard against exploding gradients); center=False turns
    # off the learned offset
    bn_options = dict(momentum=0.6, center=False)
    enc_out = BatchNormalization(**bn_options)(enc_out)
    enc_cell = BatchNormalization(**bn_options)(enc_cell)

    # Decoder LSTM: the encoder output is repeated once per forecast step and
    # the normalised encoder states initialise the decoder
    repeated = RepeatVector(horizon)(enc_out)
    decoded = LSTM(
        units,
        activation='elu',
        return_state=False,
        return_sequences=True)(
            repeated,
            initial_state=[enc_out, enc_cell])

    # Dense layer with repeated weights (same weights at every time step)
    out = TimeDistributed(Dense(n_targets))(decoded)

    # Compile model
    model = Model(inputs=input_train, outputs=out)
    model.compile(
        optimizer=Adam(learning_rate=0.001, clipnorm=1),
        loss='mse',
        metrics=['mae'])

    return model


In [16]:
# Fit model
def fit_model(model, samples, epc=500, patience=50):
    """Train ``model`` on the training split with early stopping.

    Training monitors the validation loss, stops once it has not improved for
    ``patience`` epochs and restores the weights of the best epoch.

    Args:
        model: Compiled Keras model; trained in place.
        samples: dict with the keys ``"X_input_train"``, ``"y_output_train"``,
            ``"X_input_vali"`` and ``"y_output_vali"``.
        epc: Maximum number of training epochs.
        patience: Number of epochs without improvement of ``val_loss`` after
            which training stops.

    Returns:
        Tuple ``(model, history)`` with the trained model and the object
        returned by ``model.fit``.
    """
    # Both arguments are honoured here; previously they were shadowed by
    # hard-coded values equal to the defaults.
    es = EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=patience,
        restore_best_weights=True)
    history = model.fit(
        samples["X_input_train"],
        samples["y_output_train"],
        validation_data=(
            samples["X_input_vali"],
            samples["y_output_vali"]),
        epochs=epc,
        verbose=0,
        callbacks=[es],
        batch_size=64,
        shuffle=True)

    return model, history

In [17]:
# Execute over all conditions for n trials
# Results are collected as data_all[run][kind][condition], where kind is one of
# "samples_all", "model_all", "history_all", "results_all", "predictions_all"
data_all = {run: {} for run in runs}  # prepare nested dictionary

# Create the output folder up front so model.save() cannot fail on a missing
# directory after a complete training run
os.makedirs(f"{colab_path}model", exist_ok=True)

# iterate over runs
for j, run in enumerate(runs):
    seed_value(j)  # control random number generator for each run
    # Fresh containers per run: different runs never share a dictionary, so no
    # deep copies of the (large) sample and prediction arrays are needed
    samples_all = {}
    model_all = {}
    history_all = {}
    results_all = {}
    predictions_all = {}
    # iterate over conditions
    for con in conditions:
        print(f"Run: {run}, Condition: {con}")
        # NOTE(review): sample_preparation involves no randomness, so the
        # samples are the same in every run; they are rebuilt here per run
        samples = sample_preparation(con)
        samples_all[con] = samples
        model = lstm_decoder_encoder(samples)
        model, history = fit_model(model, samples)
        model_path = f"{colab_path}model/model_{run}_{con}.h5"
        # store the path instead of the model, because models aren't picklable
        # in tensorflow < 2.7 and can't be imported via pickle
        model_all[con] = model_path
        model.save(model_path)  # -> using model.save() instead
        history_all[con] = history.history
        # test-set loss and metric for this run and condition
        results = model.evaluate(
            samples["X_input_test"],
            samples["y_output_test"],
            batch_size=64)
        results_all[con] = results
        predictions = model.predict(
            samples["X_input_test"],
            batch_size=64)
        predictions_all[con] = predictions
    # create nested dictionary for everything
    data_all[run]["samples_all"] = samples_all
    data_all[run]["model_all"] = model_all
    data_all[run]["history_all"] = history_all
    data_all[run]["results_all"] = results_all
    data_all[run]["predictions_all"] = predictions_all

Run: 0, Condition: sit
Loaded data with shape (8, 14800) and type float32
Length of samples for each set: 696 232 232
Run: 0, Condition: gaze
Loaded data with shape (8, 14400) and type float64
Length of samples for each set: 676 224 228
Run: 0, Condition: gaze_swap
Loaded data with shape (8, 14800) and type float32
Length of samples for each set: 696 232 232
Run: 1, Condition: sit
Loaded data with shape (8, 14800) and type float32
Length of samples for each set: 696 232 232
Run: 1, Condition: gaze
Loaded data with shape (8, 14400) and type float64
Length of samples for each set: 676 224 228
Run: 1, Condition: gaze_swap
Loaded data with shape (8, 14800) and type float32
Length of samples for each set: 696 232 232
Run: 2, Condition: sit
Loaded data with shape (8, 14800) and type float32
Length of samples for each set: 696 232 232
Run: 2, Condition: gaze
Loaded data with shape (8, 14400) and type float64
Length of samples for each set: 676 224 228
Run: 2, Condition: gaze_swap
Loaded data 

In [18]:
# Test-set results per run: one {condition: evaluate() output} dictionary each
for run_id in runs:
    print(run_id, data_all[run_id]["results_all"])

01 {'sit': [0.028530549257993698, 0.11703720688819885], 'gaze': [0.026570089161396027, 0.11031073331832886], 'gaze_swap': [0.02831937000155449, 0.1124381572008133]}
02 {'sit': [0.028441406786441803, 0.11573377996683121], 'gaze': [0.025277702137827873, 0.10686317086219788], 'gaze_swap': [0.02458532527089119, 0.106587253510952]}
03 {'sit': [0.026579268276691437, 0.11221860349178314], 'gaze': [0.026148684322834015, 0.11074282974004745], 'gaze_swap': [0.02592020481824875, 0.10809149593114853]}
04 {'sit': [0.027065759524703026, 0.1111808642745018], 'gaze': [0.028018085286021233, 0.116024449467659], 'gaze_swap': [0.027279580011963844, 0.11353939026594162]}
05 {'sit': [0.026910264045000076, 0.1107400432229042], 'gaze': [0.025553440675139427, 0.11243768036365509], 'gaze_swap': [0.02688203938305378, 0.11066733300685883]}


In [19]:
# Persist the nested results dictionary (all runs and conditions) as a single pickle file
with open(f'{colab_path}model/data_all.pickle', 'wb') as pickle_file:
    pickle.dump(
        data_all,
        pickle_file,
        protocol=pickle.HIGHEST_PROTOCOL)

In [20]:
# On Colab (non-empty colab_path) offer the results pickle and every saved model for download
if colab_path:
    from google.colab import files
    files.download(f'{colab_path}model/data_all.pickle')
    # "model_all" maps each condition to the path of the model saved for that run
    for run_data in data_all.values():
        for model_file in run_data["model_all"].values():
            files.download(model_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>