# **Randomize data**

This script replaces the condition names in the data file names with randomly assigned numbers so that the analysis can be carried out blind.

In [6]:
import pandas as pd
import numpy as np
import random
import os

## **Generate a random number for each condition**

In [2]:
# NOTE: every directory below ends with '/' because the rest of the notebook
# builds paths by plain string concatenation (directory + file name).

# Directory and file name where the condition-to-number assignment is written
assignment_save_dir = '../data/assignment/'
assignment_save_file = 'assignment.txt'
# Directory the Mindware exports are read from and the randomized IBI CSVs are written to
mindware_dir = '../data/mindware_processed/'
# Directory the preprocessed CSVs are read from and their randomized copies are written to
data_dir = '../data/preprocessed/'

In [3]:
# Conditions to blind and the numeric labels that will stand in for them
conditions = ["positive", "negative", "neutral"]
numbers = [1, 2, 3]

# Shuffle the labels in place so that each condition gets a random number
random.shuffle(numbers)

# Pair every condition with its shuffled number
random_assignment = dict(zip(conditions, numbers))

# Write the mapping to disk as one "condition,number" line per condition.
# exist_ok=True creates the directory if needed without the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(assignment_save_dir, exist_ok=True)
with open(assignment_save_dir + assignment_save_file, 'w') as f:
    f.writelines(
        f"{condition_name},{number}\n"
        for condition_name, number in random_assignment.items()
    )

## **Load Mindware IBI data and randomize file names**

In [4]:
def process_mindware_ibi(df: pd.DataFrame) -> tuple[np.ndarray, int]:
    """Stitch the per-segment columns of *df* into one continuous IBI series.

    Every column is treated as one segment. After dropping NaNs, the first
    value of a segment is taken as its start offset, the last value as its end
    offset, and anything in between as the IBIs proper. The end offset of one
    segment plus the start offset of the next forms the IBI that spans the
    segment boundary. Columns that contain only NaNs are skipped.

    Returns:
        A tuple ``(ibi_array, first_ibi_offset)``. ``ibi_array`` holds the
        stitched IBIs without the trailing end offset of the last segment.
        ``first_ibi_offset`` is the start offset of the first non-empty
        segment truncated to ``int``, or ``None`` when no column has data.
    """
    stitched = []
    leading_offset = None

    for column_name in df.columns:
        values = df[column_name].dropna().values
        if len(values) == 0:
            continue

        head, tail = values[0], values[-1]

        if stitched:
            # Complete the boundary-spanning IBI left open by the previous segment
            stitched[-1] += head
        else:
            # Very first segment seen: remember where the first beat sits
            leading_offset = int(head)

        # Interior values only exist when the segment has more than two entries
        if len(values) > 2:
            stitched.extend(values[1:-1])

        # Provisional entry; completed by the next segment or dropped at the end
        stitched.append(tail)

    # The last entry is a dangling end offset, not a full IBI
    return np.array(stitched)[:-1], leading_offset

In [5]:
def _first_match(candidates, accept, description):
    """Return the first name in *candidates* for which ``accept(name)`` is true.

    Raises:
        FileNotFoundError: when nothing matches, so a missing input is
            reported by name rather than as a bare IndexError.
    """
    for name in candidates:
        if accept(name):
            return name
    raise FileNotFoundError(f'No {description} found')


def _clip_to_window(frame, window_start, window_end):
    """Restrict *frame* to a time window and make its time axis relative.

    Keeps the rows whose 'timestamp' lies in [window_start, window_end]
    (both ends inclusive), adds a 'time' column holding the offset from
    window_start in milliseconds, and drops the 'timestamp' column.
    """
    inside = (frame['timestamp'] >= window_start) & (frame['timestamp'] <= window_end)
    clipped = frame.loc[inside].reset_index(drop=True)
    clipped['time'] = (clipped['timestamp'] - window_start).dt.total_seconds() * 1000
    return clipped.drop(columns=['timestamp'])


# Only the Mindware "HRV Analysis" exports are inputs here
mindware_files = [f for f in os.listdir(mindware_dir) if 'HRV Analysis' in f]
preprocessed_files = os.listdir(data_dir)

# A file is attributed to NN when its name contains 'NN'; every other file of
# the same condition is attributed to YW.
subject_filters = {
    'NN': lambda name: 'NN' in name,
    'YW': lambda name: 'NN' not in name,
}

for condition in conditions:
    condition_no = random_assignment[condition]
    mw_candidates = [f for f in mindware_files if condition in f]
    pre_candidates = [f for f in preprocessed_files if condition in f]

    # Resolve all input files first so a missing one fails before any reading
    mw_names = {
        subject: _first_match(
            mw_candidates, accept, f'Mindware file for {subject} / {condition}')
        for subject, accept in subject_filters.items()
    }
    pre_names = {
        subject: _first_match(
            pre_candidates, accept, f'preprocessed file for {subject} / {condition}')
        for subject, accept in subject_filters.items()
    }

    ibi_frames = {}
    preprocessed_frames = {}
    for subject in subject_filters:
        mw_df = pd.read_excel(mindware_dir + mw_names[subject], sheet_name='IBI')
        ibi, first_ibi_offset = process_mindware_ibi(mw_df)
        if first_ibi_offset is None:
            raise ValueError(f'No IBI data in {mw_names[subject]}')
        # Time of each IBI entry, measured from the start of the recording
        elapsed_time = first_ibi_offset + np.cumsum(ibi)

        pre_df = pd.read_csv(data_dir + pre_names[subject])
        pre_df['timestamp'] = pd.to_datetime(pre_df['timestamp'])

        # Anchor the Mindware series to the first preprocessed timestamp;
        # elapsed_time is interpreted as milliseconds.
        recording_start = pre_df['timestamp'].iloc[0]
        mw_timestamps = [recording_start + pd.Timedelta(milliseconds=t) for t in elapsed_time]

        ibi_frames[subject] = pd.DataFrame({'IBI': ibi, 'timestamp': mw_timestamps})
        preprocessed_frames[subject] = pre_df

    # Align time range: keep only the span covered by both subjects' recordings
    first_timestamp = max(df['timestamp'].iloc[0] for df in preprocessed_frames.values())
    last_timestamp = min(df['timestamp'].iloc[-1] for df in preprocessed_frames.values())

    # Build every output before writing, so a failure leaves no partial set
    # of files for this condition. Output names carry the random number only.
    outputs = [
        (f'{mindware_dir}mindware_IBI_{subject}_{condition_no}.csv',
         _clip_to_window(ibi_frames[subject], first_timestamp, last_timestamp))
        for subject in subject_filters
    ]
    outputs += [
        (f'{data_dir}preprocessed_{subject}_{condition_no}.csv',
         _clip_to_window(preprocessed_frames[subject], first_timestamp, last_timestamp))
        for subject in subject_filters
    ]

    for out_path, out_frame in outputs:
        out_frame.to_csv(out_path, index=False)