In [4]:
import numpy as np
import os
import sys       
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy
import pickle

# Resolve the parent of the current working directory and the raw-data
# folders located beneath it.
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
bin_dir = parent_dir + "/data/raw/bin_files"
csv_dir = parent_dir + "/data/raw/excel_files"

# Make packages that live in parent_dir importable (the `patch` imports below
# rely on this). The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry to sys.path each time.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# For .bin patch parsing
from patch.Processing.processECG import process_ECG
from patch.Processing.processPPG import process_PPG, select_ppg_array, extract_PPG_fiducial
from patch.Processing.processSCG import process_SCG, extract_SCG_fiducials
from patch.Tools.preprocessing import get_filt_dict, dict_interpolation
from patch.Tools.pipelineConfig import *
from patch.Tools.patchParser import get_newest_patch_file, parse_file

# Load IPython's autoreload extension and select mode 2, so that edits to
# imported modules are picked up before each cell executes.
# (Comments stay on their own lines: text after a line magic is parsed as its argument.)
%load_ext autoreload
%autoreload 2
# Use the interactive widget backend for matplotlib figures.
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Build the list of per-subject folders found in csv_dir.
#
# Every entry whose name ends in 's' is excluded. The previous index loop
# deleted only the FIRST such entry and then stopped, so a second stray entry
# would have been treated as a subject folder.
# NOTE(review): presumably the entry being filtered is a non-subject folder
# such as ".ipynb_checkpoints" -- confirm.
#
# The order returned by os.listdir is kept as-is because the processing cell
# selects subjects by position in these lists (skip_sub).
# NOTE(review): os.listdir order is not guaranteed to be stable across
# machines; consider sorting here and updating skip_sub accordingly.
csv_file_paths = [
    csv_dir + '/' + entry
    for entry in os.listdir(csv_dir)
    if not entry.endswith('s')
]

# Each subject folder name carries a 3-character prefix (e.g. "sub6049");
# the remainder of the name is the stem of the matching .bin recording.
bin_file_paths = [
    bin_dir + "/" + os.path.basename(os.path.normpath(subject_path))[3:] + ".bin"
    for subject_path in csv_file_paths
]

In [8]:
# Positions (in bin_file_paths / csv_file_paths) of the subjects to skip on this run.
skip_sub = [0,1,2,3,4,5,6,7,8]

# Segment lengths, in the same units as df_timestamps['timestamp'], for the
# activities whose end is defined relative to their start rather than by a
# logged timestamp.
SCRIPT_DURATION = 120
PREP_DURATION = 120
RECOVERY_DURATION = 1200

# Rows 0-9 of df_timestamps are indexed below, so at least 10 rows are required.
MIN_TIMESTAMP_ROWS = 10

# Root folder for the pickled, segmented signals (one sub-folder per subject).
save_dir = parent_dir + "/data/interim/parse_filtered"


def first_index_after(time_axis, threshold):
    """Return the index of the first sample in ``time_axis`` that is > ``threshold``.

    Vectorised replacement for
    ``next(x for x, val in enumerate(time_axis) if val > threshold)``.

    Parameters
    ----------
    time_axis : 1-D array-like of sample times.
    threshold : scalar time to search for.

    Returns
    -------
    int
        Index of the first sample strictly greater than ``threshold``. If no
        sample exceeds it (for example the recording ends before the requested
        time), a warning is printed and ``len(time_axis)`` is returned, so a
        slice ending at this index runs to the end of the recording instead of
        the search raising ``StopIteration``.
    """
    exceeds = np.asarray(time_axis) > threshold
    if exceeds.size:
        # argmax returns the position of the first True; it also returns 0 when
        # there is no True at all, hence the explicit check below.
        candidate = int(np.argmax(exceeds))
        if exceeds[candidate]:
            return candidate
    print(f"Warning: no sample later than t = {threshold}; clamping to end of recording")
    return int(exceeds.size)


def slice_signals(signal_dict, start, end):
    """Return a new dict holding ``values[start:end]`` for every key of ``signal_dict``."""
    return {key: values[start:end] for key, values in signal_dict.items()}


for i in range(len(bin_file_paths)):
    if i in skip_sub:
        continue
    print("i = " + str(i))
    print("Processing: " + bin_file_paths[i])
    print("Processing: " + csv_file_paths[i])

    # Reading in timestamps of activities.
    # This is done BEFORE parsing the .bin file so that subjects with too few
    # timestamps are skipped without paying for the parse/filter step.
    df_timestamps = pd.read_csv(csv_file_paths[i] + '/df_timestamps.csv')

    # Checked before any row is indexed: an empty table would otherwise fail
    # on the first-row lookup below instead of being skipped.
    if len(df_timestamps['timestamp']) < MIN_TIMESTAMP_ROWS:
        continue

    # Changing timestamps to 0 (make them relative to the first logged one)
    df_timestamps['timestamp'] = df_timestamps['timestamp'] - df_timestamps['timestamp'].iloc[0]
    stamps = df_timestamps['timestamp']

    # Parsing raw bin file into raw_dict
    raw_dict = parse_file(bin_file_paths[i])

    # Interpolation, resampling, filtering
    patch_dict = dict_interpolation(raw_dict, fs_resample=FS_RESAMPLE)
    patch_filt_dict = get_filt_dict(patch_dict, FILT_ECG, FILT_SCG, FILT_PPG, Fs=FS_RESAMPLE)

    # Converted once per subject so the ten boundary searches share one array.
    time_axis = np.asarray(patch_filt_dict['time'])

    # Indices for start and stop times (index loc) of activities
    rest_2_start = first_index_after(time_axis, stamps.iloc[3])
    rest_2_end = first_index_after(time_axis, stamps.iloc[4])
    script_start = first_index_after(time_axis, stamps.iloc[5])
    script_end = first_index_after(time_axis, stamps.iloc[5] + SCRIPT_DURATION)
    prep_start = first_index_after(time_axis, stamps.iloc[6])
    prep_end = first_index_after(time_axis, stamps.iloc[6] + PREP_DURATION)
    speaking_start = first_index_after(time_axis, stamps.iloc[7])
    speaking_end = first_index_after(time_axis, stamps.iloc[8])
    recovery_start = first_index_after(time_axis, stamps.iloc[9])
    recovery_end = first_index_after(time_axis, stamps.iloc[9] + RECOVERY_DURATION)

    # Segmenting filtered dict
    rest_2 = slice_signals(patch_filt_dict, rest_2_start, rest_2_end)
    script = slice_signals(patch_filt_dict, script_start, script_end)
    prep = slice_signals(patch_filt_dict, prep_start, prep_end)
    speaking = slice_signals(patch_filt_dict, speaking_start, speaking_end)
    recovery = slice_signals(patch_filt_dict, recovery_start, recovery_end)

    activities = [rest_2,script,prep,speaking,recovery]
    act_titles = ['rest_2','script','prep','speaking','recovery']

    # Subject id = folder name without its 3-character prefix (same rule used
    # to derive the .bin file name).
    subject_id = os.path.basename(os.path.normpath(csv_file_paths[i]))[3:]
    subject_dir = save_dir + f"/{subject_id}"
    os.makedirs(subject_dir, exist_ok=True)

    # One pickle per activity segment.
    for title, segment in zip(act_titles, activities):
        with open(f"{subject_dir}/{title}.pkl", "wb") as f:
            pickle.dump(segment, f)

i = 9
Processing: /home/michael/Code/mims-transformer-stress-classification/data/raw/bin_files/6049.bin
Processing: /home/michael/Code/mims-transformer-stress-classification/data/raw/excel_files/sub6049
Device name: HP-0247
Last sync time: Thu May 19 13:22:18 2022 UTC
Accel Size: 18545966
PPG Size: 1212444
ECG Size: 9152336
Environmental Size: 37091
AST Time Length: 18316.512451171875
179649748 100.0% done                      

STATS:
Measurement Length: 18316.43 seconds
File Size: 179650.048 kbytes
Average data rate: 9808.14 bytes/second
Theoretical uncompressed data rate: 17365.66 bytes/second
PPG is 12.15% of the data
ECG is 12.64% of the data
Accel is 50.35% of the data
Environmental is 0.14% of the data
AST is 10.35% of the data
Data Flag is 10.32% of the data
time ellapsed: 38.756 seconds
parse speed: 4635463.512 bytes/second
parse speed: 472.614 dataSeconds/second
i = 10
Processing: /home/michael/Code/mims-transformer-stress-classification/data/raw/bin_files/6048.bin
Processing