# In [2]:
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import logging
import shutil

# Send INFO-and-above records, prefixed with a timestamp, to a dedicated log file
logging.basicConfig(
    filename='To_E4_processing.log',
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)

# Root of the tree that is scanned for input CSVs, and root of the tree
# that receives the converted files and the zip archives
base_input_path = 'Processed_Output/'
base_output_path = 'Processed_Results/'

# Create the output root up front; a pre-existing directory is not an error
os.makedirs(base_output_path, exist_ok=True)

# (output_dir, zip_name) pairs collected during traversal and archived afterwards
output_dirs_to_zip = []

# Function to process EDA data
def process_eda(file_path, output_path):
    """Convert an EDA CSV into a single-column file and log the outcome.

    The output has neither header nor index: line 1 is the first
    ``unix_timestamp`` divided by 1e6 and truncated to an integer, line 2 is
    the literal ``4`` (presumably the sampling rate in Hz -- confirm), and
    the remaining lines are the ``eda`` values rounded to six decimals.

    Args:
        file_path: CSV to read; must provide ``unix_timestamp`` and ``eda``.
        output_path: Where the converted CSV is written.

    Nothing is returned and nothing is raised: any exception is caught and
    recorded in the log as an error.
    """
    try:
        frame = pd.read_csv(file_path)
        # Dividing by 1e6 turns the source timestamp into seconds
        start_seconds = int(frame['unix_timestamp'].iloc[0] / 1e6)
        samples = frame['eda'].round(6).astype(str)
        rows = [str(start_seconds), "4", *samples.tolist()]
        pd.DataFrame(rows).to_csv(output_path, index=False, header=False)
        logging.info(f"EDA data saved to {output_path}")
    except Exception as e:
        logging.error(f"Failed to process EDA data for {file_path}: {e}")

# Function to process BVP data
def process_bvp(file_path, output_path):
    """Convert a BVP CSV into a single-column file and log the outcome.

    The output has neither header nor index: line 1 is the first
    ``unix_timestamp`` divided by 1e6 and truncated to an integer, line 2 is
    the literal ``64`` (presumably the sampling rate in Hz -- confirm), and
    the remaining lines are the ``bvp`` values as strings.

    Args:
        file_path: CSV to read; must provide ``unix_timestamp`` and ``bvp``.
        output_path: Where the converted CSV is written.

    Nothing is returned and nothing is raised: any exception is caught and
    recorded in the log as an error.
    """
    try:
        frame = pd.read_csv(file_path)
        # Dividing by 1e6 turns the source timestamp into seconds
        first_stamp = frame['unix_timestamp'].iloc[0]
        start_seconds = int(first_stamp / 1e6)
        rows = [str(start_seconds), "64"]
        rows.extend(frame['bvp'].astype(str).tolist())
        pd.DataFrame(rows).to_csv(output_path, index=False, header=False)
        logging.info(f"BVP data saved to {output_path}")
    except Exception as e:
        logging.error(f"Failed to process BVP data for {file_path}: {e}")

# Function to process Temperature data
def process_temp(file_path, output_path):
    """Convert a temperature CSV into a single-column file and log the outcome.

    The output has neither header nor index: line 1 is the first
    ``unix_timestamp`` divided by 1e6 and truncated to an integer, line 2 is
    the literal ``4`` (presumably the sampling rate in Hz -- confirm), and
    the remaining lines are the ``temperature`` values as strings.

    Args:
        file_path: CSV to read; must provide ``unix_timestamp`` and
            ``temperature``.
        output_path: Where the converted CSV is written.

    Nothing is returned and nothing is raised: any exception is caught and
    recorded in the log as an error.
    """
    try:
        frame = pd.read_csv(file_path)
        # Dividing by 1e6 turns the source timestamp into seconds
        start_seconds = int(frame['unix_timestamp'].iloc[0] / 1e6)
        readings = frame['temperature'].astype(str).tolist()
        column = pd.DataFrame([str(start_seconds), "4"] + readings)
        column.to_csv(output_path, index=False, header=False)
        logging.info(f"Temperature data saved to {output_path}")
    except Exception as e:
        logging.error(f"Failed to process Temperature data for {file_path}: {e}")

# Function to process Pulse Rate (PR) data
def process_pr(file_path, output_path):
    """Convert a pulse-rate CSV into a single-column file and log the outcome.

    The output has neither header nor index: line 1 is the start time in
    whole seconds, line 2 is the literal ``1`` (presumably the sampling rate
    in Hz -- confirm), and the remaining lines are the non-null
    ``pulse_rate_bpm`` values.

    The start time is the ``timestamp_unix`` (divided by 1e3) of the first
    row that actually contributes a sample. Rows with a missing pulse rate
    are dropped from the output, so taking the first timestamp of the file
    would shift the whole series earlier whenever the file begins with
    missing values.

    Args:
        file_path: CSV to read; must provide ``timestamp_unix`` and
            ``pulse_rate_bpm``.
        output_path: Where the converted CSV is written.

    Nothing is returned and nothing is raised: any exception is caught and
    recorded in the log as an error.
    """
    try:
        pr_data = pd.read_csv(file_path)
        stamps = pr_data['timestamp_unix']
        rates = pr_data['pulse_rate_bpm'].dropna()

        # Timestamps belonging to the rows that survive the dropna above
        aligned = stamps.loc[rates.index].dropna()
        if aligned.empty:
            # No emitted sample has a usable timestamp: keep the previous
            # behaviour and use the first timestamp found in the file
            aligned = stamps.dropna()
        initial_time = int(aligned.iloc[0] / 1e3)  # Convert to seconds

        # NOTE(review): missing values *inside* the series are still removed,
        # which closes the gap in time for every later sample -- confirm
        # whether the consumer needs gaps preserved.
        formatted_data = [f"{initial_time}", "1"] + rates.astype(str).tolist()
        pd.DataFrame(formatted_data).to_csv(output_path, index=False, header=False)
        logging.info(f"Pulse Rate data saved to {output_path}")
    except Exception as e:
        logging.error(f"Failed to process Pulse Rate data for {file_path}: {e}")

# Function to process Accelerometer (ACC) data
def process_acc(file_path, output_path):
    """Convert an accelerometer CSV into a three-column file and log the outcome.

    The ``x``, ``y`` and ``z`` columns are multiplied by 1/64, min-max scaled
    onto [-128, 127] (the scaler is fitted on this file's own data), rounded
    and cast to integers. The output has neither header nor index: row 1
    repeats the start time (first ``unix_timestamp`` divided by 1e6,
    truncated) in all three columns, row 2 repeats ``64`` (presumably the
    sampling rate in Hz -- confirm), and the scaled samples follow.

    Args:
        file_path: CSV to read; must provide ``unix_timestamp``, ``x``, ``y``
            and ``z``.
        output_path: Where the converted CSV is written.

    Nothing is returned and nothing is raised: any exception is caught and
    recorded in the log as an error.
    """
    try:
        frame = pd.read_csv(file_path)
        axes = frame[['x', 'y', 'z']] * (1/64)

        # Stretch the data onto [-128, 127], then snap to integers
        rescaled = MinMaxScaler(feature_range=(-128, 127)).fit_transform(axes)
        samples = rescaled.round().astype(int)

        # Dividing by 1e6 turns the source timestamp into seconds
        start_seconds = int(frame['unix_timestamp'].iloc[0] / 1e6)

        # Two leading rows (start time, rate) followed by the samples
        rows = [[start_seconds] * 3, [64] * 3]
        rows.extend(samples.tolist())
        table = pd.DataFrame(rows, columns=['ACC_X', 'ACC_Y', 'ACC_Z'])
        table.to_csv(output_path, index=False, header=False)
        logging.info(f"Accelerometer data saved to {output_path}")
    except Exception as e:
        logging.error(f"Failed to process Accelerometer data for {file_path}: {e}")

# Function to process IBI data with two-column output format
def process_ibi(file_path, output_path):
    """Convert a systolic-peak CSV into a two-column IBI file and log the outcome.

    Each inter-beat interval is the difference between consecutive
    ``systolic_peak_timestamp`` values, expressed in seconds. Rows without an
    interval (the first row, because it has no predecessor) are discarded.

    The output is written with a header row. The first column is named after
    the start time (timestamp of the first kept row, in whole seconds) and
    holds each kept peak's offset from that row in seconds; the second
    column, ``IBI``, holds the intervals.

    Args:
        file_path: CSV to read; must provide ``systolic_peak_timestamp``.
        output_path: Where the converted CSV is written.

    Nothing is returned and nothing is raised: any exception is caught and
    recorded in the log as an error.
    """
    try:
        frame = pd.read_csv(file_path)
        peaks = frame['systolic_peak_timestamp']

        # Source timestamps -> milliseconds -> successive differences -> seconds
        intervals = (peaks / 1e6).diff() / 1e3

        # Keep only the rows that have an interval
        has_interval = intervals.notna()
        kept_peaks = peaks[has_interval]
        kept_intervals = intervals[has_interval]

        # The first kept peak is the time origin and supplies the header value
        origin = kept_peaks.iloc[0]
        start_seconds = int(origin / 1e6) // 1000

        table = pd.DataFrame({
            f"{start_seconds}": (kept_peaks - origin) / 1e9,
            'IBI': kept_intervals,
        })

        table.to_csv(output_path, index=False)
        logging.info(f"IBI data saved to {output_path}")
    except Exception as e:
        logging.error(f"Failed to process IBI data for {file_path}: {e}")

# Ordered (filename keyword, converter, output filename) table. The first
# keyword found in a filename decides how that file is converted, so the
# order of the entries matters.
converters = (
    ('accelerometer', process_acc, 'ACC.csv'),
    ('systolic_peaks', process_ibi, 'IBI.csv'),
    ('eda', process_eda, 'EDA.csv'),
    ('bvp', process_bvp, 'BVP.csv'),
    ('temperature', process_temp, 'TEMP.csv'),
    ('pulse-rate', process_pr, 'HR.csv'),
)

# Walk the whole input tree and mirror its directory layout under the output root
for current_dir, _subdirs, filenames in os.walk(base_input_path):
    for filename in filenames:
        if not filename.endswith('.csv'):
            continue

        input_file_path = os.path.join(current_dir, filename)
        output_dir = os.path.join(
            base_output_path,
            os.path.relpath(current_dir, base_input_path),
        )
        os.makedirs(output_dir, exist_ok=True)

        # A pulse-rate file marks its directory for zipping; the archive is
        # named after the part of the filename before '_pulse-rate.csv'
        if 'pulse-rate' in filename:
            zip_name = filename.replace('_pulse-rate.csv', '')
            output_dirs_to_zip.append((output_dir, zip_name))

        # Run the first converter whose keyword appears in the filename
        for keyword, convert, target_name in converters:
            if keyword in filename:
                convert(input_file_path, os.path.join(output_dir, target_name))
                break

# Create one zip archive per registered output directory, stored in the
# output root as '<zip_name>.zip'
for output_dir, zip_name in output_dirs_to_zip:
    # make_archive expects the target path WITHOUT the extension and appends
    # '.zip' itself. Build that base name directly: stripping '.zip' with
    # str.replace would remove every occurrence in the path, not just the
    # suffix, and the archive could end up somewhere other than the logged path.
    archive_base = os.path.join(base_output_path, zip_name)
    shutil.make_archive(archive_base, 'zip', output_dir)
    logging.info(f"Created zip file {archive_base}.zip")
