In [1]:
import os
import re
import pandas as pd
import numpy as np
from datetime import datetime


In [2]:
# Function to label data as "Fall" or "Not Fall"
def label_fall(data, window_before=0.4, window_after=0.6):
    """Flag the samples surrounding the acceleration peak as a fall.

    The row with the largest ``Magnitude`` is used as the reference point.
    Every row whose ``Time`` lies in the inclusive interval
    ``[peak_time - window_before, peak_time + window_after]`` gets
    ``Fall = True``; all other rows get ``Fall = False``.

    Args:
        data: DataFrame with ``Time`` and ``Magnitude`` columns. It is
            modified in place: the ``Fall`` column is added or overwritten.
        window_before: Width of the window before the peak, expressed in the
            same units as the ``Time`` column.
        window_after: Width of the window after the peak, same units.

    Returns:
        The same ``data`` object, now carrying the boolean ``Fall`` column.
    """
    magnitude = data['Magnitude']

    # Without at least one valid magnitude there is no peak to anchor the
    # window on (idxmax() would fail), so nothing is labelled as a fall.
    if not magnitude.notna().any():
        data['Fall'] = pd.Series(False, index=data.index, dtype=bool)
        return data

    peak_time = data.loc[magnitude.idxmax(), 'Time']

    # Series.between is inclusive on both ends by default, matching the
    # original ">= start and <= end" comparison.
    data['Fall'] = data['Time'].between(peak_time - window_before,
                                        peak_time + window_after)

    return data

In [3]:
#Function to rename output file, so when we copy to common location it does not overwrite
def get_output_file_path(source, dir, file):
    """Build a destination path whose file name carries the subject tag.

    The first ``sub<digits>`` occurrence found in ``source`` is inserted
    just before the last ``_``-separated part of the file name, e.g.
    ``AXR_AS_trial1.xlsx`` from ``.../sub3/ADLs`` becomes
    ``AXR_AS_sub3_trial1.xlsx``. This keeps files from different subjects
    from overwriting each other when they are copied into one folder.

    Args:
        source: String searched for the ``sub<digits>`` tag (typically the
            path or name of the subject folder).
        dir: Destination directory.
        file: Name (or path) of the file being copied; only its base name
            is used.

    Returns:
        ``dir`` joined with the tagged file name. If ``source`` contains no
        ``sub<digits>`` tag, the file name is returned unchanged instead of
        failing.
    """
    file_name = os.path.basename(os.path.join(dir, file))

    subject = re.search(r'sub(\d+)', source)
    if subject is None:
        # Nothing to disambiguate with; keep the original name.
        return os.path.join(dir, file_name)

    # Insert the tag before the final part so names with any number of
    # underscore-separated parts keep all of them (and their extension).
    *leading_parts, last_part = file_name.split('_')
    new_file_name = '_'.join([*leading_parts, subject.group(0), last_part])

    return os.path.join(dir, new_file_name)


In [4]:
def _extract_sternum_features(raw_df, window_size, step):
    """Downsample a raw recording and derive the sternum acceleration features."""
    source_columns = ['Time',
                      'sternum Acceleration X (m/s^2)',
                      'sternum Acceleration Y (m/s^2)',
                      'sternum Acceleration Z (m/s^2)']
    # Rename columns to match our accelerometer data
    column_mapping = {'sternum Acceleration X (m/s^2)': 'X',
                      'sternum Acceleration Y (m/s^2)': 'Y',
                      'sternum Acceleration Z (m/s^2)': 'Z'}

    # iloc[::step] starts at row 0, so the first sample is already included;
    # prepending it again would duplicate that sample (same timestamp twice).
    df = (raw_df.iloc[::step][source_columns]
          .rename(columns=column_mapping)
          .reset_index(drop=True))

    # Time is read as microseconds and converted to seconds elapsed since
    # the earliest retained sample.
    timestamps = pd.to_datetime(df['Time'], unit='us')
    df['Time'] = (timestamps - timestamps.min()) / pd.Timedelta(1, unit='s')

    # Euclidean norm of the acceleration vector (always non-negative)
    df['Magnitude'] = np.linalg.norm(df[['X', 'Y', 'Z']], axis=1)

    # Centred rolling statistics; rows near both ends come out as NaN
    rolling_magnitude = df['Magnitude'].rolling(window=window_size, center=True)
    df['SMA'] = rolling_magnitude.mean()
    df['Kurtosis'] = rolling_magnitude.kurt()

    # Tilt angle of Y against the X-Z plane, in degrees
    df['Tilt_Angle'] = np.degrees(
        np.arctan2(df['Y'], np.sqrt(df['X']**2 + df['Z']**2)))

    # Jerk: change in acceleration over change in time across two samples
    elapsed = df['Time'].diff(2)
    for axis in ('X', 'Y', 'Z'):
        df[f'Jerk_{axis}'] = df[axis].diff(2) / elapsed

    return df


def process_excel_files(input_path, output_path, folder, window_size=20, step=12):
    """Convert every ``.xlsx`` recording in ``input_path`` into a feature file.

    Each workbook is downsampled, reduced to the sternum acceleration
    columns (renamed ``X``/``Y``/``Z``), and extended with ``Magnitude``,
    ``SMA``, ``Kurtosis``, ``Tilt_Angle`` and ``Jerk_X/Y/Z`` columns plus a
    boolean ``Fall`` label. Rows containing NaN are dropped and the result
    is written to ``output_path`` under the same file name.

    Args:
        input_path: Directory containing the raw ``.xlsx`` files.
        output_path: Directory for the processed files; created if missing.
        folder: Name of the activity folder. Only ``'Falls'`` recordings are
            labelled with ``label_fall``; everything else gets
            ``Fall = False``.
        window_size: Rolling window length (in downsampled rows) used for
            ``SMA`` and ``Kurtosis``.
        step: Keep every ``step``-th row of the raw recording.
    """
    # Create the destination once, up front. exist_ok makes re-runs safe and
    # avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(output_path, exist_ok=True)

    for excel_file in os.listdir(input_path):
        if not excel_file.endswith('.xlsx'):
            continue

        raw_df = pd.read_excel(os.path.join(input_path, excel_file))
        df = _extract_sternum_features(raw_df, window_size, step)

        # Label data as "Fall" or "Not Fall"
        if folder == 'Falls':
            df = label_fall(df)
        else:
            df['Fall'] = False

        df = df.dropna()

        # Write the processed DataFrame under the original file name
        df.to_excel(os.path.join(output_path, excel_file), index=False)

In [5]:
# Root folders for the raw recordings and for the processed output; both are
# resolved relative to the current working directory. Adjust as needed.
input_folder = 'imu-dataset-raw'
output_folder = 'imu-dataset-processed'
folders = ['sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7', 'sub8', 'sub9', 'sub10']
sub_folders = ['ADLs', 'Falls', 'Near_Falls']

# Ensure the output folder exists; exist_ok makes re-running the cell safe
os.makedirs(output_folder, exist_ok=True)

# Loop through each subject folder (sub1, sub2, ..., sub10)
for folder in folders:
    input_dir = os.path.join(os.getcwd(), input_folder, folder)
    output_dir = os.path.join(os.getcwd(), output_folder, folder)

    for subfolder in sub_folders:
        input_path = os.path.join(input_dir, subfolder)
        output_path = os.path.join(output_dir, subfolder)

        # Skip subject/activity combinations that have no input directory
        if os.path.isdir(input_path):
            # The activity name decides how the recordings are labelled
            process_excel_files(input_path, output_path, subfolder)

print('Processing complete.')

Processing complete.


In [6]:
import shutil

# Collate all processed files in one output folder
combine_folder = 'combine'
file_count = 0

dest_dir = os.path.join(os.getcwd(), output_folder, combine_folder)
os.makedirs(dest_dir, exist_ok=True)

for folder in folders:
    source_dir = os.path.join(os.getcwd(), output_folder, folder)

    for subfolder in sub_folders:
        source_path = os.path.join(source_dir, subfolder)

        # A subject may lack an activity folder; listing it would raise
        if not os.path.isdir(source_path):
            continue

        for excel_file in os.listdir(source_path):
            # Everything below applies to Excel files only. Previously the
            # copy ran for every directory entry, re-copying the previous
            # file (and over-counting) whenever a non-.xlsx entry appeared.
            if not excel_file.endswith('.xlsx'):
                continue

            source_file_path = os.path.join(source_path, excel_file)
            # Pass the subject folder name rather than the full path so an
            # unrelated "sub<N>" in the working directory cannot be picked up.
            dest_file_path = get_output_file_path(folder, dest_dir, excel_file)

            # Report files that are about to be overwritten (e.g. on re-run)
            if os.path.exists(dest_file_path):
                print(f'File exists {source_file_path}')

            shutil.copy(source_file_path, dest_file_path)
            file_count += 1

print(f"Total files {file_count}")



File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\AXR_AS_trial1.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\AXR_AS_trial2.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\AXR_AS_trial3.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\AXR_DS_trial1.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\AXR_DS_trial2.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\AXR_DS_trial3.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\JXL_DSL_trial1.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\JXL_DSL_trial2.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\JXL_DSL_trial3.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\JXL_DSS_trial1.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\JXL_DSS_trial2.xlsx
File exists D:\Projects\SafeBox\imu-dataset-processed\sub1\ADLs\JXL_DSS_trial3.xls