# AVI-DYS Post Processing

In [46]:
# Import Libraries
import os
import numpy as np
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from scipy.signal import savgol_filter
from moviepy.editor import *

## User Specific Parameters (CHANGE THESE)

In [47]:
# Define Parameters
# Root of the project (its 'videos' sub-folder holds the input files) and the
# folder that receives one sub-folder of results per processing stage.
project_path = "C:\\Users\\zacha\\Repos\\AVI-DYS\\AVI-DYS-lower-limb-zach_roth-2023-04-19"
save_path = "C:\\Users\\zacha\\Data\\AVI-DYS\\Results\\Post-Processing"

# Participant identifiers to process
IDs = [
    '001', '003', '004', '005', '115', '116', '117', '118', '120', '121', '122',
    '123', '125', '126', '127', '128', '129', '130', '131', '132', '133', '135',
    '138', '151', '152', '301', '303', '304', '305', '306', '307', '308', '309',
]

# Likelihood threshold
pcutoff = 0.8

visualizations = True

# Create the save paths - ! Make sure these folders exist!
# One output folder per stage, numbered in processing order.
(save_raw,
 save_imp,
 save_filt,
 save_scale,
 save_norm,
 save_concat) = [
    os.path.join(save_path, stage_folder)
    for stage_folder in (
        '0-Raw',
        '1-Imputed',
        '2-Filtered',
        '3-Scaled',
        '4-Normalized',
        '5-Concatenated',
    )
]

## Functions

In [48]:
def fill_gaps(df):
    """Impute missing values with scikit-learn's IterativeImputer.

    The imputer is fitted on the frame it is then applied to. The result is
    rebuilt as a new DataFrame carrying the input's column labels; the input's
    index is not carried over (the result gets a default index).

    Args:
        df (DataFrame): raw data with missing values

    Returns:
        DataFrame: data with imputed values
    """
    column_names = df.columns
    imputer = IterativeImputer(
        max_iter=100,
        random_state=42,  # fixed seed so repeated runs give the same result
        sample_posterior=False,
        skip_complete=True,
    )
    imputed_values = imputer.fit(df).transform(df)
    return pd.DataFrame(imputed_values, columns=column_names)

In [49]:
def scale_mvmt_data(df,scale_factor):
    """Spatially normalize the movement data.

    Every value is divided by the scaling factor (the longest length of the
    KNE_ANK segment) and each column is then shifted so its minimum is 0.

    The column minimum is taken with pandas' NaN-skipping ``min``. The builtin
    ``min`` used previously returns NaN whenever the first value of a column
    is NaN, which turned the entire column into NaN.

    Args:
        df (DataFrame): Left or Right movement data (numeric columns)
        scale_factor (float): The maximum length of the KNE-ANK segment in pixels

    Returns:
        DataFrame: Spatially normalized (scaled) movement data. The input
        frame is not modified.
    """
    scaled = df / scale_factor
    # Column-wise minimum (NaN ignored), broadcast back over the rows.
    return scaled - scaled.min()

In [50]:
def scale_skltn_data(df,scale_factor):
    """Spatially normalize the skeleton data.

    Every column whose name does not contain 'orientation' is divided by the
    scaling factor (the longest length of the KNE_ANK segment). Orientation
    columns are passed through untouched.

    The work is done on a copy, so the frame passed in by the caller is left
    unmodified (previously the argument was scaled in place).

    Args:
        df (DataFrame): Left or Right skeleton data with string column names
        scale_factor (float): The maximum length of the KNE-ANK segment in pixels

    Returns:
        DataFrame: Spatially normalized (scaled) skeleton data
    """
    scaled = df.copy()
    non_orientation_cols = [c for c in scaled.columns if 'orientation' not in c]
    for c in non_orientation_cols:
        scaled[c] = scaled[c] / scale_factor
    return scaled

In [51]:
def normalize_time(df, n_frames=3000):
    """Make a trial exactly ``n_frames`` rows long.

    With the default of 3000 frames this corresponds to 60 s at 50 fps.
    Trials that are too short are extended by repeatedly appending a
    time-reversed (mirrored) copy of the data until the target length is
    reached; trials that are too long are trimmed to the first ``n_frames``
    rows. The returned frame always has a fresh 0..n-1 index.

    Args:
        df (DataFrame): Left or Right movement or skeleton data
        n_frames (int, optional): Target number of rows. Defaults to 3000.

    Returns:
        DataFrame: Temporally normalized movement or skeleton data

    Raises:
        ValueError: if ``df`` has no rows (mirroring an empty frame can never
            reach the target length and would loop forever).
    """
    if len(df) == 0:
        raise ValueError("normalize_time: cannot extend an empty DataFrame")
    # Each pass doubles the length: data followed by its mirror image.
    while len(df) < n_frames:
        df = pd.concat([df,df[::-1]])
    df = df.reset_index(drop=True)
    if len(df) > n_frames:
        df = df.iloc[0:n_frames,]
    return df

In [52]:
# Create Lists of Common Variables

# Master lists: every tracked body part and every skeleton segment, in the
# order used to build the column MultiIndexes below.
bodyparts = [
    'RKNE', 'RANK', 'RD3M', 'RD1M', 'RHLX',
    'LHLX', 'LD1M', 'LD3M', 'LANK', 'LKNE',
]
segments = [
    'RKNE_RANK', 'RANK_RD3M', 'RANK_RD1M', 'RD1M_RHLX',
    'LKNE_LANK', 'LANK_LD3M', 'LANK_LD1M', 'LD1M_LHLX',
]

# Create list of bodyparts / segments per side (names start with 'L' or 'R')
left_bodyparts = [part for part in bodyparts if part.startswith('L')]
right_bodyparts = [part for part in bodyparts if part.startswith('R')]
left_segments = [seg for seg in segments if seg.startswith('L')]
right_segments = [seg for seg in segments if seg.startswith('R')]

# Create a list of left side body part columns in the same order as the right side
left_mvmt_cols_reordered = [
    f'{part}_{axis}'
    for part in ('LKNE', 'LANK', 'LD3M', 'LD1M', 'LHLX')
    for axis in ('x', 'y')
]

# Create multindexes for column names
mvmt_cols = pd.MultiIndex.from_product(
    [bodyparts, ['x','y','likelihood']],
    names=["bodyparts", "coords"],
)
iterables = [segments, ['length','orientation','likelihood']]
skltn_cols = pd.MultiIndex.from_product(iterables, names=["segments", "coords"])

In [53]:
# Create the data path (the video folder in the project path)
data_path = os.path.join(project_path,'videos')

# Get a list of file names.
# os.listdir returns entries in arbitrary order; sorting makes the file that
# wins when several names match the same pattern the same on every machine.
data_file_names = sorted(os.listdir(data_path))

In [54]:
# Process the data

def _apply_pcutoff(df, keep, drop, value_suffixes, cutoff):
    """Keep one side's columns and blank out low-likelihood values.

    Columns are expected to be named '<name>_<suffix>' with one
    '<name>_likelihood' column per name.

    Args:
        df (DataFrame): table holding columns for both sides
        keep (list of str): names whose value columns are retained
        drop (list of str): names whose columns are removed entirely
        value_suffixes (tuple of str): suffixes of the value columns
            (e.g. ('x', 'y') or ('length', 'orientation'))
        cutoff (float): rows with likelihood below this get NaN values

    Returns:
        DataFrame: only the value columns of ``keep``, in their original
        order, with low-likelihood entries replaced by NaN
    """
    # Drop the other side first; this also yields a new frame, so the masking
    # below never writes into the caller's object.
    unwanted = [f'{name}_{suffix}'
                for name in drop
                for suffix in (*value_suffixes, 'likelihood')]
    df = df.drop(unwanted, axis=1)

    for name in keep:
        likelihood_col = f'{name}_likelihood'
        value_cols = [f'{name}_{suffix}' for suffix in value_suffixes]
        df.loc[df[likelihood_col] < cutoff, value_cols] = np.nan
        df = df.drop([likelihood_col], axis=1)
    return df


# Column names shared by the left and right concatenated outputs
# (side prefix removed so both sides line up). Loop-invariant, so built once.
cols = ['KNE_x', 'KNE_y', 
        'ANK_x', 'ANK_y', 
        'D3M_x', 'D3M_y', 
        'D1M_x', 'D1M_y', 
        'HLX_x', 'HLX_y', 
        'KNE_ANK_length', 'KNE_ANK_orientation', 
        'ANK_D3M_length', 'ANK_D3M_orientation',
        'ANK_D1M_length', 'ANK_D1M_orientation', 
        'D1M_HLX_length', 'D1M_HLX_orientation',
        ]

# Savitzky-Golay filter settings: window length (in samples) and polynomial order
savgol_window = 30
savgol_polyorder = 2

# Iterate over the participants
for i in IDs:

    # Reset the paths for every participant. Without this, a missing file
    # would silently reuse the path found for the previous participant.
    left_mvmt_path = right_mvmt_path = None
    left_skltn_path = right_skltn_path = None
    left_video_path = None

    # Get the paths to the left ('-12') and right ('-11') mvmt and skltn data
    for n in data_file_names:
        if i+'-12' in n and 'filtered.csv' in n:
            left_mvmt_path = os.path.join(data_path,n)
        elif  i+'-11' in n and 'filtered.csv' in n:
            right_mvmt_path = os.path.join(data_path,n)
        elif i+'-12' in n and 'filtered_skeleton.csv' in n:
            left_skltn_path = os.path.join(data_path,n)
        elif i+'-11' in n and 'filtered_skeleton.csv' in n:
            right_skltn_path = os.path.join(data_path,n)
        elif i+'-12' in n and 'labeled' in n:
            left_video_path = os.path.join(data_path,n)

    required = {
        'left movement csv': left_mvmt_path,
        'right movement csv': right_mvmt_path,
        'left skeleton csv': left_skltn_path,
        'right skeleton csv': right_skltn_path,
        'left labeled video': left_video_path,
    }
    missing = [label for label, path in required.items() if path is None]
    if missing:
        raise FileNotFoundError(
            f"Participant {i}: no file found in {data_path} for: {', '.join(missing)}"
        )

    # Read in the movement and skeleton data
    mvmt_left = pd.read_csv(left_mvmt_path,index_col=0,names=mvmt_cols,skiprows=3)
    mvmt_right = pd.read_csv(right_mvmt_path,index_col=0,names=mvmt_cols,skiprows=3)
    skltn_left = pd.read_csv(left_skltn_path,index_col=0,names=skltn_cols,skiprows=2)
    skltn_right = pd.read_csv(right_skltn_path,index_col=0,names=skltn_cols,skiprows=2)

    # Rename columns: flatten the two-level names to '<name>_<coord>'
    mvmt_left.columns = ['_'.join(col) for col in mvmt_left.columns]
    mvmt_right.columns = ['_'.join(col) for col in mvmt_right.columns]
    skltn_left.columns = ['_'.join(col) for col in skltn_left.columns]
    skltn_right.columns = ['_'.join(col) for col in skltn_right.columns]

    # Apply p-cutoff (the configured `pcutoff`) and drop unused columns
    mvmt_left = _apply_pcutoff(mvmt_left, left_bodyparts, right_bodyparts,
                               ('x', 'y'), pcutoff)
    mvmt_right = _apply_pcutoff(mvmt_right, right_bodyparts, left_bodyparts,
                                ('x', 'y'), pcutoff)
    skltn_left = _apply_pcutoff(skltn_left, left_segments, right_segments,
                                ('length', 'orientation'), pcutoff)
    skltn_right = _apply_pcutoff(skltn_right, right_segments, left_segments,
                                 ('length', 'orientation'), pcutoff)

    # Reorder the left movement columns to the same order as the right side
    # (.copy() so the column assignments below act on an independent frame)
    mvmt_left = mvmt_left[left_mvmt_cols_reordered].copy()

    # Get Scale Factors for Spatial Normalization.
    # Series.max() skips NaN; the builtin max() returns NaN when the first
    # frame was masked by the p-cutoff, which would turn all scaled data into NaN.
    scale_factor_left = skltn_left['LKNE_LANK_length'].max()
    scale_factor_right = skltn_right['RKNE_RANK_length'].max()

    # Mirror the left movement data
    # left_video_path already includes data_path; the clip is only needed for
    # its width and is closed straight away to release the video reader.
    clip = VideoFileClip(left_video_path)
    try:
        width = clip.w
    finally:
        clip.close()
    # NOTE(review): abs(x - width/2) folds x about the vertical centre line;
    # a plain reflection would be width - x. Formula kept as-is - confirm intent.
    for c in mvmt_left.columns:
        if '_y' not in c:
            mvmt_left[c] = abs(mvmt_left[c] - (width/2))

    # Mirror the left segment orientations.
    # This loop must run exactly once: it was previously nested inside the
    # loop above and therefore ran once per movement column (an even number of
    # times), so the 180 - angle flips cancelled out.
    for c in skltn_left.columns:
        if 'length' not in c:
            skltn_left[c] = 180 - skltn_left[c]

    # Save the raw data
    mvmt_left.to_csv(os.path.join(save_raw,f'{i}-mvmt-left.csv'))
    mvmt_right.to_csv(os.path.join(save_raw,f'{i}-mvmt-right.csv'))
    skltn_left.to_csv(os.path.join(save_raw,f'{i}-skltn-left.csv'))
    skltn_right.to_csv(os.path.join(save_raw,f'{i}-skltn-right.csv'))

    # Interpolate Missing Data with IterativeImputer
    mvmt_left = fill_gaps(mvmt_left)
    mvmt_right = fill_gaps(mvmt_right)
    skltn_left = fill_gaps(skltn_left)
    skltn_right = fill_gaps(skltn_right)

    # Save the imputed data
    mvmt_left.to_csv(os.path.join(save_imp,f'{i}-mvmt-left.csv'))
    mvmt_right.to_csv(os.path.join(save_imp,f'{i}-mvmt-right.csv'))
    skltn_left.to_csv(os.path.join(save_imp,f'{i}-skltn-left.csv'))
    skltn_right.to_csv(os.path.join(save_imp,f'{i}-skltn-right.csv'))

    # Filter the data (every column of every table, same filter settings)
    for frame in (mvmt_left, mvmt_right, skltn_left, skltn_right):
        for c in frame.columns:
            frame[c] = savgol_filter(frame[c], savgol_window, savgol_polyorder)

    # Save the filtered data
    mvmt_left.to_csv(os.path.join(save_filt,f'{i}-mvmt-left.csv'))
    mvmt_right.to_csv(os.path.join(save_filt,f'{i}-mvmt-right.csv'))
    skltn_left.to_csv(os.path.join(save_filt,f'{i}-skltn-left.csv'))
    skltn_right.to_csv(os.path.join(save_filt,f'{i}-skltn-right.csv'))

    # Scale Data
    mvmt_left = scale_mvmt_data(mvmt_left, scale_factor_left)
    mvmt_right = scale_mvmt_data(mvmt_right, scale_factor_right)
    skltn_left = scale_skltn_data(skltn_left, scale_factor_left)
    skltn_right = scale_skltn_data(skltn_right, scale_factor_right)

    # Save the scaled data
    mvmt_left.to_csv(os.path.join(save_scale,f'{i}-mvmt-left.csv'))
    mvmt_right.to_csv(os.path.join(save_scale,f'{i}-mvmt-right.csv'))
    skltn_left.to_csv(os.path.join(save_scale,f'{i}-skltn-left.csv'))
    skltn_right.to_csv(os.path.join(save_scale,f'{i}-skltn-right.csv'))

    # Temporally normalize the data
    mvmt_left = normalize_time(mvmt_left)
    mvmt_right = normalize_time(mvmt_right)
    skltn_left = normalize_time(skltn_left)
    skltn_right = normalize_time(skltn_right)

    # Save the normalized data
    mvmt_left.to_csv(os.path.join(save_norm,f'{i}-mvmt-left.csv'))
    mvmt_right.to_csv(os.path.join(save_norm,f'{i}-mvmt-right.csv'))
    skltn_left.to_csv(os.path.join(save_norm,f'{i}-skltn-left.csv'))
    skltn_right.to_csv(os.path.join(save_norm,f'{i}-skltn-right.csv'))

    # Concat the movement and skeleton data of each side together
    left_data = pd.concat([mvmt_left,skltn_left],axis=1)
    left_data.columns = cols
 
    right_data = pd.concat([mvmt_right,skltn_right],axis=1)
    right_data.columns = cols

    # Save the concatenated data
    left_data.to_csv(os.path.join(save_concat,f'{i}-left-data.csv'))
    right_data.to_csv(os.path.join(save_concat,f'{i}-right-data.csv'))