In [None]:
import pandas as pd
import glob
import os

# Function to process and convert custom timestamp format
def convert_custom_timestamp(epoch_str):
    """Convert an epoch timestamp given as a millisecond digit string to seconds.

    The last three digits of the value are milliseconds and everything
    before them is whole seconds, e.g. ``"1700000000123"`` -> ``1700000000.123``.

    Args:
        epoch_str: String holding an integer number of milliseconds.

    Returns:
        The timestamp in seconds as a float.

    Raises:
        ValueError: If ``epoch_str`` is not a valid integer literal.
    """
    # Parse the whole value once instead of slicing the string: slicing
    # produced an empty seconds part (and a ValueError) for inputs shorter
    # than four characters, and mis-handled a leading minus sign.
    seconds_part, milliseconds_part = divmod(int(epoch_str), 1000)
    # Same arithmetic as before, so results for ordinary inputs are unchanged.
    return seconds_part + (milliseconds_part / 1000)

def extract_features(df, label, window_size=1.0, slide_size=1.0):
    """Compute per-window summary statistics for the X, Y and Z columns.

    Timestamps are converted to seconds with ``convert_custom_timestamp``.
    Windows start at the first row's timestamp and advance by ``slide_size``;
    each window covers ``[start, start + window_size)``. Windows containing
    no rows are skipped.

    Args:
        df: DataFrame with columns ``timestamp``, ``X``, ``Y`` and ``Z``.
            The frame is not modified.
        label: Value appended as the last element of every feature row.
        window_size: Window length, in seconds.
        slide_size: Step between consecutive window starts, in seconds.
            Must be positive.

    Returns:
        A list of rows. Each row holds mean, std, median and root mean
        square for X, then Y, then Z, followed by ``label``. The std is
        pandas' default ``Series.std``, so a single-sample window yields NaN.

    Raises:
        ValueError: If ``slide_size`` is not positive.
    """
    if slide_size <= 0:
        # A non-positive step would never advance past the last timestamp.
        raise ValueError("slide_size must be positive")
    if df.empty:
        # No rows means no first timestamp to anchor the windows on.
        return []

    # Convert into a separate Series so the caller's frame keeps its raw
    # timestamps and can safely be passed in again.
    timestamps = df['timestamp'].astype(str).apply(convert_custom_timestamp)

    features = []
    start_time = timestamps.iloc[0]
    end_time = timestamps.iloc[-1]  # loop-invariant, read once

    # NOTE: the bound uses slide_size, not window_size, so when
    # window_size > slide_size the final windows can extend past end_time.
    while start_time + slide_size <= end_time:
        in_window = (timestamps >= start_time) & (timestamps < start_time + window_size)
        window = df[in_window]

        if not window.empty:
            row = []
            for axis in ('X', 'Y', 'Z'):
                values = window[axis]
                row.extend([
                    values.mean(),
                    values.std(),
                    values.median(),
                    values.pow(2).mean() ** 0.5,  # root mean square
                ])
            row.append(label)
            features.append(row)

        start_time += slide_size

    return features

def get_all_features(path_to_csv_files, window_size=1.0, slide_size=1.0):
    """Extract windowed features from every CSV file matching a glob pattern.

    Each file's label is derived from its base name: names containing
    "Amir" map to "left", "Mateen" to "right", "William" to "up", and
    anything else to "nothing". The first row of each file is skipped and
    the columns at positions 0, 3, 4 and 5 are read as ``timestamp``,
    ``X``, ``Y`` and ``Z``.

    Args:
        path_to_csv_files: Glob pattern selecting the CSV files.
        window_size: Forwarded to ``extract_features``.
        slide_size: Forwarded to ``extract_features``.

    Returns:
        A single list with the feature rows of all files, in sorted
        file-path order. Empty when no file matches.
    """
    # Checked in order; the first substring found in the file name wins.
    name_to_label = (
        ("Amir", "left"),
        ("Mateen", "right"),
        ("William", "up"),
    )

    all_features = []
    # glob returns matches in arbitrary order; sorting makes the row order
    # of the result reproducible across runs and machines.
    for file in sorted(glob.glob(path_to_csv_files)):
        file_name = os.path.basename(file)
        label = next(
            (lbl for marker, lbl in name_to_label if marker in file_name),
            "nothing",
        )

        df = pd.read_csv(file, header=None, skiprows=1, usecols=[0, 3, 4, 5],
                         names=['timestamp', 'X', 'Y', 'Z'])
        if df.empty:
            # A frame without rows contributes no windows; skip it rather
            # than handing it to the feature extractor.
            continue

        all_features.extend(extract_features(df, label, window_size, slide_size))

    return all_features

# Window length in seconds, chosen from the average gesture duration
window_size = 1
all_features = get_all_features('./all_data_uncleaned/*.csv', window_size=window_size)

# Build the output table: four statistics per axis (x, y, z in that order),
# followed by the activity label, then write it out without the index column.
features_df = pd.DataFrame(
    all_features,
    columns=[
        f'{stat}_{axis}'
        for axis in ('x', 'y', 'z')
        for stat in ('mean', 'std', 'median', 'root_mean_square')
    ] + ['Activity'],
)
features_df.to_csv('features_window1.csv', index=False)
