# In [2]:
import pandas as pd
import glob

# Function to process and convert custom timestamp format
def convert_custom_timestamp(epoch_str):
    """Convert an integer millisecond timestamp string to float seconds.

    The last three digits of ``epoch_str`` are treated as milliseconds and
    everything before them as whole seconds, e.g. ``"1234567"`` becomes
    ``1234.567``.

    Values with three digits or fewer (``"500"`` -> ``0.5``) and negative
    values (``"-1500"`` -> ``-1.5``) are handled as well; plain string
    slicing would raise or return a wrong result for those.

    Args:
        epoch_str: String holding an integer count of milliseconds.

    Returns:
        The timestamp expressed in seconds, as a float.

    Raises:
        ValueError: If ``epoch_str`` cannot be parsed as an integer.
    """
    # divmod splits the value numerically instead of by character position,
    # while keeping the same "seconds + millis / 1000" arithmetic.
    seconds_part, milliseconds_part = divmod(int(epoch_str), 1000)
    return seconds_part + (milliseconds_part / 1000)

def extract_features(df, label, window_size=1.0):
    """Compute per-window mean/std features for the X, Y and Z columns.

    The ``timestamp`` column is converted to seconds with
    ``convert_custom_timestamp`` and the rows are split into consecutive,
    non-overlapping windows of ``window_size`` seconds, starting at the
    timestamp of the first row. A window is only produced while its end does
    not exceed the timestamp of the last row, so a trailing partial window is
    dropped. Windows that contain no rows are skipped.

    The first and last rows are used as the time bounds, so rows are assumed
    to be in chronological order (TODO confirm against the data source).

    The input frame is not modified: the converted timestamps are kept in a
    separate Series, which makes repeated calls on the same frame safe.

    Args:
        df: DataFrame with columns ``timestamp``, ``X``, ``Y`` and ``Z``.
        label: Value appended as the last element of every feature row.
        window_size: Window length in seconds; must be positive.

    Returns:
        A list of rows ``[mean_x, std_x, mean_y, std_y, mean_z, std_z, label]``,
        one per non-empty window. Empty input yields an empty list.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        # A non-positive step would never advance past the last timestamp.
        raise ValueError("window_size must be positive")

    if len(df) == 0:
        # Nothing to window; avoids indexing the first row of an empty frame.
        return []

    # Converted copy of the timestamps; the caller's column is left untouched.
    timestamps = df['timestamp'].astype(str).apply(convert_custom_timestamp)

    features = []
    start_time = timestamps.iloc[0]
    end_time = timestamps.iloc[-1]  # loop-invariant upper bound

    while start_time + window_size <= end_time:
        # Half-open interval [start_time, start_time + window_size)
        in_window = (timestamps >= start_time) & (timestamps < start_time + window_size)
        window = df[in_window]

        if not window.empty:
            row = []
            for axis in ('X', 'Y', 'Z'):
                row.append(window[axis].mean())
                # std() is NaN for a single-sample window (pandas default ddof=1)
                row.append(window[axis].std())
            row.append(label)
            features.append(row)

        start_time += window_size

    return features

path_to_csv_files = 'yes_hand_wash/*.csv'

# Shared read options: the files are read without a header row, the first
# line is skipped, and only columns 0, 3, 4, 5 are kept under these names.
read_csv_options = {
    'header': None,
    'skiprows': 1,
    'usecols': [0, 3, 4, 5],
    'names': ['timestamp', 'X', 'Y', 'Z'],
}

# Each entry pairs a fixed activity label with the files it applies to.
# glob.glob returns matches in arbitrary order, so the list is sorted to make
# the row order of the output file reproducible across runs and platforms.
labelled_sources = [
    ('hand_wash', sorted(glob.glob(path_to_csv_files))),
    ('no_hand_wash', ["no_hand_wash/G6NZCJ00343622D--Mateen-Left_Wrist-soap-not_hand_wash-1970-01-26-03-19-38.csv"]),
]

# Initialize a list to hold all features
all_features = []

# Process each CSV file under its label
for label, files in labelled_sources:
    for file in files:
        df = pd.read_csv(file, **read_csv_options)
        features = extract_features(df, label)
        all_features.extend(features)

# Save all features to a single CSV file
features_df = pd.DataFrame(all_features, columns=['mean_x', 'std_x', 'mean_y', 'std_y', 'mean_z', 'std_z', 'Activity'])
features_df.to_csv('features.csv', index=False)

