In [14]:
#Part 1: Importing Main Libraries
import pandas as pd #used for data manipulation
import os #used for directory operations
import re #used for regular expression handling

In [29]:
# Part 2: Define the main directory containing the exercise data and sanity-check it.
data_dir = "G:/My Drive/BME 1570 Project/PHYTMO/inertial/inertial_exercise_data"

# Empty list that will later hold the data (plus metadata) read from each file.
metadata = []

# Confirm the directory exists before trying to list anything inside it.
if not os.path.exists(data_dir):
    print("Main directory does not exist.")
else:
    print("Main directory exists.")

    # Every sub-directory of the main directory is treated as an exercise folder.
    exercise_folders = [
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    ]

    if not exercise_folders:
        print("No exercise folders found in the main directory.")
    else:
        print(f"Found {len(exercise_folders)} exercise folders in the main directory.")

    # When no folders were found this loop simply does not run.
    for exercise_folder in exercise_folders:
        exercise_path = os.path.join(data_dir, exercise_folder)
        print(f"\nProcessing folder: {exercise_folder}")

        # Names of the .csv files stored directly in this folder.
        csv_files = [name for name in os.listdir(exercise_path) if name.endswith('.csv')]

        # Nothing to test-read here: report it and move on to the next folder.
        if not csv_files:
            print(f"No CSV files found in folder {exercise_folder}.")
            continue

        print(f"Found {len(csv_files)} CSV files in {exercise_folder}.")

        # Read only the first CSV as a smoke test that the files are accessible
        # and parseable; the data read here is just displayed.
        test_file_path = os.path.join(exercise_path, csv_files[0])
        print(f"Attempting to read file: {test_file_path}")

        try:
            test_df = pd.read_csv(test_file_path)
            print("CSV file read successfully! Displaying the first few rows:")
            print(test_df.head())  # first few rows, to verify the contents
        except Exception as e:
            # Report the failure and keep checking the remaining folders.
            print(f"Error reading {test_file_path}: {e}")


Main directory exists.
Found 10 exercise folders in the main directory.

Processing folder: zMislabeled
No CSV files found in folder zMislabeled.

Processing folder: Extension Arms Over Head
Found 464 CSV files in Extension Arms Over Head.
Attempting to read file: G:/My Drive/BME 1570 Project/PHYTMO/inertial/inertial_exercise_data\Extension Arms Over Head\A01EAHXLAY_1.csv
CSV file read successfully! Displaying the first few rows:
    Time (s)  Gyroscope X (deg/s)  Gyroscope Y (deg/s)  Gyroscope Z (deg/s)  \
0  11.350807             40.48140            -46.52059            -5.424869   
1  11.360693             50.55264            -53.34631            -0.232101   
2  11.370578             58.51016            -63.40838             6.182496   
3  11.380463             62.17809            -73.23508            13.330190   
4  11.390348             61.61857            -80.94344            20.783340   

   Accelerometer X (g)  Accelerometer Y (g)  Accelerometer Z (g)  \
0            -0.080877 

In [38]:
# Part 3: Incorporating data into the metadata list.

# Start from an empty list every time this cell runs. Without this reset,
# re-running the cell would append every file a second time and the
# concatenated dataset built from `metadata` would contain duplicated rows.
metadata = []

# Filename nomenclature, compiled once instead of on every loop iteration:
#   letter + 2 digits | 4 letters | 2 letters | Y or N | "_" | digits | ".csv"
# The five groups are stored below as patient ID, exercise code, muscle group,
# correctness and series number.
filename_pattern = re.compile(r"([A-Z]\d{2})([A-Z]{4})([A-Z]{2})([YN])_(\d+)\.csv")

# Loop through the entries of the main directory.
for exercise_folder in os.listdir(data_dir):
    exercise_path = os.path.join(data_dir, exercise_folder)  # path of this entry

    # Only folders are of interest; skip anything else.
    if not os.path.isdir(exercise_path):
        continue

    print(f"Processing Folder: {exercise_folder}")  # checkpoint: current folder

    # Check whether there are .csv files in the folder.
    csv_files = [f for f in os.listdir(exercise_path) if f.endswith(".csv")]
    if not csv_files:
        print(f"  Skipping {exercise_folder} (no CSV files found).")
        continue  # skip to the next folder if no CSV files are present

    # Process each CSV file in the folder.
    for filename in csv_files:
        # fullmatch requires the WHOLE filename to follow the nomenclature, so a
        # name with extra text after ".csv" (which a start-anchored match would
        # accept) is reported as not matching instead of being loaded.
        match = filename_pattern.fullmatch(filename)

        if not match:
            print(f"  Filename {filename} does not match the expected pattern.")
            continue

        # Metadata extracted from the filename.
        patient_id, exercise_code, muscle_group, correctness, series = match.groups()

        # Full path of the CSV to read.
        file_path = os.path.join(exercise_path, filename)

        try:
            data = pd.read_csv(file_path)
            print(f" Successfully read file: {filename}")  # checkpoint: current file

            # Add the metadata as constant-valued columns of the DataFrame.
            data['Patient_ID'] = patient_id            # patient ID
            data['Exercise'] = exercise_code           # exercise code (from filename)
            data['Exercise_Type'] = exercise_folder    # exercise type (from folder name)
            data['Muscle_Group'] = muscle_group        # muscle group
            data['Correctness'] = correctness          # correctness flag (Y/N)
            data['Series'] = int(series)               # series number as an integer

            # Keep the DataFrame for the later concatenation.
            metadata.append(data)
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest.
            print(f"  Error reading {file_path}: {e}")
 




Processing Folder: zMislabeled
  Skipping zMislabeled (no CSV files found).
Processing Folder: Extension Arms Over Head
 Successfully read file: A01EAHXLAY_1.csv
 Successfully read file: A01EAHXLAY_2.csv
 Successfully read file: A01EAHXLAN_1.csv
 Successfully read file: A01EAHXLAN_2.csv
 Successfully read file: A03EAHXLAY_1.csv
 Successfully read file: A03EAHXLAY_2.csv
 Successfully read file: A03EAHXLAN_1.csv
 Successfully read file: A03EAHXLAN_2.csv
 Successfully read file: A04EAHXLAY_1.csv
 Successfully read file: A04EAHXLAY_2.csv
 Successfully read file: A04EAHXLAN_1.csv
 Successfully read file: A04EAHXLAN_2.csv
 Successfully read file: A05EAHXLAY_1.csv
 Successfully read file: A05EAHXLAY_2.csv
 Successfully read file: A05EAHXLAN_1.csv
 Successfully read file: A05EAHXLAN_2.csv
 Successfully read file: A06EAHXLAY_1.csv
 Successfully read file: A06EAHXLAY_2.csv
 Successfully read file: A06EAHXLAN_1.csv
 Successfully read file: A06EAHXLAN_2.csv
 Successfully read file: A07EAHXLAY_1.cs

In [41]:
# Part 4: Combine every loaded DataFrame into one single DataFrame.

if not metadata:
    print("No data files were loaded.")
else:
    # Stack the per-file frames on top of each other, renumbering the rows from 0.
    all_data = pd.concat(metadata, ignore_index=True)
    print("\nAll files successfully concatenated.")

    # Column layout for readability: identifying metadata first, then the
    # gyroscope, accelerometer and magnetometer channels.
    # NOTE(review): only the columns listed here are kept, so any other column
    # present in the concatenated frame (e.g. 'Exercise_Type') is dropped by the
    # selection below — confirm that this is intended.
    column_order = [
        'Patient_ID',
        'Exercise',
        'Muscle_Group',
        'Correctness',
        'Series',
        'Gyroscope X (deg/s)',
        'Gyroscope Y (deg/s)',
        'Gyroscope Z (deg/s)',
        'Accelerometer X (g)',
        'Accelerometer Y (g)',
        'Accelerometer Z (g)',
        'Magnetometer X (uT)',
        'Magnetometer Y (uT)',
        'Magnetometer Z (uT)',
    ]
    all_data = all_data[column_order]

    # Show the first rows to verify the structure.
    print(all_data.head())




All files successfully concatenated.
  Patient_ID Exercise Muscle_Group Correctness  Series  Gyroscope X (deg/s)  \
0        A01     EAHX           LA           1       1             40.48140   
1        A01     EAHX           LA           1       1             50.55264   
2        A01     EAHX           LA           1       1             58.51016   
3        A01     EAHX           LA           1       1             62.17809   
4        A01     EAHX           LA           1       1             61.61857   

   Gyroscope Y (deg/s)  Gyroscope Z (deg/s)  Accelerometer X (g)  \
0            -46.52059            -5.424869            -0.080877   
1            -53.34631            -0.232101            -0.081777   
2            -63.40838             6.182496            -0.066653   
3            -73.23508            13.330190            -0.035095   
4            -80.94344            20.783340             0.001714   

   Accelerometer Y (g)  Accelerometer Z (g)  Magnetometer X (uT)  \
0         