<a href="https://colab.research.google.com/github/vshukl01/Neurova_Shield/blob/main/3_df_newlabelled.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd

In [None]:
# Mount Google Drive so the files under MyDrive are reachable from this runtime
from google.colab import drive
drive.mount('/content/drive')

# Root folder that holds the per-subject folders
base_dir = '/content/drive/MyDrive/WESAD/df_files'

# Destination of the final CSV that merges every subject
merged_output_path = '/content/drive/MyDrive/WESAD/df_files/all_subjects_labeled_final.csv'

# Accumulator for the cleaned per-subject DataFrames
merged_df_list = list()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Questionnaire columns that describe the time window itself rather than a label value
QUEST_WINDOW_COLS = ('Condition', 'Start_Time_min', 'End_Time_min')


def _label_sensor_frame(sensor_df, quest_df):
    """Add elapsed-time columns and questionnaire labels to one subject's sensor frame.

    Parameters
    ----------
    sensor_df : pd.DataFrame
        Sensor samples with a datetime-typed 'timestamp' column. Elapsed time
        is measured from the first row's timestamp.
    quest_df : pd.DataFrame
        One row per time window, with 'Condition', 'Start_Time_min' and
        'End_Time_min' columns; every other column is treated as a label.

    Returns
    -------
    tuple[pd.DataFrame, list]
        A new, 0-based-indexed frame with the added columns, and the list of
        label column names that were copied from ``quest_df``.

    Raises
    ------
    IndexError
        If ``sensor_df`` has no rows (there is no first timestamp to anchor to).
    KeyError
        If a required column is missing from either frame.
    """
    # reset_index returns a new frame, so the caller's DataFrame is left untouched
    labeled_df = sensor_df.reset_index(drop=True)

    # Elapsed time since the first sample, expressed in several convenient forms
    elapsed_s = (labeled_df['timestamp'] - labeled_df['timestamp'].iloc[0]).dt.total_seconds()
    labeled_df['minute'] = (elapsed_s / 60).round(3)  # Rounded to 3 decimal places
    labeled_df['minute_int'] = (elapsed_s // 60).astype(int)
    labeled_df['second'] = (elapsed_s % 60).astype(int)
    labeled_df['MM_SS'] = (
        labeled_df['minute_int'].astype(str).str.zfill(2)
        + ":"
        + labeled_df['second'].astype(str).str.zfill(2)
    )

    # Skip saved-index artifacts ('index', 'Unnamed: 0', ...) so they are not
    # propagated into the labeled output as if they were questionnaire items.
    label_cols = [
        col for col in quest_df.columns
        if col not in QUEST_WINDOW_COLS
        and str(col) != 'index'
        and not str(col).startswith('Unnamed')
    ]

    # Rows that fall outside every window keep <NA> in all label columns
    labeled_df['Condition'] = pd.NA
    for col in label_cols:
        labeled_df[col] = pd.NA

    # Windows are inclusive at both ends; if two windows overlap, the later
    # questionnaire row wins because it is applied last.
    for _, window in quest_df.iterrows():
        in_window = (
            (labeled_df['minute'] >= window['Start_Time_min'])
            & (labeled_df['minute'] <= window['End_Time_min'])
        )
        labeled_df.loc[in_window, 'Condition'] = window['Condition']
        for col in label_cols:
            labeled_df.loc[in_window, col] = window[col]

    return labeled_df, label_cols


# Loop through each subject folder
for folder in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder)

    if os.path.isdir(folder_path) and folder.startswith("S") and folder.endswith("_df_files"):
        subject = folder.split("_")[0].lower()  # e.g., 's2'
        sensor_file = os.path.join(folder_path, f"{subject}_1hz_df.csv")
        quest_file = os.path.join(folder_path, f"{subject}_quest_combined_df.csv")
        output_file = os.path.join(folder_path, f"{subject}_1hz_labeled_df.csv")

        print(f"\n📁 Processing {folder}...")

        # Best-effort per subject: a failure is reported and the loop moves on
        try:
            # Load sensor and questionnaire data
            quest_df = pd.read_csv(quest_file)
            sensor_df, emotion_cols = _label_sensor_frame(
                pd.read_csv(sensor_file, parse_dates=['timestamp']),
                quest_df,
            )

            # Save enriched CSV without index
            sensor_df.to_csv(output_file, index=False)
            print(f"✅ Saved labeled file: {output_file}")

            # Print preview
            print("📊 Preview:")
            print(sensor_df[['timestamp', 'minute', 'minute_int', 'second', 'MM_SS', 'Condition'] + emotion_cols].head(5))

        except Exception as e:
            print(f"❌ Error processing {folder}: {e}")


📁 Processing S10_df_files...
✅ Saved labeled file: /content/drive/MyDrive/WESAD/df_files/S10_df_files/s10_1hz_labeled_df.csv
📊 Preview:
            timestamp  minute  minute_int  second  MM_SS Condition index  \
0 2017-07-25 07:05:08   0.000           0       0  00:00      <NA>  <NA>   
1 2017-07-25 07:05:09   0.017           0       1  00:01      <NA>  <NA>   
2 2017-07-25 07:05:10   0.033           0       2  00:02      <NA>  <NA>   
3 2017-07-25 07:05:11   0.050           0       3  00:03      <NA>  <NA>   
4 2017-07-25 07:05:12   0.067           0       4  00:04      <NA>  <NA>   

  Active Distressed Interested  ... I am worried I feel pleasant Valence  \
0   <NA>       <NA>       <NA>  ...         <NA>            <NA>    <NA>   
1   <NA>       <NA>       <NA>  ...         <NA>            <NA>    <NA>   
2   <NA>       <NA>       <NA>  ...         <NA>            <NA>    <NA>   
3   <NA>       <NA>       <NA>  ...         <NA>            <NA>    <NA>   
4   <NA>       <NA>       

In [None]:
# Start from an empty list every time this cell runs. Appending to a list that
# was created in another cell would duplicate every subject's rows in the
# merged CSV whenever this cell is re-executed on its own.
merged_df_list = []

# Loop through all subject folders
# (sorted() is lexicographic, so e.g. 'S10_df_files' comes before 'S2_df_files')
for folder in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder)

    if os.path.isdir(folder_path) and folder.startswith("S") and folder.endswith("_df_files"):
        subject = folder.split("_")[0].upper()  # e.g., 'S10'
        labeled_file = os.path.join(folder_path, f"{subject.lower()}_1hz_labeled_df.csv")

        # Best-effort per subject: a failure is reported and the loop moves on
        try:
            # Load file
            df = pd.read_csv(labeled_file)

            # Drop index column if it exists (like "Unnamed: 0")
            df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

            # Add Subject_No so rows stay attributable after merging
            df['Subject_No'] = subject

            # Save cleaned file (overwrite)
            df.to_csv(labeled_file, index=False)
            print(f"✅ Cleaned and saved: {labeled_file}")

            # Add to merged list
            merged_df_list.append(df)

        except Exception as e:
            print(f"❌ Error processing {labeled_file}: {e}")

# Merge all files into one
if merged_df_list:
    merged_df = pd.concat(merged_df_list, ignore_index=True)
    merged_df.to_csv(merged_output_path, index=False)
    print(f"\n✅ Merged CSV saved to: {merged_output_path}")
    print(f"🔢 Total merged rows: {len(merged_df)}")
else:
    print("❌ No valid labeled files found to merge.")


✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S10_df_files/s10_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S11_df_files/s11_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S13_df_files/s13_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S14_df_files/s14_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S15_df_files/s15_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S16_df_files/s16_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S17_df_files/s17_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S2_df_files/s2_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S3_df_files/s3_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_files/S4_df_files/s4_1hz_labeled_df.csv
✅ Cleaned and saved: /content/drive/MyDrive/WESAD/df_fil