In [None]:
# Mount Google Drive at /content/drive so the "/content/drive/My Drive/..."
# paths used by the following cells resolve inside this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd

# Source folder: every sub-folder directly inside it is scanned for CSV files
folder_path = "/content/drive/My Drive/AI4ALL PROJECT/GAMEEMO"

# Destination folder for the cleaned copies (created when missing)
clean_folder = "/content/drive/My Drive/AI4ALL PROJECT/CLEANED"
os.makedirs(clean_folder, exist_ok=True)

subfolders = os.listdir(folder_path)

for subfolder in subfolders:
    subfolder_path = os.path.join(folder_path, subfolder)

    # Entries that are not directories are skipped
    if os.path.isdir(subfolder_path):
        for file in os.listdir(subfolder_path):
            if not file.endswith('.csv'):
                continue

            file_path = os.path.join(subfolder_path, file)
            # All cleaned files land in one flat folder, prefixed with "cleaned_"
            save_path = os.path.join(clean_folder, f"cleaned_{file}")

            try:
                # Cleaning pipeline, applied in this order:
                #   1. drop columns that contain only nulls
                #   2. drop any remaining column whose name starts with 'Unnamed'
                #   3. drop every row that still contains a null
                df = (
                    pd.read_csv(file_path)
                    .dropna(axis=1, how='all')
                    .pipe(
                        lambda frame: frame.drop(
                            columns=[col for col in frame.columns if col.startswith('Unnamed')],
                            errors='ignore',
                        )
                    )
                    .dropna()
                )

                df.to_csv(save_path, index=False)

            except Exception as e:
                # Best effort: report the failing file and carry on with the next one
                print(f"Error processing {file}: {e}")


In [None]:
import os
import pandas as pd
import re

# EEG channel names grouped by the brain region they are averaged into
brain_regions = {
    "Frontal": ['AF3', 'AF4', 'F3', 'F4', 'F7', 'F8'],
    "Temporal": ['T7', 'T8'],
    "Parietal": ['P7', 'P8'],
    "Occipital": ['O1', 'O2'],
    "Fronto-central": ['FC5', 'FC6']
}

# Folder of cleaned CSVs; every file in it is rewritten in place below
folder_path = "/content/drive/My Drive/AI4ALL PROJECT/CLEANED"

for file in os.listdir(folder_path):
    if not file.endswith('.csv'):
        continue

    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)

    # Participant ("S" + digits) and game ("G" + digits) IDs are parsed from the
    # filename; both fall back to "Unknown" when the pattern is absent
    match = re.search(r'(S\d+)_?(G\d+)', file)
    participant, game = match.groups() if match else ("Unknown", "Unknown")

    # One row-wise mean column per region, built only from the channels that
    # exist in this file; a region with no matching channel gets no column
    for region, channels in brain_regions.items():
        available_channels = [ch for ch in channels if ch in df.columns]
        if available_channels:
            df[region] = df[available_channels].mean(axis=1)

    # Constant ID columns, identical on every row of the file
    df['Participant'] = participant
    df['Game'] = game

    # Overwrite the file that was just read with the enriched frame
    df.to_csv(file_path, index=False)

    print(f"{file} updated and saved.")


In [None]:
import os
import pandas as pd

# Step 1: Load Google Sheet
# The sheet is downloaded through its CSV export URL and parsed into label_df.
# NOTE(review): no credentials are passed, so this presumably requires the sheet
# to be shared for link access — confirm.
sheet_id = "1lse2fQnwI-cZrOQcE2iWBCxMI94L3Mc41fyNmUVbVhU"
sheet_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
label_df = pd.read_csv(sheet_url)

# Step 2: Normalize filenames to match EEG CSV files
def normalize_filename(pdf_name):
    """Convert a rating-sheet PDF name into the corresponding CSV file name.

    Every '(' and ')' character is removed and each occurrence of '.pdf'
    is replaced by '.csv'; all other characters are left unchanged.
    """
    stripped = pdf_name
    for paren in ('(', ')'):
        stripped = stripped.replace(paren, '')
    return stripped.replace('.pdf', '.csv')

label_df['normalized'] = label_df['pdf_file'].apply(normalize_filename)

# Step 3: Columns to keep
rating_cols = ['satisfaction', 'boredom', 'horrible', 'calm', 'funny', 'gender']

# Step 4: Z-score the rating columns per subject.
# The subject ID is the first 3 characters (S01, S02, ...) of the *normalized*
# name, where parentheses are already stripped. Slicing the raw 'pdf_file'
# value instead would turn a name such as "(S01)G1.pdf" into the key "(S0" and
# merge different subjects into one group; for names without a leading
# parenthesis both slices are identical.
rating_value_cols = ['satisfaction', 'boredom', 'horrible', 'calm', 'funny']
subject_ids = label_df['normalized'].str[:3]
ratings_by_subject = label_df.groupby(subject_ids)[rating_value_cols]

centered = label_df[rating_value_cols] - ratings_by_subject.transform('mean')
subject_std = ratings_by_subject.transform('std')

# A subject with a single row, or with identical ratings in a column, has a
# NaN or zero standard deviation. Dividing would write NaN into the ratings,
# so those cells keep the centered value (which is 0) instead.
label_df[rating_value_cols] = (centered / subject_std).where(subject_std > 0, centered)

# Step 5: Create mapping from file to ratings
# Keys are the normalized file names; values are {column name: value} dicts.
rating_mapping = label_df.set_index('normalized')[rating_cols].to_dict(orient='index')

# Step 6: Folder with cleaned EEG CSVs
clean_folder = "/content/drive/My Drive/AI4ALL PROJECT/CLEANED"

# The cleaning step saves each file as "cleaned_<original name>", whereas
# rating_mapping is keyed by the bare name derived from the sheet. The prefix
# is therefore stripped as a fallback when looking a file up.
cleaned_prefix = "cleaned_"

# Step 7: Add ratings + gender into each EEG CSV, remove duplicates and 'dominant_emotion'
for file in os.listdir(clean_folder):
    if file.endswith('.csv'):
        file_path = os.path.join(clean_folder, file)

        try:
            df = pd.read_csv(file_path)

            # Remove dominant_emotion if exists
            if 'dominant_emotion' in df.columns:
                df = df.drop(columns=['dominant_emotion'])

            # Match and add ratings: try the on-disk name first, then the
            # same name without the "cleaned_" prefix
            rating_key = file
            if rating_key not in rating_mapping and file.startswith(cleaned_prefix):
                rating_key = file[len(cleaned_prefix):]

            if rating_key in rating_mapping:
                # Each rating is a single value broadcast to every row of the file
                for col in rating_cols:
                    df[col] = rating_mapping[rating_key][col]
            else:
                # Surface the miss instead of silently leaving the file unlabeled
                print(f"No ratings found for {file}; rating columns not updated.")

            # Remove duplicate columns by name (keeps the first occurrence)
            df = df.loc[:, ~df.columns.duplicated()]

            # Overwrite the file that was just read
            df.to_csv(file_path, index=False)

        except Exception as e:
            # Best effort: report the failing file and carry on with the next one
            print(f"Error processing {file}: {e}")
