# WESAD Dataset Preprocessing

Este notebook executa o pré-processamento completo do dataset WESAD.

## O que este notebook faz:
1. Carrega dados PKL (sinais fisiológicos)
2. Extrai sinais do Empatica E4 (wrist): BVP, EDA, Temperatura
3. Reamostra os sinais para 4 Hz (frequência comum)
4. Aplica filtragem Butterworth
5. Cria janelas de 60s com 50% overlap
6. Extrai features (tempo + frequência)
7. Filtra labels (stress vs. amusement)
8. Normaliza e divide em train/val/test
9. Salva dados processados no Google Drive

**⚠️ IMPORTANTE**: Execute este notebook apenas UMA VEZ. Os dados processados são salvos permanentemente.


In [None]:
# ============================================================================
# STEP 1: Setup (run 00_colab_setup.ipynb first)
# ============================================================================
# Prints a banner, imports the project's WESAD helpers, defines the Drive
# paths shared by the later cells, and reports how many .pkl files sit
# directly inside the raw-data directory (sub-directories are not searched).

banner = "=" * 70
print(banner)
print("WESAD PREPROCESSING")
print(banner)

# Project preprocessing entry points used by the following cells.
from src.preprocessing.wesad import preprocess_wesad, load_processed_wesad
import os

# Input (raw) and output (processed) locations on the mounted Drive.
RAW_DATA_PATH = '/content/drive/MyDrive/mhealth-data/raw/wesad'
PROCESSED_DATA_PATH = '/content/drive/MyDrive/mhealth-data/processed/wesad'

print(f"Raw data path: {RAW_DATA_PATH}")
print(f"Processed data path: {PROCESSED_DATA_PATH}")

# Report whether the raw directory is present and what it contains.
if os.path.exists(RAW_DATA_PATH):
    print("✅ Raw data directory found")

    # Keep only the top-level entries whose name ends in ".pkl"; the next
    # cell reads `pkl_files` to decide whether preprocessing can start.
    files = os.listdir(RAW_DATA_PATH)
    pkl_files = [name for name in files if name.endswith('.pkl')]

    print(f"Found {len(pkl_files)} pickle files")
    if pkl_files:
        print("Sample files:", pkl_files[:3])
else:
    print(f"❌ Raw data directory not found: {RAW_DATA_PATH}")
    print("Please download WESAD dataset and place it in the raw directory.")
    print("See data/README.md for download instructions.")


In [None]:
# ============================================================================
# STEP 2: Run Preprocessing
# ============================================================================
# Calls preprocess_wesad with the raw and processed paths when the raw
# directory exists and the setup cell found at least one .pkl file;
# otherwise prints a reminder to download the dataset.

# Guard first. The path check comes before the `pkl_files` check so that
# `pkl_files` is never evaluated when the raw directory is missing (the setup
# cell only defines it when the directory exists).
if not os.path.exists(RAW_DATA_PATH) or len(pkl_files) == 0:
    print("❌ Cannot proceed without raw data files")
    print("Please download the WESAD dataset first.")
else:
    print("Starting WESAD preprocessing...")

    # Split fractions and seed forwarded unchanged to preprocess_wesad.
    split_options = {
        "test_size": 0.15,
        "val_size": 0.15,
        "random_state": 42,
    }
    preprocessing_info = preprocess_wesad(
        data_dir=RAW_DATA_PATH,
        output_dir=PROCESSED_DATA_PATH,
        **split_options,
    )

    rule = "=" * 70
    print("\n" + rule)
    print("PREPROCESSING COMPLETE!")
    print(rule)
    print(f"Preprocessing info: {preprocessing_info}")


In [None]:
# ============================================================================
# STEP 3: Verify Processed Data
# ============================================================================
# Reloads whatever is stored under PROCESSED_DATA_PATH and prints the split
# shapes, the per-class sample counts and the metadata returned with them.

# Imported here because this cell calls np.bincount and no earlier cell of
# this notebook brings NumPy into scope (it would raise NameError otherwise).
import numpy as np

if os.path.exists(PROCESSED_DATA_PATH):
    print("Verifying processed data...")

    # load_processed_wesad returns a 9-tuple; `scaler` and `label_encoder`
    # are unpacked but not inspected in this cell.
    (
        X_train, X_val, X_test,
        y_train, y_val, y_test,
        scaler, label_encoder, info,
    ) = load_processed_wesad(PROCESSED_DATA_PATH)

    print("\nData shapes:")
    print(f"  Train: {X_train.shape}")
    print(f"  Val:   {X_val.shape}")
    print(f"  Test:  {X_test.shape}")

    # np.bincount gives the number of samples per integer class id.
    # NOTE(review): assumes the y_* arrays are 1-D non-negative integers
    # (e.g. label-encoded) — confirm against load_processed_wesad.
    print("\nLabel distribution:")
    print(f"  Train: {np.bincount(y_train)}")
    print(f"  Val:   {np.bincount(y_val)}")
    print(f"  Test:  {np.bincount(y_test)}")

    # Metadata keys read from the `info` mapping returned by the loader.
    print(f"\nClass names: {info['class_names']}")
    print(f"Features per sample: {info['n_features']}")
    print(f"Original labels: {info['original_labels']}")
    print(f"Filtered labels: {info['filtered_labels']}")

    print("\n✅ WESAD preprocessing completed successfully!")
    print("Data is ready for training models.")

else:
    print("❌ Processed data not found. Please run preprocessing first.")
