# Differential Privacy Training

Este notebook treina modelos LSTM com Differential Privacy (DP) para os dois datasets: Sleep-EDF e WESAD.

## O que este notebook faz:
1. Carrega dados processados (Sleep-EDF e WESAD)
2. Treina modelos LSTM com DP para diferentes valores de epsilon (ε = 0.1, 1.0, 5.0 e 10.0)
3. Avalia performance e privacidade
4. Salva modelos e resultados
5. Cria visualizações dos trade-offs

**Pré-requisitos**: Execute os notebooks 00, 01, 02 e 03 primeiro.


In [None]:
# ============================================================================
# STEP 1: Setup and Load Data
# ============================================================================

# Emit the notebook banner in one call: a 70-character rule, the title and a
# closing rule, each terminated by a newline.
print("=" * 70, "DIFFERENTIAL PRIVACY TRAINING", "=" * 70, sep="\n")

# Import modules
from src.privacy.dp_training import train_with_dp, evaluate_dp_model, save_dp_model, get_dp_configs
from src.preprocessing.sleep_edf import load_processed_sleep_edf
from src.preprocessing.wesad import load_processed_wesad
from src.evaluation.visualization import plot_tradeoff_curve, plot_training_history
import numpy as np
import os

# Define paths
# Processed inputs, saved models and results all live under the same
# Google Drive folder.
SLEEP_DATA_PATH = '/content/drive/MyDrive/mhealth-data/processed/sleep-edf'
WESAD_DATA_PATH = '/content/drive/MyDrive/mhealth-data/processed/wesad'
MODELS_PATH = '/content/drive/MyDrive/mhealth-data/models'
RESULTS_PATH = '/content/drive/MyDrive/mhealth-data/results'

print("Loading processed data...")

# Load Sleep-EDF data
# X_train_sleep doubles as the availability flag: it is set to None when the
# processed folder is missing, and the Sleep-EDF training cell checks it.
if not os.path.exists(SLEEP_DATA_PATH):
    print("❌ Sleep-EDF data not found. Run notebook 01 first.")
    X_train_sleep = None
else:
    # The loader returns nine values; the 7th and 8th are not used here.
    (
        X_train_sleep, X_val_sleep, X_test_sleep,
        y_train_sleep, y_val_sleep, y_test_sleep,
        _, _, sleep_info,
    ) = load_processed_sleep_edf(SLEEP_DATA_PATH)
    print(f"✅ Sleep-EDF loaded: {X_train_sleep.shape}")

# Load WESAD data
# Same convention as above: X_train_wesad is None when the data is missing.
if not os.path.exists(WESAD_DATA_PATH):
    print("❌ WESAD data not found. Run notebook 02 first.")
    X_train_wesad = None
else:
    (
        X_train_wesad, X_val_wesad, X_test_wesad,
        y_train_wesad, y_val_wesad, y_test_wesad,
        _, _, wesad_info,
    ) = load_processed_wesad(WESAD_DATA_PATH)
    print(f"✅ WESAD loaded: {X_train_wesad.shape}")

# Define epsilon values to test
# One DP model per value is trained for each available dataset.
epsilon_values = [0.1, 1.0, 5.0, 10.0]
print(f"\nEpsilon values to test: {epsilon_values}")


In [None]:
# ============================================================================
# STEP 2: Train Sleep-EDF DP Models
# ============================================================================

# X_train_sleep is None when the loading cell could not find the processed
# data; in that case leave an empty result dict so later cells still run.
if X_train_sleep is None:
    print("❌ Skipping Sleep-EDF DP training - data not available")
    sleep_dp_results = {}
else:
    print(f"\n{'=' * 70}")
    print("TRAINING SLEEP-EDF DP MODELS")
    print("=" * 70)

    # Maps a display label such as 'Sleep-EDF DP (ε=1.0)' to its result record.
    sleep_dp_results = {}

    for epsilon in epsilon_values:
        print(f"\n--- Training with epsilon = {epsilon} ---")

        # Metadata attached to both the training config and the results.
        run_tags = {
            'dataset': 'sleep_edf',
            'model_type': 'dp',
            'privacy_technique': 'DP',
            'privacy_parameter': f'ε={epsilon}',
        }

        # Ask for the configuration of this single epsilon and tag it.
        dp_configs = get_dp_configs([epsilon])
        config = dp_configs[0]
        config.update(run_tags)

        # Train on the train split, using the validation split during training.
        model_dp, history_dp, privacy_info = train_with_dp(
            X_train_sleep,
            y_train_sleep,
            X_val_sleep,
            y_val_sleep,
            config,
        )

        # Evaluate on the held-out test split and record both the requested
        # epsilon and the value reported back by training.
        results_dp = evaluate_dp_model(
            model_dp, X_test_sleep, y_test_sleep, config['window_size']
        )
        results_dp.update({
            **run_tags,
            'epsilon': epsilon,
            'epsilon_actual': privacy_info['epsilon_actual'],
        })

        # Persist the model together with its history, results and privacy info.
        model_name = f'sleep_edf_dp_epsilon_{epsilon}'
        save_dp_model(
            model_dp,
            history_dp,
            results_dp,
            privacy_info,
            MODELS_PATH,
            model_name,
        )

        # Keep the record for the visualization and summary cells.
        result_label = f'Sleep-EDF DP (ε={epsilon})'
        sleep_dp_results[result_label] = results_dp

        print(f"✅ Sleep-EDF DP model (ε={epsilon}) trained successfully!")
        print(f"Test Accuracy: {results_dp['accuracy']:.4f}")
        print(f"Actual Epsilon: {privacy_info['epsilon_actual']:.4f}")

    print("\n✅ All Sleep-EDF DP models trained!")


In [None]:
# ============================================================================
# STEP 3: Train WESAD DP Models
# ============================================================================

# X_train_wesad is None when the loading cell could not find the processed
# data; in that case leave an empty result dict so later cells still run.
if X_train_wesad is None:
    print("❌ Skipping WESAD DP training - data not available")
    wesad_dp_results = {}
else:
    print(f"\n{'=' * 70}")
    print("TRAINING WESAD DP MODELS")
    print("=" * 70)

    # Maps a display label such as 'WESAD DP (ε=1.0)' to its result record.
    wesad_dp_results = {}

    for epsilon in epsilon_values:
        print(f"\n--- Training with epsilon = {epsilon} ---")

        # Metadata attached to both the training config and the results.
        run_tags = {
            'dataset': 'wesad',
            'model_type': 'dp',
            'privacy_technique': 'DP',
            'privacy_parameter': f'ε={epsilon}',
        }

        # Ask for the configuration of this single epsilon and tag it.
        dp_configs = get_dp_configs([epsilon])
        config = dp_configs[0]
        config.update(run_tags)

        # Train on the train split, using the validation split during training.
        model_dp, history_dp, privacy_info = train_with_dp(
            X_train_wesad,
            y_train_wesad,
            X_val_wesad,
            y_val_wesad,
            config,
        )

        # Evaluate on the held-out test split and record both the requested
        # epsilon and the value reported back by training.
        results_dp = evaluate_dp_model(
            model_dp, X_test_wesad, y_test_wesad, config['window_size']
        )
        results_dp.update({
            **run_tags,
            'epsilon': epsilon,
            'epsilon_actual': privacy_info['epsilon_actual'],
        })

        # Persist the model together with its history, results and privacy info.
        model_name = f'wesad_dp_epsilon_{epsilon}'
        save_dp_model(
            model_dp,
            history_dp,
            results_dp,
            privacy_info,
            MODELS_PATH,
            model_name,
        )

        # Keep the record for the visualization and summary cells.
        result_label = f'WESAD DP (ε={epsilon})'
        wesad_dp_results[result_label] = results_dp

        print(f"✅ WESAD DP model (ε={epsilon}) trained successfully!")
        print(f"Test Accuracy: {results_dp['accuracy']:.4f}")
        print(f"Actual Epsilon: {privacy_info['epsilon_actual']:.4f}")

    print("\n✅ All WESAD DP models trained!")


In [None]:
# ============================================================================
# STEP 4: Create Trade-off Visualizations
# ============================================================================

print("\n" + "="*70)
print("CREATING DP TRADE-OFF VISUALIZATIONS")
print("="*70)

# Combine all DP results into one mapping (Sleep-EDF entries first).
all_dp_results = {**sleep_dp_results, **wesad_dp_results}

if all_dp_results:
    # The plots and the JSON file below are written into RESULTS_PATH. The
    # helpers that write them are not visible here, so it is unknown whether
    # they create the directory; make sure it exists before writing.
    # This runs only when results exist, i.e. after data was read from Drive.
    os.makedirs(RESULTS_PATH, exist_ok=True)

    # One trade-off curve per dataset that actually produced results.
    for dataset_results, file_stem, display_name in (
        (sleep_dp_results, 'sleep_edf', 'Sleep-EDF'),
        (wesad_dp_results, 'wesad', 'WESAD'),
    ):
        if not dataset_results:
            continue
        plot_tradeoff_curve(
            dataset_results,
            metric='accuracy',
            privacy_param='epsilon',
            save_path=f'{RESULTS_PATH}/dp_tradeoff_{file_stem}.png',
            title=f'{display_name}: Privacy vs. Performance Trade-off'
        )

    # Save all DP results
    from src.evaluation.metrics import save_evaluation_results
    save_evaluation_results(all_dp_results, RESULTS_PATH, 'dp_results.json')

    print("✅ DP visualizations created and results saved!")

else:
    print("❌ No DP results to visualize")


In [None]:
# ============================================================================
# STEP 5: Summary and Next Steps
# ============================================================================

print(f"\n{'=' * 70}")
print("DP TRAINING COMPLETE!")
print("=" * 70)

# Print summary
# One section per dataset; a dataset with no results is skipped entirely.
for heading, dataset_results in (
    ("\nSleep-EDF DP Results:", sleep_dp_results),
    ("\nWESAD DP Results:", wesad_dp_results),
):
    if not dataset_results:
        continue
    print(heading)
    for model_name, results in dataset_results.items():
        # Accuracy, F1 and the epsilon reported by training, 4 decimals each.
        print(
            f"  {model_name}: Accuracy={results['accuracy']:.4f}, "
            f"F1={results['f1_score']:.4f}, "
            f"ε={results['epsilon_actual']:.4f}"
        )

print(f"\nModels saved to: {MODELS_PATH}")
print(f"Results saved to: {RESULTS_PATH}")

# Point the reader at the follow-up notebooks.
print(
    "\nNext steps:",
    "1. Run notebook 05_train_fl.ipynb for Federated Learning",
    "2. Run notebook 06_analysis.ipynb for comprehensive analysis",
    sep="\n",
)

print("\n✅ DP models are ready for comparison with baseline and FL approaches!")
