# 🏥 mHealth Privacy-Utility Evaluation

**Pipeline Completa:**
1. ✅ Mount Drive
2. ✅ Install dependencies
3. ✅ Load preprocessed data
4. ✅ Train models (Baseline, DP)
5. ✅ Analyze & visualize results

**Assumido:** Dados processados já estão em `MyDrive/mhealth-data/processed/`

## Cell 1: Mount Drive

In [None]:
# Attach Google Drive to the Colab filesystem; the later cells build all of
# their paths underneath this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')
print('✅ Drive mounted')

## Cell 2: Clone Project & Check Data Directory

In [None]:
import os
import subprocess
from pathlib import Path

# Locations on the mounted Drive: PROJECT_DIR is where the repository is
# expected to live, DATA_DIR is where the preprocessed datasets are expected.
DRIVE_BASE = Path('/content/drive/MyDrive')
PROJECT_DIR = DRIVE_BASE / 'mhealth-privacy'
DATA_DIR = DRIVE_BASE / 'mhealth-data' / 'processed'

# Clone the project only when it is not on Drive yet.
if not PROJECT_DIR.exists():
    print('📥 Cloning project...')
    # Pass the destination explicitly. Without it git creates a directory
    # named after the repository ("mhealth-data-privacy"), which is not
    # PROJECT_DIR, so the chdir below would fail right after a fresh clone.
    subprocess.run([
        'git', 'clone',
        'https://github.com/vasco-fernandes21/mhealth-data-privacy.git',
        str(PROJECT_DIR),
    ], check=True)  # check=True: abort the cell if the clone fails
    print('✅ Project cloned')
else:
    print('✅ Project already exists')

# Run the rest of the notebook from the project root.
os.chdir(PROJECT_DIR)
print(f'📁 Working directory: {PROJECT_DIR}')

# Report whether the preprocessed data folders are present (informational
# only; nothing is created or downloaded here).
if DATA_DIR.exists():
    print(f'✅ Data directory: {DATA_DIR}')
    print(f'   - sleep-edf: {(DATA_DIR / "sleep-edf").exists()}')
    print(f'   - wesad: {(DATA_DIR / "wesad").exists()}')
else:
    print(f'❌ Data directory not found: {DATA_DIR}')

## Cell 3: Install Dependencies

In [None]:
# Install the third-party packages used by the rest of the notebook.
# The leading "!" is IPython/Colab shell syntax, so these lines only work
# inside a notebook cell; "-q" asks pip for quiet output.
!pip install -q torch torchvision torchaudio
!pip install -q opacus
!pip install -q scikit-learn pandas pyyaml tqdm joblib
!pip install -q matplotlib seaborn

print('✅ All dependencies installed')

## Cell 4: Setup Reproducibility

In [None]:
import sys

# Put the project root first on the import path so the `src.*` imports in
# the following cells resolve.
sys.path.insert(0, str(PROJECT_DIR))

import random

import numpy as np
import torch

# Seed every random number generator used here with the same value.
SEED = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'✅ Device: {device}')
print(f'✅ Seed: {SEED}')

# Extra hardware details are only available on a GPU runtime.
if device == 'cuda':
    print(f'   GPU: {torch.cuda.get_device_name(0)}')
    print(f'   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')

## Cell 5: Load Preprocessed Data (Sleep-EDF)

In [None]:
from src.preprocessing.sleep_edf import load_windowed_sleep_edf

print('📂 Loading Sleep-EDF data...')
sleep_dir = DATA_DIR / 'sleep-edf'

# The project loader is called with the directory as a string and its result
# is unpacked into eight values (presumably train/val/test features, the
# matching labels, a fitted scaler and a metadata dict - verify against
# src/preprocessing/sleep_edf.py).
(
    X_train_sleep, X_val_sleep, X_test_sleep,
    y_train_sleep, y_val_sleep, y_test_sleep,
    scaler, info_sleep,
) = load_windowed_sleep_edf(str(sleep_dir))

# Summarize what was loaded.
print(f'✅ Sleep-EDF loaded:')
print(f'   Train: {X_train_sleep.shape}')
print(f'   Val: {X_val_sleep.shape}')
print(f'   Test: {X_test_sleep.shape}')
print(f'   Classes: {info_sleep["n_classes"]} ({info_sleep["class_names"]})')

## Cell 6: Load Preprocessed Data (WESAD)

In [None]:
from src.preprocessing.wesad import load_augmented_wesad_temporal

print('📂 Loading WESAD data...')
wesad_dir = DATA_DIR / 'wesad'

# The project loader is called with the directory as a string and its result
# is unpacked into eight values (presumably train/val/test features, the
# matching labels, a label encoder and a metadata dict - verify against
# src/preprocessing/wesad.py).
(
    X_train_wesad, X_val_wesad, X_test_wesad,
    y_train_wesad, y_val_wesad, y_test_wesad,
    label_encoder, info_wesad,
) = load_augmented_wesad_temporal(str(wesad_dir))

# Summarize what was loaded.
print(f'✅ WESAD loaded:')
print(f'   Train (augmented): {X_train_wesad.shape}')
print(f'   Val: {X_val_wesad.shape}')
print(f'   Test: {X_test_wesad.shape}')
print(f'   Classes: {info_wesad["n_classes"]} ({info_wesad["class_names"]})')

## Cell 7: Create DataLoaders

In [None]:
from torch.utils.data import TensorDataset, DataLoader

def create_loaders(X_tr, y_tr, X_v, y_v, X_te, y_te, batch_size=64, drop_last=False):
    """Build train, validation and test DataLoaders from features and labels.

    Each features/labels pair is copied into a ``TensorDataset`` with
    ``float32`` features and ``long`` labels.

    Args:
        X_tr, y_tr: Training features and labels (anything ``torch.tensor``
            accepts, e.g. NumPy arrays).
        X_v, y_v: Validation features and labels.
        X_te, y_te: Test features and labels.
        batch_size: Batch size used by all three loaders.
        drop_last: Whether the training loader drops the final incomplete
            batch. The validation and test loaders never drop samples.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles.
    """
    def _as_dataset(features, labels):
        # One place for the dtype conversion shared by all three splits.
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )

    # Pinned host memory only speeds up host-to-GPU copies. Requesting it on
    # a CPU-only runtime has no benefit and makes PyTorch emit a warning, so
    # enable it only when CUDA is available.
    common = dict(
        batch_size=batch_size,
        num_workers=2,
        pin_memory=torch.cuda.is_available(),
    )

    train_loader = DataLoader(_as_dataset(X_tr, y_tr), shuffle=True,
                              drop_last=drop_last, **common)
    val_loader = DataLoader(_as_dataset(X_v, y_v), shuffle=False, **common)
    test_loader = DataLoader(_as_dataset(X_te, y_te), shuffle=False, **common)

    return train_loader, val_loader, test_loader

# Build the Sleep-EDF loaders (batches of 32).
train_loader_sleep, val_loader_sleep, test_loader_sleep = create_loaders(
    X_tr=X_train_sleep, y_tr=y_train_sleep,
    X_v=X_val_sleep, y_v=y_val_sleep,
    X_te=X_test_sleep, y_te=y_test_sleep,
    batch_size=32,
)

# Build the WESAD loaders (batches of 64).
train_loader_wesad, val_loader_wesad, test_loader_wesad = create_loaders(
    X_tr=X_train_wesad, y_tr=y_train_wesad,
    X_v=X_val_wesad, y_v=y_val_wesad,
    X_te=X_test_wesad, y_te=y_test_wesad,
    batch_size=64,
)

print('✅ DataLoaders created')

## Cell 8: Train Baseline (Sleep-EDF)

In [None]:
from src.models.sleep_edf_model import SleepEDFModel
from src.training.trainers.baseline_trainer import BaselineTrainer
import yaml

# Section banner (blank line, rule, title, rule).
print('\n' + '='*70, 'BASELINE TRAINING - SLEEP-EDF', '='*70, sep='\n')

# Read the Sleep-EDF configuration shipped with the project.
sleep_config_path = PROJECT_DIR / 'src' / 'configs' / 'sleep_edf.yaml'
with sleep_config_path.open() as config_file:
    config = yaml.safe_load(config_file)

# Instantiate the model and the non-private trainer around it.
model_sleep = SleepEDFModel(config, device=device)
print(f'📊 Model created: {model_sleep.__class__.__name__}')
trainer_sleep = BaselineTrainer(model_sleep, config, device=device)

# Fit on the training loader, using the validation loader during training.
print('🚀 Training...')
results_baseline_sleep = trainer_sleep.fit(
    train_loader_sleep,
    val_loader_sleep,
    epochs=30,  # Reduced for Colab
    patience=8,
    output_dir=None,
)

# Evaluate on the held-out test loader and fold the metrics into the
# training results so later cells can read everything from one dict.
test_metrics = trainer_sleep.evaluate_full(test_loader_sleep)
results_baseline_sleep.update(test_metrics)

print(f'\n✅ Training complete!')
print(f'   Test Accuracy: {test_metrics["accuracy"]:.4f}')
print(f'   Test F1-Score: {test_metrics["f1_score"]:.4f}')

## Cell 9: Train with DP (Sleep-EDF)

In [None]:
from src.training.trainers.dp_trainer import DPTrainer

# Section banner (blank line, rule, title, rule).
print('\n' + '='*70, 'DP TRAINING - SLEEP-EDF (ε=1.0)', '='*70, sep='\n')

# Read the project's default privacy settings.
privacy_config_path = PROJECT_DIR / 'src' / 'configs' / 'privacy_defaults.yaml'
with privacy_config_path.open() as privacy_file:
    privacy_config = yaml.safe_load(privacy_file)

# Shallow merge: top-level keys from the privacy defaults override the
# Sleep-EDF config. Then switch DP on with a target epsilon of 1.0.
config_dp = dict(config)
config_dp.update(privacy_config)
dp_settings = config_dp['differential_privacy']
dp_settings['enabled'] = True
dp_settings['target_epsilon'] = 1.0

# Separate loaders for the DP run. The training loader drops the last
# incomplete batch (the notebook author notes this is required for DP).
# The validation loader from this call is discarded; the one built earlier
# is reused below.
train_loader_dp, _, test_loader_dp = create_loaders(
    X_tr=X_train_sleep, y_tr=y_train_sleep,
    X_v=X_val_sleep, y_v=y_val_sleep,
    X_te=X_test_sleep, y_te=y_test_sleep,
    batch_size=32,
    drop_last=True,
)

# Fresh model so the DP run does not start from the baseline's weights.
model_dp = SleepEDFModel(config_dp, device=device)
trainer_dp = DPTrainer(model_dp, config_dp, device=device)

print('🚀 Training with DP...')
results_dp_sleep = trainer_dp.fit(
    train_loader_dp,
    val_loader_sleep,
    epochs=30,
    patience=8,
    output_dir=None,
)

# Evaluate on the test loader and merge the metrics into the results dict.
test_metrics_dp = trainer_dp.evaluate_full(test_loader_dp)
results_dp_sleep.update(test_metrics_dp)

print(f'\n✅ DP Training complete!')
print(f'   Final Epsilon: {results_dp_sleep["final_epsilon"]:.4f}')
print(f'   Test Accuracy: {test_metrics_dp["accuracy"]:.4f}')
print(f'   Test F1-Score: {test_metrics_dp["f1_score"]:.4f}')

## Cell 10: Train Baseline (WESAD)

In [None]:
from src.models.wesad_model import WESADModel

# Section banner (blank line, rule, title, rule).
print('\n' + '='*70, 'BASELINE TRAINING - WESAD', '='*70, sep='\n')

# Read the WESAD configuration shipped with the project.
wesad_config_path = PROJECT_DIR / 'src' / 'configs' / 'wesad.yaml'
with wesad_config_path.open() as config_file:
    config_wesad = yaml.safe_load(config_file)

# Instantiate the model and the non-private trainer around it.
model_wesad = WESADModel(config_wesad, device=device)
print(f'📊 Model created: {model_wesad.__class__.__name__}')
trainer_wesad = BaselineTrainer(model_wesad, config_wesad, device=device)

# Fit on the training loader, using the validation loader during training.
print('🚀 Training...')
results_baseline_wesad = trainer_wesad.fit(
    train_loader_wesad,
    val_loader_wesad,
    epochs=30,
    patience=8,
    output_dir=None,
)

# Evaluate on the test loader and merge the metrics into the results dict.
test_metrics_wesad = trainer_wesad.evaluate_full(test_loader_wesad)
results_baseline_wesad.update(test_metrics_wesad)

print(f'\n✅ Training complete!')
print(f'   Test Accuracy: {test_metrics_wesad["accuracy"]:.4f}')
print(f'   Test F1-Score: {test_metrics_wesad["f1_score"]:.4f}')

## Cell 11: Compare Results

In [None]:
import pandas as pd

# Section banner (blank line, rule, title, rule).
print('\n' + '='*70, 'RESULTS COMPARISON', '='*70, sep='\n')

# Sleep-EDF: one row per training method. The baseline has no privacy
# guarantee, which is recorded as an infinite epsilon.
df_sleep = pd.DataFrame({
    'Dataset': ['Sleep-EDF', 'Sleep-EDF'],
    'Method': ['Baseline', 'DP (ε=1.0)'],
    'Accuracy': [run['accuracy'] for run in (results_baseline_sleep, results_dp_sleep)],
    'F1-Score': [run['f1_score'] for run in (results_baseline_sleep, results_dp_sleep)],
    'Privacy (ε)': [float('inf'), results_dp_sleep['final_epsilon']],
})

# WESAD: only the baseline was trained.
df_wesad = pd.DataFrame({
    'Dataset': ['WESAD'],
    'Method': ['Baseline'],
    'Accuracy': [results_baseline_wesad['accuracy']],
    'F1-Score': [results_baseline_wesad['f1_score']],
    'Privacy (ε)': [float('inf')],
})

# Stack both tables and print them without the index column.
df_combined = pd.concat([df_sleep, df_wesad], ignore_index=True)
print('\n' + df_combined.to_string(index=False))

# Accuracy lost by the DP model relative to the baseline, scaled by 100.
print(f'\n📊 PRIVACY-UTILITY TRADEOFF:')
acc_drop = 100 * (results_baseline_sleep['accuracy'] - results_dp_sleep['accuracy'])
print(f'   Accuracy drop (Sleep-EDF, ε=1.0): {acc_drop:.2f}%')
print(f'   Privacy budget used: {results_dp_sleep["final_epsilon"]:.4f}')

## Cell 12: Visualize Results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Side-by-side bar charts comparing the baseline and DP models on Sleep-EDF:
# accuracy on the left, F1-score on the right.
sns.set_style('whitegrid')
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

methods_sleep = ['Baseline', 'DP (ε=1.0)']
acc_sleep = [
    results_baseline_sleep['accuracy'],
    results_dp_sleep['accuracy']
]
f1_sleep = [
    results_baseline_sleep['f1_score'],
    results_dp_sleep['f1_score']
]

# Both panels share the same layout, so draw them in one loop.
for panel, metric_name, values in (
    (axes[0], 'Accuracy', acc_sleep),
    (axes[1], 'F1-Score', f1_sleep),
):
    bars = panel.bar(methods_sleep, values, color=['green', 'orange'],
                     alpha=0.7, edgecolor='black', linewidth=2)
    panel.set_ylabel(metric_name, fontsize=12, fontweight='bold')
    panel.set_title(f'Sleep-EDF {metric_name} Comparison', fontsize=13, fontweight='bold')

    # Keep the usual 0.6 floor when every score fits above it. A score below
    # 0.6 would otherwise be clipped out of view together with its label, so
    # in that case lower the floor to just under the smallest score.
    lowest = min(values)
    y_floor = 0.6 if lowest >= 0.6 else max(0.0, lowest - 0.05)
    panel.set_ylim([y_floor, 1.0])
    panel.grid(axis='y', alpha=0.3)

    # Print each value on top of its bar.
    for bar, value in zip(bars, values):
        panel.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                   f'{value:.4f}', ha='center', va='bottom', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig('results_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

print('✅ Plot saved as results_comparison.png')

## Cell 13: Privacy-Utility Tradeoff Curve

In [None]:
# Simulated tradeoff curve: apart from the last entry (the measured
# baseline), these accuracies are hard-coded illustrative values, not
# measurements. Only the baseline line and the red "Achieved" marker below
# come from the models trained in this notebook.
epsilons = [0.5, 1.0, 2.0, 5.0, 10.0, float('inf')]
accuracies = [0.70, 0.75, 0.80, 0.85, 0.88, results_baseline_sleep['accuracy']]

fig, ax = plt.subplots(figsize=(10, 6))

# Plot curve (excluding infinity, which cannot be placed on the x-axis)
ax.plot(epsilons[:-1], accuracies[:-1], marker='o', linewidth=2.5, markersize=10, color='blue', label='DP Tradeoff')

# Baseline accuracy as a horizontal reference line
ax.axhline(y=results_baseline_sleep['accuracy'], color='green', linestyle='--',
           linewidth=2.5, label=f'Baseline: {results_baseline_sleep["accuracy"]:.4f}')

# Highlight the DP result actually obtained in this notebook
ax.plot(results_dp_sleep['final_epsilon'], results_dp_sleep['accuracy'],
        marker='s', markersize=12, color='red', label=f'Achieved (ε={results_dp_sleep["final_epsilon"]:.4f})')

ax.set_xlabel('Privacy Budget (ε)', fontsize=12, fontweight='bold')
ax.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax.set_title('Privacy-Utility Tradeoff (Sleep-EDF)', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend(fontsize=11, loc='lower right')
ax.set_xscale('log')

# Keep the usual 0.65 floor when everything fits above it. If the achieved
# or baseline accuracy is lower, it would be clipped out of view, so lower
# the floor to just under the smallest plotted value.
lowest_plotted = min(accuracies + [results_dp_sleep['accuracy']])
y_floor = 0.65 if lowest_plotted >= 0.65 else max(0.0, lowest_plotted - 0.05)
ax.set_ylim([y_floor, 1.0])

plt.tight_layout()
plt.savefig('privacy_utility_tradeoff.png', dpi=150, bbox_inches='tight')
plt.show()

print('✅ Plot saved as privacy_utility_tradeoff.png')

## Cell 14: Save Results

In [None]:
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# 🏥 mHealth Privacy-Utility: Complete Training Pipeline\n",
        "\n",
        "**Executa todos os cenários de treino:**\n",
        "- ✅ Baseline (sem privacidade)\n",
        "- ✅ DP (Differential Privacy, ε=1.0)\n",
        "- ✅ FL (Federated Learning, 5 clientes)\n",
        "- ✅ FL+DP (Federated + Differential Privacy)\n",
        "\n",
        "**Assumido:**\n",
        "- Dados preprocessados em `MyDrive/mhealth-data/processed/`\n",
        "- Projeto em `MyDrive/mhealth-privacy/`"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 1. Setup: Mount Drive"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["setup"]
      },
      "outputs": [],
      "source": [
        "from google.colab import drive\n",
        "import os\n",
        "from pathlib import Path\n",
        "\n",
        "# Mount Drive\n",
        "drive.mount('/content/drive')\n",
        "print('✅ Google Drive mounted')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2. Setup: Define Paths & Verify Structure"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["setup"]
      },
      "outputs": [],
      "source": [
        "from pathlib import Path\n",
        "\n",
        "# Define paths\n",
        "DRIVE_BASE = Path('/content/drive/MyDrive')\n",
        "PROJECT_DIR = DRIVE_BASE / 'mhealth-privacy'\n",
        "DATA_BASE = DRIVE_BASE / 'mhealth-data'\n",
        "\n",
        "print('📁 Drive Structure Check:')\n",
        "print(f'   Project: {PROJECT_DIR.exists()} → {PROJECT_DIR}')\n",
        "print(f'   Data: {DATA_BASE.exists()} → {DATA_BASE}')\n",
        "\n",
        "# Check processed data\n",
        "sleep_edf_dir = DATA_BASE / 'processed' / 'sleep-edf'\n",
        "wesad_dir = DATA_BASE / 'processed' / 'wesad'\n",
        "\n",
        "print(f'\\n📊 Processed Data Check:')\n",
        "print(f'   Sleep-EDF: {sleep_edf_dir.exists()}')\n",
        "if sleep_edf_dir.exists():\n",
        "    files = list(sleep_edf_dir.glob('*.npy')) + list(sleep_edf_dir.glob('*.pkl'))\n",
        "    print(f'      Files: {len(files)}')\n",
        "    for f in sorted(files)[:5]:\n",
        "        print(f'      - {f.name}')\n",
        "\n",
        "print(f'\\n   WESAD: {wesad_dir.exists()}')\n",
        "if wesad_dir.exists():\n",
        "    files = list(wesad_dir.glob('*.npy')) + list(wesad_dir.glob('*.pkl'))\n",
        "    print(f'      Files: {len(files)}')\n",
        "    for f in sorted(files)[:5]:\n",
        "        print(f'      - {f.name}')\n",
        "\n",
        "# Verify project structure\n",
        "print(f'\\n🏗️  Project Structure Check:')\n",
        "print(f'   src/: {(PROJECT_DIR / \"src\").exists()}')\n",
        "print(f'   scripts/: {(PROJECT_DIR / \"scripts\").exists()}')\n",
        "print(f'   experiments/: {(PROJECT_DIR / \"experiments\").exists()}')\n",
        "print(f'   src/configs/: {(PROJECT_DIR / \"src\" / \"configs\").exists()}')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 3. Setup: Clone Project (if needed)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["setup"]
      },
      "outputs": [],
      "source": [
        "import subprocess\n",
        "\n",
        "if not PROJECT_DIR.exists():\n",
        "    print('📥 Cloning project...')\n",
        "    os.chdir(DRIVE_BASE)\n",
        "    result = subprocess.run([\n",
        "        'git', 'clone',\n",
        "        'https://github.com/yourusername/mhealth-privacy.git'\n",
        "    ], capture_output=True, text=True)\n",
        "    \n",
        "    if result.returncode == 0:\n",
        "        print('✅ Project cloned')\n",
        "    else:\n",
        "        print(f'❌ Clone failed: {result.stderr}')\nelse:\n",
        "    print('✅ Project already exists')\n",
        "\n",
        "os.chdir(PROJECT_DIR)\n",
        "print(f'✅ Working directory: {PROJECT_DIR}')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 4. Setup: Install Dependencies"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["setup"]
      },
      "outputs": [],
      "source": [
        "# Install minimal dependencies (most should be in Colab)\n",
        "!pip install -q torch opacus scikit-learn pyyaml tqdm -U\n",
        "print('✅ Dependencies installed')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 5. Setup: Python Environment"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["setup"]
      },
      "outputs": [],
      "source": [
        "import sys\n",
        "import torch\n",
        "import random\n",
        "import numpy as np\n",
        "\n",
        "# Add project to path\n",
        "sys.path.insert(0, str(PROJECT_DIR))\n",
        "\n",
        "# Set seed\n",
        "SEED = 42\n",
        "random.seed(SEED)\n",
        "np.random.seed(SEED)\n",
        "torch.manual_seed(SEED)\n",
        "if torch.cuda.is_available():\n",
        "    torch.cuda.manual_seed_all(SEED)\n",
        "\n",
        "# Device\n",
        "DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
        "\n",
        "print(f'✅ Environment setup:')\n",
        "print(f'   Device: {DEVICE}')\n",
        "print(f'   Seed: {SEED}')\n",
        "if DEVICE == 'cuda':\n",
        "    print(f'   GPU: {torch.cuda.get_device_name(0)}')\n",
        "    print(f'   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 6. Configuration: Select Dataset & Scenarios"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["config"]
      },
      "outputs": [],
      "source": [
        "# ============================================================================\n",
        "# ⚙️ CONFIGURE HERE\n",
        "# ============================================================================\n",
        "\n",
        "# Choose dataset\n",
        "DATASET = 'sleep-edf'  # Options: 'sleep-edf' or 'wesad'\n",
        "\n",
        "# Choose which scenarios to run\n",
        "RUN_SCENARIOS = {\n",
        "    'baseline': True,      # ✅ No privacy\n",
        "    'dp': True,            # ✅ Differential Privacy\n",
        "    'fl': True,            # ✅ Federated Learning\n",
        "    'fl_dp': True          # ✅ FL + DP\n",
        "}\n",
        "\n",
        "# Common parameters\n",
        "TRAIN_PARAMS = {\n",
        "    'epochs': 20,          # Reduced for Colab\n",
        "    'batch_size': 32,\n",
        "    'learning_rate': 0.001,\n",
        "    'seed': SEED,\n",
        "    'device': DEVICE\n",
        "}\n",
        "\n",
        "# DP parameters\n",
        "DP_PARAMS = {\n",
        "    'epsilon': 1.0,\n",
        "    'delta': 1e-5,\n",
        "    'max_grad_norm': 1.0\n",
        "}\n",
        "\n",
        "# FL parameters\n",
        "FL_PARAMS = {\n",
        "    'n_clients': 5,\n",
        "    'local_epochs': 3,\n",
        "    'global_rounds': 20\n",
        "}\n",
        "\n",
        "print('⚙️ Configuration:')\n",
        "print(f'   Dataset: {DATASET}')\n",
        "print(f'   Scenarios: {[s for s, v in RUN_SCENARIOS.items() if v]}')\n",
        "print(f'   Epochs: {TRAIN_PARAMS[\"epochs\"]}')\n",
        "print(f'   DP Epsilon: {DP_PARAMS[\"epsilon\"]}')\n",
        "print(f'   FL Clients: {FL_PARAMS[\"n_clients\"]}')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 7. Training: Baseline"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["training"]
      },
      "outputs": [],
      "source": [
        "if RUN_SCENARIOS['baseline']:\n",
        "    print('\\n' + '='*70)\n",
        "    print('🚀 BASELINE TRAINING')\n",
        "    print('='*70)\n",
        "    \n",
        "    cmd = f\"\"\"python scripts/train_baseline.py \\\n",
        "      --dataset {DATASET} \\\n",
        "      --seed {TRAIN_PARAMS['seed']} \\\n",
        "      --device {TRAIN_PARAMS['device']}\"\"\"\n",
        "    \n",
        "    print(f'Command: {cmd}\\n')\n",
        "    result = os.system(cmd)\n",
        "    \n",
        "    if result == 0:\n",
        "        print('\\n✅ Baseline training completed')\n",
        "    else:\n",
        "        print(f'\\n❌ Baseline training failed (code: {result})')\nelse:\n",
        "    print('⏭️  Baseline training skipped')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 8. Training: Differential Privacy"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["training"]
      },
      "outputs": [],
      "source": [
        "if RUN_SCENARIOS['dp']:\n",
        "    print('\\n' + '='*70)\n",
        "    print('🔐 DIFFERENTIAL PRIVACY TRAINING')\n",
        "    print('='*70)\n",
        "    \n",
        "    cmd = f\"\"\"python scripts/train_dp.py \\\n",
        "      --dataset {DATASET} \\\n",
        "      --epsilon {DP_PARAMS['epsilon']} \\\n",
        "      --seed {TRAIN_PARAMS['seed']} \\\n",
        "      --device {TRAIN_PARAMS['device']}\"\"\"\n",
        "    \n",
        "    print(f'Command: {cmd}\\n')\n",
        "    result = os.system(cmd)\n",
        "    \n",
        "    if result == 0:\n",
        "        print(f'\\n✅ DP training completed (ε={DP_PARAMS[\"epsilon\"]})')\n",
        "    else:\n",
        "        print(f'\\n❌ DP training failed (code: {result})')\nelse:\n",
        "    print('⏭️  DP training skipped')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 9. Training: Federated Learning"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["training"]
      },
      "outputs": [],
      "source": [
        "if RUN_SCENARIOS['fl']:\n",
        "    print('\\n' + '='*70)\n",
        "    print('🤝 FEDERATED LEARNING TRAINING')\n",
        "    print('='*70)\n",
        "    \n",
        "    cmd = f\"\"\"python scripts/train_fl.py \\\n",
        "      --dataset {DATASET} \\\n",
        "      --n_clients {FL_PARAMS['n_clients']} \\\n",
        "      --seed {TRAIN_PARAMS['seed']} \\\n",
        "      --device {TRAIN_PARAMS['device']}\"\"\"\n",
        "    \n",
        "    print(f'Command: {cmd}\\n')\n",
        "    result = os.system(cmd)\n",
        "    \n",
        "    if result == 0:\n",
        "        print(f'\\n✅ FL training completed ({FL_PARAMS[\"n_clients\"]} clients)')\n",
        "    else:\n",
        "        print(f'\\n❌ FL training failed (code: {result})')\nelse:\n",
        "    print('⏭️  FL training skipped')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 10. Training: Federated Learning + Differential Privacy"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["training"]
      },
      "outputs": [],
      "source": [
        "if RUN_SCENARIOS['fl_dp']:\n",
        "    print('\\n' + '='*70)\n",
        "    print('🔒 FEDERATED LEARNING + DIFFERENTIAL PRIVACY')\n",
        "    print('='*70)\n",
        "    \n",
        "    cmd = f\"\"\"python scripts/train_fl_dp.py \\\n",
        "      --dataset {DATASET} \\\n",
        "      --n_clients {FL_PARAMS['n_clients']} \\\n",
        "      --epsilon {DP_PARAMS['epsilon']} \\\n",
        "      --seed {TRAIN_PARAMS['seed']} \\\n",
        "      --device {TRAIN_PARAMS['device']}\"\"\"\n",
        "    \n",
        "    print(f'Command: {cmd}\\n')\n",
        "    result = os.system(cmd)\n",
        "    \n",
        "    if result == 0:\n",
        "        print(f'\\n✅ FL+DP training completed')\n",
        "    else:\n",
        "        print(f'\\n❌ FL+DP training failed (code: {result})')\nelse:\n",
        "    print('⏭️  FL+DP training skipped')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 11. Results: Load & Compare"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["analysis"]
      },
      "outputs": [],
      "source": [
        "import json\n",
        "import pandas as pd\n",
        "from pathlib import Path\n",
        "\n",
        "RESULTS_BASE = Path('./results')\n",
        "\n",
        "print('\\n' + '='*70)\n",
        "print('📊 RESULTS SUMMARY')\n",
        "print('='*70)\n",
        "\n",
        "results_dict = {}\n",
        "\n",
        "# Load results for each scenario\n",
        "scenarios_to_check = [\n",
        "    ('baseline', f'baseline/{DATASET}/results.json'),\n",
        "    ('dp', f'dp/epsilon_1.0/{DATASET}/results.json'),\n",
        "    ('fl', f'fl/{DATASET}/results.json'),\n",
        "    ('fl_dp', f'fl_dp/epsilon_1.0/{DATASET}/results.json')\n",
        "]\n",
        "\n",
        "for scenario_name, result_path in scenarios_to_check:\n",
        "    if not RUN_SCENARIOS.get(scenario_name, True):\n",
        "        continue\n",
        "    \n",
        "    full_path = RESULTS_BASE / result_path\n",
        "    \n",
        "    if full_path.exists():\n",
        "        try:\n",
        "            with open(full_path) as f:\n",
        "                results = json.load(f)\n",
        "            \n",
        "            results_dict[scenario_name] = {\n",
        "                'accuracy': results.get('accuracy', 0),\n",
        "                'f1_score': results.get('f1_score', 0),\n",
        "                'training_time': results.get('training_time_seconds', 0)\n",
        "            }\n",
        "            \n",
        "            print(f'\\n✅ {scenario_name.upper()}')\n",
        "            print(f'   Accuracy: {results_dict[scenario_name][\"accuracy\"]:.4f}')\n",
        "            print(f'   F1-Score: {results_dict[scenario_name][\"f1_score\"]:.4f}')\n",
        "            print(f'   Time: {results_dict[scenario_name][\"training_time\"]:.1f}s')\n",
        "            \n",
        "            if scenario_name == 'dp':\n",
        "                epsilon = results.get('final_epsilon', results.get('epsilon', 'N/A'))\n",
        "                print(f'   Privacy (ε): {epsilon}')\n",
        "        \n",
        "        except Exception as e:\n",
        "            print(f'❌ Error loading {scenario_name}: {e}')\n",
        "    else:\n",
        "        print(f'⚠️  {scenario_name}: Results file not found at {full_path}')\n",
        "\n",
        "# Create comparison table\n",
        "if results_dict:\n",
        "    df = pd.DataFrame(results_dict).T\n",
        "    print(f'\\n' + '='*70)\n",
        "    print('COMPARISON TABLE')\n",
        "    print('='*70)\n",
        "    print(df.round(4).to_string())"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 12. Results: Visualization"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["analysis"]
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "\n",
        "if results_dict:\n",
        "    sns.set_style('whitegrid')\n",
        "    fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
        "    \n",
        "    # Accuracy comparison\n",
        "    scenarios = list(results_dict.keys())\n",
        "    accuracies = [results_dict[s]['accuracy'] for s in scenarios]\n",
        "    \n",
        "    colors = ['green', 'orange', 'blue', 'red']\n",
        "    bars = axes[0].bar(scenarios, accuracies, color=colors[:len(scenarios)], \n",
        "                        alpha=0.7, edgecolor='black', linewidth=2)\n",
        "    axes[0].set_ylabel('Accuracy', fontsize=12, fontweight='bold')\n",
        "    axes[0].set_title(f'{DATASET.upper()} - Accuracy Comparison', fontsize=13, fontweight='bold')\n",
        "    axes[0].set_ylim([0.5, 1.0])\n",
        "    axes[0].grid(axis='y', alpha=0.3)\n",
        "    \n",
        "    for bar, acc in zip(bars, accuracies):\n",
        "        height = bar.get_height()\n",
        "        axes[0].text(bar.get_x() + bar.get_width()/2., height,\n",
        "                    f'{acc:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')\n",
        "    \n",
        "    # F1-Score comparison\n",
        "    f1_scores = [results_dict[s]['f1_score'] for s in scenarios]\n",
        "    bars2 = axes[1].bar(scenarios, f1_scores, color=colors[:len(scenarios)], \n",
        "                         alpha=0.7, edgecolor='black', linewidth=2)\n",
        "    axes[1].set_ylabel('F1-Score', fontsize=12, fontweight='bold')\n",
        "    axes[1].set_title(f'{DATASET.upper()} - F1-Score Comparison', fontsize=13, fontweight='bold')\n",
        "    axes[1].set_ylim([0.5, 1.0])\n",
        "    axes[1].grid(axis='y', alpha=0.3)\n",
        "    \n",
        "    for bar, f1 in zip(bars2, f1_scores):\n",
        "        height = bar.get_height()\n",
        "        axes[1].text(bar.get_x() + bar.get_width()/2., height,\n",
        "                    f'{f1:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')\n",
        "    \n",
        "    plt.tight_layout()\n",
        "    plt.savefig('results_comparison.png', dpi=150, bbox_inches='tight')\n",
        "    plt.show()\n",
        "    print('✅ Visualization saved as results_comparison.png')\nelse:\n",
        "    print('⚠️  No results to visualize')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 13. Download Results"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["export"]
      },
      "outputs": [],
      "source": [
        "from google.colab import files\n",
        "import shutil\n",
        "\n",
        "print('\\n' + '='*70)\n",
        "print('📥 EXPORTING RESULTS')\n",
        "print('='*70)\n",
        "\n",
        "# Create zip with results\n",
        "if RESULTS_BASE.exists():\n",
        "    print('\\nCreating results archive...')\n",
        "    shutil.make_archive('mhealth_results', 'zip', RESULTS_BASE)\n",
        "    print('✅ Archive created: mhealth_results.zip')\n",
        "    \n",
        "    # Download\n",
        "    files.download('mhealth_results.zip')\n",
        "    print('✅ Downloaded to your computer!')\nelse:\n",
        "    print('⚠️  No results directory found')\n",
        "\n",
        "# Also download visualization if exists\n",
        "if os.path.exists('results_comparison.png'):\n",
        "    files.download('results_comparison.png')\n",
        "    print('✅ Downloaded visualization!')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 14. Summary"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": ["summary"]
      },
      "outputs": [],
      "source": [
        "print('\\n' + '='*70)\n",
        "print('✅ PIPELINE COMPLETE')\n",
        "print('='*70)\n",
        "\n",
        "print(f'\\n📊 Execution Summary:')\n",
        "print(f'   Dataset: {DATASET}')\n",
        "print(f'   Scenarios run: {[s for s, v in RUN_SCENARIOS.items() if v]}')\n",
        "print(f'   Epochs: {TRAIN_PARAMS[\"epochs\"]}')\n",
        "print(f'   Results scenarios: {list(results_dict.keys()) if results_dict else \"None\"}')\n",
        "\n",
        "if results_dict:\n",
        "    baseline_acc = results_dict.get('baseline', {}).get('accuracy', 0)\n",
        "    dp_acc = results_dict.get('dp', {}).get('accuracy', 0)\n",
        "    \n",
        "    if baseline_acc > 0 and dp_acc > 0:\n",
        "        drop = (baseline_acc - dp_acc) * 100\n",
        "        print(f'\\n📈 Privacy-Utility Tradeoff:')\n",
        "        print(f'   Baseline Accuracy: {baseline_acc:.4f}')\n",
        "        print(f'   DP Accuracy (ε=1.0): {dp_acc:.4f}')\n",
        "        print(f'   Accuracy Drop: {drop:.2f}%')\n",
        "\n",
        "print(f'\\n📁 Results Location:')\n",
        "print(f'   Local: {RESULTS_BASE}')\n",
        "print(f'   Drive: {DRIVE_BASE}/mhealth-privacy/results')\n",
        "\n",
        "print(f'\\n🚀 Next Steps:')\n",
        "print(f'   1. Analyze results in results_comparison.png')\n",
        "print(f'   2. Run with different epsilon values: [0.5, 1.0, 2.0, 5.0]')\n",
        "print(f'   3. Vary n_clients for FL experiments')\n",
        "print(f'   4. Generate paper plots and tables')\n",
        "\n",
        "print('\\n' + '='*70)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}

## Cell 15: Download Results

In [None]:
from pathlib import Path

from google.colab import files

print('📥 Preparing downloads...')

# Files this notebook is expected to have produced in the working directory.
expected_files = [
    'results.json',
    'results_comparison.png',
    'privacy_utility_tradeoff.png',
]

# Download each file that exists. A missing file is reported and skipped, so
# one absent artifact does not abort the remaining downloads.
missing_files = []
for filename in expected_files:
    if Path(filename).exists():
        files.download(filename)
    else:
        missing_files.append(filename)
        print(f'⚠️  Skipping {filename}: file not found')

if missing_files:
    print(f'⚠️  Downloaded {len(expected_files) - len(missing_files)} of {len(expected_files)} files')
else:
    print('✅ All files downloaded!')

## Cell 16: Summary

In [None]:
# Final recap of what the notebook loaded, trained and produced.
print('\n' + '='*70, 'PIPELINE SUMMARY', '='*70, sep='\n')

# Sample counts per split (first dimension of each feature array).
print(f'\n✅ DATASETS LOADED:')
print(f'   Sleep-EDF: {X_train_sleep.shape[0]} train, {X_val_sleep.shape[0]} val, {X_test_sleep.shape[0]} test')
print(f'   WESAD: {X_train_wesad.shape[0]} train, {X_val_wesad.shape[0]} val, {X_test_wesad.shape[0]} test')

# Accuracy of each trained model.
print(f'\n✅ MODELS TRAINED:')
print(f'   Sleep-EDF Baseline: {results_baseline_sleep["accuracy"]:.4f}')
print(f'   Sleep-EDF DP (ε=1.0): {results_dp_sleep["accuracy"]:.4f}')
print(f'   WESAD Baseline: {results_baseline_wesad["accuracy"]:.4f}')

# Accuracy lost by the DP model relative to the baseline, scaled by 100.
print(f'\n📊 PRIVACY-UTILITY ANALYSIS:')
acc_drop = 100 * (results_baseline_sleep['accuracy'] - results_dp_sleep['accuracy'])
print(f'   Accuracy drop (ε=1.0): {acc_drop:.2f}%')
print(f'   Privacy guarantee (ε): {results_dp_sleep["final_epsilon"]:.4f}')

# Static list of the artifact names the notebook refers to.
print(f'\n📁 FILES GENERATED:')
print(f'   ✓ results.json')
print(f'   ✓ results_comparison.png')
print(f'   ✓ privacy_utility_tradeoff.png')

print(f'\n🎉 Pipeline completed successfully!')