# 🚀 mHealth Privacy Experiments - Optimized Pipeline

**Simplified training pipeline using project scripts**

- ✅ Setup (Drive, Clone, Dependencies)
- ✅ Update/Reclone anytime
- ✅ Configure & Run experiments
- ✅ Load & Analyze results

**Authors:** Eduardo Barbosa, Filipe Correia, Vasco Fernandes  
**Version:** 2.0 (Optimized)  
**Date:** October 2025

## 1️⃣ SETUP: Mount Drive & Clone Repository

In [None]:
from google.colab import drive
from pathlib import Path
import os
import sys
import subprocess
import shutil

# Mount Google Drive so the dataset folder is reachable from the Colab VM.
drive.mount('/content/drive', force_remount=True)

# Setup paths
DRIVE_BASE = Path('/content/drive/MyDrive')
DATA_DRIVE = DRIVE_BASE / 'mhealth-data/data'
PROJECT_DIR = Path('/content/mhealth-data-privacy')
REPO_URL = 'https://github.com/vasco-fernandes21/mhealth-data-privacy.git'

print('📁 Paths Configuration')
print(f'  Project: {PROJECT_DIR}')
print(f'  Data (Drive): {DATA_DRIVE}')
print(f'  Data exists: {DATA_DRIVE.exists()}')

# Clone the repository on first run, otherwise pull the latest changes.
if not PROJECT_DIR.exists():
    print('\n📥 Cloning repository...')
    result = subprocess.run(['git', 'clone', REPO_URL, str(PROJECT_DIR)],
                            capture_output=True, text=True)
    if result.returncode != 0:
        # Without a checkout nothing below can work (the os.chdir further
        # down would fail anyway), so stop here with git's own error text.
        raise RuntimeError(f'❌ Clone failed: {result.stderr}')
    print('✅ Repository cloned')
else:
    print('✅ Project already exists')
    # cwd= runs git inside the checkout without touching the process cwd yet.
    result = subprocess.run(['git', 'pull'], cwd=str(PROJECT_DIR),
                            capture_output=True, text=True)
    if result.returncode == 0:
        print('✅ Repository updated')
    else:
        # Best effort: keep working with the existing checkout, but say so.
        print(f'⚠️ Update failed, using existing checkout: {result.stderr}')

os.chdir(PROJECT_DIR)

# Expose the Drive data folder inside the project as "data".
data_link = PROJECT_DIR / 'data'
if data_link.is_symlink() and not data_link.exists() and DATA_DRIVE.exists():
    # A dangling link reports exists() == False, yet os.symlink would raise
    # FileExistsError on it; remove it so it can be recreated below.
    data_link.unlink()

if data_link.is_symlink():
    print('\n🔗 Symlink already exists')
elif data_link.exists():
    print(f'\n⚠️ {data_link} exists and is not a symlink; leaving it untouched')
elif DATA_DRIVE.exists():
    print(f'\n🔗 Creating symlink: {data_link} → {DATA_DRIVE}')
    os.symlink(DATA_DRIVE, data_link)
    print('✅ Symlink created')
else:
    print(f'\n⚠️ Data folder not found on Drive: {DATA_DRIVE}')

print('\n✅ Setup complete!')
print(f'   Working directory: {PROJECT_DIR}')

## 2️⃣ SETUP: Install Dependencies

In [None]:
!pip install -q opacus scikit-learn pyyaml tqdm pyedflib mne

import torch
import json
import time
from datetime import datetime
import pandas as pd
import numpy as np

# Pick the compute device: CUDA when torch reports it available, else CPU.
DEVICE = 'cpu'
if torch.cuda.is_available():
    DEVICE = 'cuda'

# Experiment outputs go to a "results" folder inside the project checkout.
RESULTS_DIR = PROJECT_DIR / 'results'
RESULTS_DIR.mkdir(exist_ok=True)

# Report the environment that the following cells will use.
print(
    '✅ Dependencies installed',
    '\n📊 Environment:',
    f'   Device: {DEVICE}',
    sep='\n',
)
if DEVICE == 'cuda':
    # Device index 0 is the GPU queried for its name and total memory.
    print(
        f'   GPU: {torch.cuda.get_device_name(0)}',
        f'   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB',
        sep='\n',
    )

## 3️⃣ UPDATE: Repository (Anytime)

In [None]:
def quick_update_menu():
    """Interactively update, reset, or re-clone the project repository.

    Prompts on stdin for one of:

    * ``1`` - run ``git pull`` inside ``PROJECT_DIR``.
    * ``2`` - run ``git reset --hard origin/main`` inside ``PROJECT_DIR``.
    * ``3`` - delete ``PROJECT_DIR``, clone ``REPO_URL`` again and recreate
      the ``data`` symlink to ``DATA_DRIVE``.
    * ``0`` - cancel without doing anything.

    Any other answer prints "Invalid choice". Progress and git errors are
    printed; nothing is returned.
    """

    print(f"\n{'='*70}")
    print("🔄 REPOSITORY UPDATE MENU")
    print(f"{'='*70}")
    print("\nOptions:")
    print("  1️⃣  git pull (update only)")
    print("  2️⃣  git reset --hard (discard changes)")
    print("  3️⃣  Reclone (delete + fresh clone)")
    print("  0️⃣  Cancel")

    choice = input("\nChoose (0-3): ").strip()

    if choice == '1':
        os.chdir(PROJECT_DIR)
        result = subprocess.run(['git', 'pull'], capture_output=True, text=True)
        print('✅ Pull successful' if result.returncode == 0 else f'❌ Failed: {result.stderr}')
    elif choice == '2':
        os.chdir(PROJECT_DIR)
        result = subprocess.run(['git', 'reset', '--hard', 'origin/main'], capture_output=True, text=True)
        print('✅ Reset successful' if result.returncode == 0 else f'❌ Failed: {result.stderr}')
    elif choice == '3':
        # Step out of the checkout before deleting it: the notebook works with
        # PROJECT_DIR as its cwd, and removing the cwd makes the following
        # `git clone` fail because it can no longer resolve its working dir.
        os.chdir(PROJECT_DIR.parent)
        if PROJECT_DIR.exists():
            # NOTE: this removes everything inside the checkout, including
            # any local outputs stored under it.
            shutil.rmtree(PROJECT_DIR)
        result = subprocess.run(['git', 'clone', REPO_URL, str(PROJECT_DIR)],
                                capture_output=True, text=True)
        if result.returncode != 0:
            # Do not claim success or touch the (missing) checkout.
            print(f'❌ Failed: {result.stderr}')
            return
        os.chdir(PROJECT_DIR)
        data_link = PROJECT_DIR / 'data'
        if not DATA_DRIVE.exists():
            print(f'⚠️ Data folder not found on Drive: {DATA_DRIVE}')
        elif data_link.exists() or data_link.is_symlink():
            # The fresh clone already provides "data"; os.symlink would raise.
            print(f'⚠️ {data_link} already exists; symlink not created')
        else:
            os.symlink(DATA_DRIVE, data_link)
        print('✅ Reclone complete')
    elif choice != '0':
        print("Invalid choice")

# Confirm the helper is defined and remind the user how to call it.
print(
    '✅ Update functions loaded',
    'Usage: quick_update_menu()',
    sep='\n',
)

## 4️⃣ CONFIG: Experiment Settings

In [None]:
# ============================================================================
# ⚙️ CONFIGURE EXPERIMENTS HERE
# ============================================================================
# Edit the values below, then re-run this cell before the execution cell.

# --- Scenario selection -----------------------------------------------------
SCENARIO = 'baseline'           # One of: baseline, dp, fl, fl_dp, all

# --- Filters (a falsy value disables the filter) ----------------------------
TAGS = 'tier1'                  # Tag filter, e.g. 'tier1'; '' = no tag filter
DATASETS = 'wesad'              # Dataset filter, e.g. 'wesad', 'sleep-edf'; '' = all
N_EXPERIMENTS = None            # Cap on the number of experiments, e.g. 3; None = all

# --- Execution switches -----------------------------------------------------
DRY_RUN = False                 # True = show commands only
AUTO_MODE = True                # True = skip confirmation

# ============================================================================

# Echo the active configuration so the notebook output records what was used.
print(
    '⚙️ CONFIGURATION',
    '='*70,
    f'Scenario: {SCENARIO}',
    f'Tags: {TAGS or "(none)"}',
    f'Datasets: {DATASETS or "(all)"}',
    f'Limit experiments: {N_EXPERIMENTS or "(no limit)"}',
    f'Dry run: {DRY_RUN}',
    f'Auto mode: {AUTO_MODE}',
    '='*70,
    sep='\n',
)

## 5️⃣ EXECUTE: Run Experiments

In [None]:
# Build the run_experiments.py command line from the configuration cell.
cmd = [
    'python', 'experiments/run_experiments.py',
    '--scenario', SCENARIO,
    '--device', DEVICE,
    '--results_dir', str(RESULTS_DIR),
]

# Optional filters: a falsy setting means "do not pass the flag".
if TAGS:
    cmd += ['--tags', TAGS]
if DATASETS:
    cmd += ['--datasets', DATASETS]
if N_EXPERIMENTS:
    cmd += ['--n_experiments', str(N_EXPERIMENTS)]
if AUTO_MODE:
    cmd += ['--auto']

print('\n' + '='*70)
print('🚀 RUNNING EXPERIMENTS')
print('='*70)
print(f'\nCommand: {" ".join(cmd)}\n')

if DRY_RUN:
    # run_experiments.py has no dry-run flag of its own, so DRY_RUN is
    # honoured here: the command is shown above and deliberately not launched.
    print('🔎 Dry run: command not executed')
else:
    # Run from the project root so the relative script path resolves.
    result = subprocess.run(cmd, cwd=str(PROJECT_DIR))

    if result.returncode == 0:
        print('\n✅ Experiments completed successfully')
    else:
        print(f'\n❌ Experiments failed (exit code: {result.returncode})')

## 6️⃣ RESULTS: Load & Analyze

In [None]:
# Load the JSON log from the results directory and print a summary.
results_file = RESULTS_DIR / 'experiments_log.json'

if results_file.exists():
    with open(results_file, encoding='utf-8') as f:
        results = json.load(f)

    print('\n' + '='*70)
    print('📊 RESULTS SUMMARY')
    print('='*70)

    total = results["total"]
    print(f'\nTimestamp: {results["timestamp"]}')
    print(f'Total experiments: {total}')
    print(f'Successful: {results["successful"]} ✅')
    print(f'Failed: {results["failed"]} ❌')
    if total:
        print(f'Success rate: {results["successful"]/total*100:.1f}%')
    else:
        # A log with zero experiments would otherwise divide by zero.
        print('Success rate: n/a (no experiments)')
    print(f'Total time: {results["total_time_hours"]:.2f} hours')

    # One row per experiment record.
    df = pd.DataFrame(results['results'])

    print('\nDetailed Results:')
    summary_columns = ['name', 'method', 'dataset', 'seed', 'success', 'time_seconds']
    # An empty record list yields a frame without columns; selecting missing
    # columns would raise KeyError, so show only the ones that are present.
    present_columns = [col for col in summary_columns if col in df.columns]
    if present_columns:
        print(df[present_columns].to_string(index=False))
    else:
        print('(no per-experiment records)')

    print(f'\n{"="*70}')
else:
    print('❌ Results file not found')

## 7️⃣ DOWNLOAD: Results

In [None]:
from google.colab import files

# Section banner.
print('\n📥 DOWNLOADING RESULTS', '='*70, sep='\n')

# Send the JSON experiment log to the browser when it is present.
if results_file.exists():
    print('\nDownloading: experiments_log.json')
    files.download(str(results_file))

# Zip the results directory (only if it exists and has at least one entry)
# and download the archive.
if RESULTS_DIR.exists() and any(RESULTS_DIR.glob('*')):
    print('Creating archive...')
    archive = shutil.make_archive('/tmp/mhealth_results', 'zip', root_dir=RESULTS_DIR)
    print('Downloading: mhealth_results.zip')
    files.download(archive)

# Closing banner.
print('\n✅ Download complete!', '='*70, sep='\n')

## 8️⃣ UTILITIES: Helper Functions

In [None]:
def show_scenarios():
    """Print every scenario file and the number of experiments it defines.

    Looks for ``*.yaml`` files in ``PROJECT_DIR/experiments/scenarios``,
    parses each one with ``yaml.safe_load`` and prints the file stem together
    with the size of its top-level ``experiments`` entry. A file that is
    empty, is not a mapping, or has no ``experiments`` entry counts as 0.
    """
    import yaml

    scenarios_dir = PROJECT_DIR / 'experiments' / 'scenarios'
    yaml_files = sorted(scenarios_dir.glob('*.yaml'))

    print('\n📋 Available Scenarios')
    if not yaml_files:
        print(f'  (no scenario files found in {scenarios_dir})')
        return

    for path in yaml_files:
        with open(path, encoding='utf-8') as fp:
            data = yaml.safe_load(fp)
        # safe_load returns None for an empty document and may return a
        # non-mapping; the "experiments" key itself can also be null.
        experiments = data.get('experiments') if isinstance(data, dict) else None
        n_exp = len(experiments or {})
        print(f'  - {path.stem}: {n_exp} experiments')


def quick_test():
    """Run a quick smoke test: one tier1 baseline experiment on WESAD.

    Launches ``experiments/run_experiments.py`` from ``PROJECT_DIR`` with
    ``--n_experiments 1`` on the configured ``DEVICE`` in auto mode, then
    reports whether the process exited successfully. Returns nothing.
    """
    # The command limits the run to one experiment, so the message says so.
    print('\n🚀 Quick Test: Running first baseline experiment on WESAD')

    cmd = [
        'python', 'experiments/run_experiments.py',
        '--scenario', 'baseline',
        '--tags', 'tier1',
        '--datasets', 'wesad',
        '--n_experiments', '1',
        '--device', DEVICE,
        '--auto',
    ]

    # Run from the project root so the relative script path resolves.
    result = subprocess.run(cmd, cwd=str(PROJECT_DIR))

    if result.returncode == 0:
        print('\n✅ Quick test completed successfully')
    else:
        print(f'\n❌ Quick test failed (exit code: {result.returncode})')


def show_last_results():
    """Print the per-experiment records from the latest experiments log.

    Reads ``experiments_log.json`` from ``RESULTS_DIR`` and prints its
    ``results`` entries as a table, or "No results available" when the log
    file does not exist. Returns nothing.
    """
    # Derive the path here rather than relying on a global set by the
    # results-loading cell, so this helper works even if that cell has not
    # been run in the current session.
    log_path = RESULTS_DIR / 'experiments_log.json'

    if not log_path.exists():
        print('No results available')
        return

    with open(log_path, encoding='utf-8') as f:
        results = json.load(f)

    df = pd.DataFrame(results['results'])
    print('\n📊 Last Run Results')
    print(df.to_string())


# Confirm the helpers are defined and list how to call each of them.
print(
    '✅ Utilities loaded',
    '\nUsage:',
    '  show_scenarios()      # List all scenarios',
    '  quick_test()          # Run quick test',
    '  show_last_results()   # Show last results',
    '  quick_update_menu()   # Update repository',
    sep='\n',
)

## 9️⃣ EXAMPLES

In [None]:
# ============================================================================
# EXAMPLE CONFIGURATIONS
# ============================================================================
# Reference presets only: this cell prints them and assigns no setting.

print("📝 Example Configurations (Run in Section 4):\n")

# Each entry is (title, settings text). Every example is rendered as the
# title, a blank line, the settings, and a trailing blank line.
print("\n\n".join(
    f"{title}\n\n{settings}\n"
    for title, settings in (
        (
            "Example 1: Quick test (1 experiment)",
            "SCENARIO = 'baseline'\n"
            "TAGS = 'tier1'\n"
            "DATASETS = 'wesad'\n"
            "N_EXPERIMENTS = 1\n"
            "AUTO_MODE = True",
        ),
        (
            "Example 2: All baseline on WESAD",
            "SCENARIO = 'baseline'\n"
            "TAGS = 'tier1'\n"
            "DATASETS = 'wesad'\n"
            "N_EXPERIMENTS = None\n"
            "AUTO_MODE = True",
        ),
        (
            "Example 3: DP experiments",
            "SCENARIO = 'dp'\n"
            "TAGS = 'tier1'\n"
            "DATASETS = 'wesad'\n"
            "N_EXPERIMENTS = None\n"
            "AUTO_MODE = True",
        ),
        (
            "Example 4: All scenarios (long run)",
            "SCENARIO = 'all'\n"
            "TAGS = 'tier1'\n"
            "DATASETS = None\n"
            "N_EXPERIMENTS = None\n"
            "AUTO_MODE = True",
        ),
        (
            "Example 5: Dry run (see commands)",
            "SCENARIO = 'baseline'\n"
            "TAGS = 'tier1'\n"
            "DATASETS = 'wesad'\n"
            "DRY_RUN = True\n"
            "AUTO_MODE = True",
        ),
    )
))