# 🚀 mHealth Privacy Experiments - Optimized Pipeline

**Simplified training pipeline using project scripts**

- ✅ Setup (Drive, Clone, Dependencies)
- ✅ Update/Reclone anytime
- ✅ Configure & Run experiments
- ✅ Load & Analyze results

**Authors:** Eduardo Barbosa, Filipe Correia, Vasco Fernandes  
**Version:** 2.0 (Optimized)  
**Date:** October 2025

## 1️⃣ SETUP: Mount Drive & Clone Repository

In [None]:
from google.colab import drive
from pathlib import Path
import os
import sys
import subprocess
import shutil

# Mount Google Drive; force_remount=True remounts even if Drive is already mounted,
# so this cell can be re-run safely.
drive.mount('/content/drive', force_remount=True)

# Setup paths
DRIVE_BASE = Path('/content/drive/MyDrive')
DATA_DRIVE = DRIVE_BASE / 'mhealth-data/data'
PROJECT_DIR = Path('/content/mhealth-data-privacy')
REPO_URL = 'https://github.com/vasco-fernandes21/mhealth-data-privacy.git'

print('üìÅ Paths Configuration')
print(f'  Project: {PROJECT_DIR}')
print(f'  Data (Drive): {DATA_DRIVE}')
print(f'  Data exists: {DATA_DRIVE.exists()}')

# Clone the repository on first run, otherwise pull the latest changes.
if not PROJECT_DIR.exists():
    print(f'\nüì• Cloning repository...')
    result = subprocess.run(['git', 'clone', REPO_URL, str(PROJECT_DIR)],
                            capture_output=True, text=True)
    if result.returncode != 0:
        print(f'‚ùå Clone failed: {result.stderr}')
        # Nothing below can work without a checkout; fail here with the git
        # error instead of an unrelated FileNotFoundError from os.chdir().
        raise RuntimeError(f'git clone failed: {result.stderr.strip()}')
    print('‚úÖ Repository cloned')
else:
    print('‚úÖ Project already exists')
    # cwd= runs the pull inside the checkout without depending on the
    # notebook's current directory.
    result = subprocess.run(['git', 'pull'], cwd=str(PROJECT_DIR),
                            capture_output=True, text=True)
    if result.returncode == 0:
        print('‚úÖ Repository updated')
    else:
        # Keep going with the existing checkout, but do not hide the failure.
        print(f'‚ùå Pull failed: {result.stderr}')

os.chdir(PROJECT_DIR)

# Link <project>/data to the dataset folder on Drive.
# Path.exists() follows symlinks, so a dangling link reports False; check
# is_symlink() first to avoid os.symlink() raising FileExistsError.
data_link = PROJECT_DIR / 'data'
if data_link.is_symlink():
    if data_link.exists():
        print(f'\nüîó Symlink already exists')
    elif DATA_DRIVE.exists():
        # The link is dangling but the Drive data is available: recreate it.
        data_link.unlink()
        print(f'\nüîó Creating symlink: {data_link} ‚Üí {DATA_DRIVE}')
        os.symlink(DATA_DRIVE, data_link)
        print('‚úÖ Symlink created')
    else:
        print(f'\n‚ö†Ô∏è  Symlink {data_link} is dangling and {DATA_DRIVE} was not found')
elif data_link.exists():
    # A real file/directory named "data" is already there; leave it untouched.
    print(f'\n‚ö†Ô∏è  {data_link} exists and is not a symlink; leaving it as is')
elif DATA_DRIVE.exists():
    print(f'\nüîó Creating symlink: {data_link} ‚Üí {DATA_DRIVE}')
    os.symlink(DATA_DRIVE, data_link)
    print('‚úÖ Symlink created')
else:
    print(f'\n‚ö†Ô∏è  Data folder not found on Drive: {DATA_DRIVE} (no symlink created)')

print(f'\n‚úÖ Setup complete!')
print(f'   Working directory: {PROJECT_DIR}')

Mounted at /content/drive
üìÅ Paths Configuration
  Project: /content/mhealth-data-privacy
  Data (Drive): /content/drive/MyDrive/mhealth-data/data
  Data exists: True

üì• Cloning repository...
‚úÖ Repository cloned

üîó Creating symlink: /content/mhealth-data-privacy/data ‚Üí /content/drive/MyDrive/mhealth-data/data
‚úÖ Symlink created

‚úÖ Setup complete!
   Working directory: /content/mhealth-data-privacy


## 2️⃣ SETUP: Install Dependencies

In [None]:
!pip install -q opacus scikit-learn pyyaml tqdm pyedflib mne

import torch
import json
import time
from datetime import datetime
import pandas as pd
import numpy as np

# Choose the compute device: CUDA when torch reports it available, CPU otherwise.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# <project>/experiments is where the results cell looks for its log file;
# create the folder if it is not there yet.
RESULTS_DIR = PROJECT_DIR / 'experiments'
RESULTS_DIR.mkdir(exist_ok=True)

print('‚úÖ Dependencies installed')
print('\nüìä Environment:')
print('   Device: {}'.format(DEVICE))
if DEVICE == 'cuda':
    # Report the first GPU's name and its total memory converted from bytes to GiB.
    print('   GPU: {}'.format(torch.cuda.get_device_name(0)))
    print('   VRAM: {:.1f} GB'.format(
        torch.cuda.get_device_properties(0).total_memory / 1024**3))

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/254.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m254.4/254.4 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/2.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m2.8/2.8 MB[0m [31m131.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.8/2.8 MB[0m [31m75.5 MB/s[0m eta [36m0:00:00[0m
[?25

## 3️⃣ UPDATE: Repository (Anytime)

In [None]:
# ============================================================================
# üîÑ REPOSITORY UPDATE
# ============================================================================

# ⚙️ CONFIGURE ACTION HERE
# Read by execute_update(), which lower-cases and strips the value before matching it.
UPDATE_ACTION = 'reset'  # Options: 'pull', 'reset', 'reclone', 'skip'

# ============================================================================

def execute_update():
    """Update the local repository according to the UPDATE_ACTION variable.

    Supported actions (matched case-insensitively, surrounding whitespace ignored):

    - 'pull':    run ``git pull`` inside PROJECT_DIR.
    - 'reset':   fetch from ``origin`` and hard-reset to ``origin/main``.
    - 'reclone': delete PROJECT_DIR, clone REPO_URL again and re-create the
                 ``data`` symlink to DATA_DRIVE.
    - 'skip':    do nothing.

    Any other value prints the list of available actions. Outcomes are printed
    rather than returned; unexpected exceptions are caught and printed so the
    cell never aborts the notebook run.
    """

    action = UPDATE_ACTION.lower().strip()

    print(f"\n{'='*70}")
    print(f"üîÑ REPOSITORY UPDATE: {action.upper()}")
    print(f"{'='*70}\n")

    try:
        if action == 'pull':
            os.chdir(PROJECT_DIR)
            print("üì• Pulling latest changes...")
            result = subprocess.run(['git', 'pull'], capture_output=True, text=True)

            if result.returncode == 0:
                print('‚úÖ Pull successful')
                if result.stdout:
                    print(result.stdout)
            else:
                print(f'‚ùå Pull failed: {result.stderr}')

        elif action == 'reset':
            os.chdir(PROJECT_DIR)
            print("üîÑ Resetting to origin/main...")
            # origin/main is only a local remote-tracking ref; without a fetch
            # the reset would target whatever was downloaded last time.
            result = subprocess.run(['git', 'fetch', 'origin'],
                                    capture_output=True, text=True)
            if result.returncode == 0:
                result = subprocess.run(['git', 'reset', '--hard', 'origin/main'],
                                        capture_output=True, text=True)

            if result.returncode == 0:
                print('‚úÖ Reset successful')
                if result.stdout:
                    print(result.stdout)
            else:
                # stderr comes from whichever step (fetch or reset) failed.
                print(f'‚ùå Reset failed: {result.stderr}')

        elif action == 'reclone':
            print('‚ö†Ô∏è  DELETING and RECLONING repository\n')

            # Leave the project tree before deleting it: removing the current
            # working directory makes the following git subprocess fail.
            os.chdir(PROJECT_DIR.parent)

            print('üóëÔ∏è  Deleting old project...')
            if PROJECT_DIR.exists():
                shutil.rmtree(PROJECT_DIR)
            print('‚úÖ Deleted')

            print('üì• Cloning repository...')
            result = subprocess.run(['git', 'clone', REPO_URL, str(PROJECT_DIR)],
                                    capture_output=True, text=True)

            if result.returncode == 0:
                print('‚úÖ Clone successful')

                # Same rule as the setup cell: only link when the Drive data
                # exists and nothing named "data" is already in the checkout.
                data_link = PROJECT_DIR / 'data'
                if data_link.is_symlink() or data_link.exists():
                    print(f'‚ö†Ô∏è  {data_link} already exists; symlink not created')
                elif not DATA_DRIVE.exists():
                    print(f'‚ö†Ô∏è  Data folder not found: {DATA_DRIVE}; symlink not created')
                else:
                    print('üîó Creating symlink...')
                    os.symlink(DATA_DRIVE, data_link)
                    print('‚úÖ Symlink created')

                os.chdir(PROJECT_DIR)
                print('‚úÖ Reclone complete')
            else:
                print(f'‚ùå Clone failed: {result.stderr}')

        elif action == 'skip':
            print('‚è≠Ô∏è  Skipped (UPDATE_ACTION = "skip")')

        else:
            print(f'‚ùå Unknown action: {action}')
            print('Available: pull, reset, reclone, skip')

        print(f"\n{'='*70}\n")

    except Exception as e:
        # Top-level boundary of the cell: report the problem instead of raising.
        print(f'‚ùå Error: {e}\n')

# Run the update as soon as this cell executes, using the current UPDATE_ACTION value.
execute_update()


üîÑ REPOSITORY UPDATE: RESET

üîÑ Resetting to origin/main...
‚úÖ Reset successful
HEAD is now at 363d32f test #2





## 4️⃣ CONFIG: Experiment Settings

In [None]:
# ----------------------------------------------------------------------------
# EXPERIMENT SETTINGS - edit the values below, then re-run this cell
# ----------------------------------------------------------------------------

# Scenario to run. Options: baseline, dp, fl, fl_dp, all
SCENARIO = 'baseline'

# Optional filters
TAGS = ''              # e.g. 'tier1'; empty string = no tag filter
DATASETS = 'wesad'     # e.g. 'wesad', 'sleep-edf'; empty string = all datasets
N_EXPERIMENTS = None   # e.g. 3 to limit to the first N; None = all

# Execution switches
DRY_RUN = False        # True = show commands only
AUTO_MODE = True       # True = skip confirmation

# ----------------------------------------------------------------------------

# Echo the active settings. The lines are joined into a single print call, which
# emits exactly what one print() per line would.
print('\n'.join([
    '‚öôÔ∏è CONFIGURATION',
    '=' * 70,
    f'Scenario: {SCENARIO}',
    f'Tags: {TAGS or "(none)"}',
    f'Datasets: {DATASETS or "(all)"}',
    f'Limit experiments: {N_EXPERIMENTS or "(no limit)"}',
    f'Dry run: {DRY_RUN}',
    f'Auto mode: {AUTO_MODE}',
    '=' * 70,
]))

‚öôÔ∏è CONFIGURATION
Scenario: baseline
Tags: (none)
Datasets: wesad
Limit experiments: (no limit)
Dry run: False
Auto mode: True


## 5️⃣ EXECUTE: Run Experiments

In [None]:
# Build the runner command. '-u' makes the child Python unbuffered so its
# output can be streamed line by line below.
cmd = [
    'python', '-u',
    'experiments/run_experiments.py',
    '--scenario', SCENARIO,
    '--device', DEVICE,
]

# Add the optional filters; falsy values ('' or None) mean "no filter".
if TAGS:
    cmd += ['--tags', TAGS]
if DATASETS:
    cmd += ['--datasets', DATASETS]
if N_EXPERIMENTS:
    cmd += ['--n_experiments', str(N_EXPERIMENTS)]
if AUTO_MODE:
    cmd += ['--auto']

print('\n' + '='*70)
print('üöÄ RUNNING EXPERIMENTS')
print('='*70)
print(f'\nCommand: {" ".join(cmd)}\n')

if DRY_RUN:
    # DRY_RUN is documented as "show commands only": print the command and
    # stop instead of launching the (potentially long) run.
    print('Dry run: the command above was NOT executed (set DRY_RUN = False to run it)')
    return_code = None
else:
    print('üìä LIVE OUTPUT:')
    print('='*70 + '\n')

    # Stream the child's combined stdout/stderr as it is produced. The context
    # manager closes the pipe and waits for the process to exit.
    with subprocess.Popen(
        cmd,
        cwd=str(PROJECT_DIR),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,  # line buffering (valid because text=True)
    ) as process:
        for line in process.stdout:
            print(line, end='', flush=True)

    return_code = process.returncode

    print('\n' + '='*70)
    if return_code == 0:
        print('‚úÖ Experiments completed successfully')
    else:
        print(f'‚ùå Experiments failed (exit code: {return_code})')
    print('='*70)


üöÄ RUNNING EXPERIMENTS

Command: python -u experiments/run_experiments.py --scenario baseline --device cuda --datasets wesad --auto

üìä LIVE OUTPUT:

Device: cuda

Loaded 6 experiments

Will run: 3 experiments


Running 3 experiments...

[1/3]

baseline_wesad_run1 | wesad | baseline | seed=42

Config: batch_size=256, lr=0.0003, epochs=80

Loading data...
Loading WESAD from data/processed/wesad...
‚úÖ Loaded:
   Train: (1217, 14, 1024)
   Val:   (407, 14, 1024)
   Test:  (408, 14, 1024)
   Classes: ['non-stress', 'stress']

Shapes: train=(1217, 14, 1024), val=(407, 14, 1024), test=(408, 14, 1024)

Creating model...
Model: 118,530 parameters

Epoch 001: train_loss=0.6328 train_acc=0.6606 | val_loss=0.5455 val_acc=0.7346
Epoch 002: train_loss=0.5131 train_acc=0.7831 | val_loss=0.4793 val_acc=0.7494
Epoch 003: train_loss=0.4467 train_acc=0.8348 | val_loss=0.4432 val_acc=0.8034
Epoch 004: train_loss=0.3937 train_acc=0.8759 | val_loss=0.4218 val_acc=0.8452
Epoch 005: train_loss=0.3652 t

## 6️⃣ RESULTS: Load & Analyze

In [None]:
# Load the results log written under RESULTS_DIR and print a summary.
print(RESULTS_DIR)
results_file = RESULTS_DIR / 'results_log.json'
print(results_file)


if results_file.exists():
    with open(results_file) as f:
        results = json.load(f)

    # Per-experiment records. NOTE(review): assumed to be a list of dicts under
    # 'results' - confirm against what run_experiments.py writes.
    df = pd.DataFrame(results.get('results') or [])

    # The log does not always carry the aggregate keys (a run of this cell
    # failed with KeyError: 'total'), so derive them from the records when absent.
    total = results.get('total', len(df))
    if 'successful' in results:
        successful = results['successful']
    elif 'success' in df.columns:
        successful = int(df['success'].eq(True).sum())
    else:
        successful = None
    failed = results.get(
        'failed', None if successful is None else total - successful)

    if 'total_time_hours' in results:
        total_hours = results['total_time_hours']
    elif 'time_seconds' in df.columns:
        total_hours = df['time_seconds'].sum() / 3600
    else:
        total_hours = None

    print('\n' + '='*70)
    print('üìä RESULTS SUMMARY')
    print('='*70)

    print(f'\nTimestamp: {results.get("timestamp", "n/a")}')
    print(f'Total experiments: {total}')
    if successful is not None:
        print(f'Successful: {successful} ‚úÖ')
    if failed is not None:
        print(f'Failed: {failed} ‚ùå')
    if successful is not None and total:
        # Guarded so an empty log cannot divide by zero.
        print(f'Success rate: {successful/total*100:.1f}%')
    if total_hours is not None:
        print(f'Total time: {total_hours:.2f} hours')

    print(f'\nDetailed Results:')
    # Show only the expected columns that are actually present.
    wanted = ['name', 'method', 'dataset', 'seed', 'success', 'time_seconds']
    present = [col for col in wanted if col in df.columns]
    if present:
        print(df[present].to_string(index=False))
    elif not df.empty:
        print(df.to_string(index=False))
    else:
        print('(no per-experiment records in the log)')

    print(f'\n{"="*70}')
else:
    print('‚ùå Results file not found')

/content/mhealth-data-privacy/experiments
/content/mhealth-data-privacy/experiments/results_log.json

üìä RESULTS SUMMARY

Timestamp: 2025-10-28T19:45:15.506866


KeyError: 'total'

## 7️⃣ DOWNLOAD: Results

In [None]:
from google.colab import files

print('\nüì• DOWNLOADING RESULTS')
print('='*70)

# Download JSON log
if results_file.exists():
    print(f'\nDownloading: experiments_log.json')
    files.download(str(results_file))

# Create archive of all results
if RESULTS_DIR.exists() and list(RESULTS_DIR.glob('*')):
    print(f'Creating archive...')
    archive = shutil.make_archive('/tmp/mhealth_results', 'zip', RESULTS_DIR)
    print(f'Downloading: mhealth_results.zip')
    files.download(archive)

print(f'\n‚úÖ Download complete!')
print('='*70)

## 8️⃣ UTILITIES: Helper Functions

In [None]:
def show_scenarios():
    """Print every scenario file with the number of experiments it declares.

    Scans ``PROJECT_DIR/experiments/scenarios`` for ``*.yaml`` files and, for
    each one, prints its stem and the size of its top-level ``experiments``
    entry. Files that are empty or lack that entry are reported with 0
    experiments. Returns None.
    """
    import yaml

    scenarios_dir = PROJECT_DIR / 'experiments' / 'scenarios'
    yaml_files = sorted(scenarios_dir.glob('*.yaml'))

    print(f'\nüìã Available Scenarios')
    if not yaml_files:
        print(f'  (no *.yaml files found in {scenarios_dir})')
        return

    for f in yaml_files:
        with open(f) as fp:
            data = yaml.safe_load(fp)
        # safe_load returns None for an empty document, and the top level is not
        # guaranteed to be a mapping; treat both as "no experiments".
        experiments = data.get('experiments') if isinstance(data, dict) else None
        n_exp = len(experiments or ())
        print(f'  - {f.stem}: {n_exp} experiments')


def quick_test():
    """Run a quick smoke test: one tier1 baseline experiment on WESAD.

    Invokes ``experiments/run_experiments.py`` from PROJECT_DIR with
    ``--n_experiments 1`` and ``--auto`` on the current DEVICE. The child
    process output is not captured. A non-zero exit code is reported with a
    message; the function returns None either way.
    """
    # The command limits the run to a single experiment, so say so.
    print('\nüöÄ Quick Test: Running first baseline experiment on WESAD')

    cmd = [
        'python', 'experiments/run_experiments.py',
        '--scenario', 'baseline',
        '--tags', 'tier1',
        '--datasets', 'wesad',
        '--n_experiments', '1',
        '--device', DEVICE,
        '--auto'
    ]

    completed = subprocess.run(cmd, cwd=str(PROJECT_DIR))
    if completed.returncode != 0:
        print(f'‚ùå Quick test failed (exit code: {completed.returncode})')


def show_last_results():
    """Print the per-experiment table stored in the results log.

    Reads the module-level ``results_file``; when it does not exist a short
    notice is printed instead. Returns None.
    """
    # Guard clause: nothing to display without a log file.
    if not results_file.exists():
        print('No results available')
        return

    with open(results_file) as handle:
        payload = json.load(handle)

    # Build the table before printing the header, so a malformed log raises
    # before any output is produced.
    table = pd.DataFrame(payload['results'])
    print('\nüìä Last Run Results')
    print(table.to_string())


# Usage banner for the helpers. The repository-update entry names
# execute_update(), the function this notebook actually defines for that job.
print('‚úÖ Utilities loaded')
print('\nUsage:')
print('  show_scenarios()      # List all scenarios')
print('  quick_test()          # Run quick test')
print('  show_last_results()   # Show last results')
print('  execute_update()      # Update repository (uses UPDATE_ACTION)')

## 9️⃣ EXAMPLES

In [None]:
# ----------------------------------------------------------------------------
# SAMPLE SETTINGS - copy one of these into the configuration cell (Section 4)
# ----------------------------------------------------------------------------

# A single print call emits every heading and snippet; sep="\n" supplies the
# line break that a separate print() per item would have added.
print(
    "üìù Example Configurations (Run in Section 4):\n",
    "Example 1: Quick test (1 experiment)",
    """
SCENARIO = 'baseline'
TAGS = 'tier1'
DATASETS = 'wesad'
N_EXPERIMENTS = 1
AUTO_MODE = True
""",
    "\nExample 2: All baseline on WESAD",
    """
SCENARIO = 'baseline'
TAGS = 'tier1'
DATASETS = 'wesad'
N_EXPERIMENTS = None
AUTO_MODE = True
""",
    "\nExample 3: DP experiments",
    """
SCENARIO = 'dp'
TAGS = 'tier1'
DATASETS = 'wesad'
N_EXPERIMENTS = None
AUTO_MODE = True
""",
    "\nExample 4: All scenarios (long run)",
    """
SCENARIO = 'all'
TAGS = 'tier1'
DATASETS = None
N_EXPERIMENTS = None
AUTO_MODE = True
""",
    "\nExample 5: Dry run (see commands)",
    """
SCENARIO = 'baseline'
TAGS = 'tier1'
DATASETS = 'wesad'
DRY_RUN = True
AUTO_MODE = True
""",
    sep="\n",
)

In [None]:
# Download the project's results folder as a zip archive.
import shutil
from pathlib import Path
from google.colab import files

# Folder to compress
pasta_origem = "/content/mhealth-data-privacy/results"

# Path of the destination zip
zip_destino = "/content/results.zip"

if Path(pasta_origem).is_dir():
    # make_archive appends '.zip' to base_name and returns the path it wrote;
    # download exactly that file rather than a separately hard-coded name.
    zip_destino = shutil.make_archive(base_name="/content/results", format='zip', root_dir=pasta_origem)

    # Trigger the browser download
    files.download(zip_destino)
else:
    # Without this guard make_archive raises when the folder is missing.
    print(f'‚ùå Results folder not found: {pasta_origem}')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>