# Federated Learning Testing (Colab)

This notebook clones the `mhealth-data-privacy` repository and runs a Federated Learning simulation on the Sleep-EDF or WESAD dataset.

## Features
- Choose dataset (Sleep-EDF or WESAD)
- Configure FL (clients, rounds, seed)
- Optional: link data from Google Drive
- Runs training and visualizes results

Run all cells from top to bottom.


In [None]:
# 1) Setup (install deps and clone repo)
!pip install -q flwr==1.7.0 ray==2.12.0 seaborn scikit-learn

import os, sys, shutil, json
from pathlib import Path

# Clone repo
if not Path('/content/mhealth-data-privacy').exists():
    !git clone https://github.com/vasco-fernandes21/mhealth-data-privacy.git /content/mhealth-data-privacy

%cd /content/mhealth-data-privacy
print("Repo ready:", os.getcwd())


In [None]:
# 2) Configuration
# Tunable parameters for the run. Edit the values here, then re-run this cell.
DATASET = "sleep-edf"  # "sleep-edf" or "wesad"
NUM_CLIENTS = 3
NUM_ROUNDS = 5
TRAIN_SEED = 42

# Fail fast on an unsupported value: code that branches on
# `DATASET == "sleep-edf"` would otherwise treat any other string (including a
# typo such as "sleep_edf") as WESAD without warning.
if DATASET not in ("sleep-edf", "wesad"):
    raise ValueError(f'DATASET must be "sleep-edf" or "wesad", got {DATASET!r}')

# Environment values must be strings. Exporting them here means any child
# process started from this kernel inherits them (presumably they are read by
# the training script -- confirm against train_fl.py).
os.environ["NUM_CLIENTS"] = str(NUM_CLIENTS)
os.environ["NUM_ROUNDS"] = str(NUM_ROUNDS)
os.environ["TRAIN_SEED"] = str(TRAIN_SEED)

print("Config:", DATASET, NUM_CLIENTS, NUM_ROUNDS, TRAIN_SEED)


In [None]:
# 3) Optional: link Google Drive data to repo
# When enabled, mounts Google Drive and replaces the repo's processed-data
# directory for the selected dataset with a symlink to the copy on Drive.
USE_DRIVE_DATA = True

if USE_DRIVE_DATA:
    from google.colab import drive
    drive.mount('/content/drive')

    drive_base = "/content/drive/MyDrive/mhealth-data/data/processed"
    repo_proc = "/content/mhealth-data-privacy/data/processed"

    # Any value other than "sleep-edf" selects WESAD.
    if DATASET == "sleep-edf":
        src = f"{drive_base}/sleep-edf"
        dst = f"{repo_proc}/sleep-edf"
    else:
        src = f"{drive_base}/wesad"
        dst = f"{repo_proc}/wesad"

    # Validate the source BEFORE touching the destination: otherwise a wrong
    # Drive path would delete existing local data and leave a dangling symlink
    # that is still reported as "Data linked".
    if not os.path.exists(src):
        raise FileNotFoundError(f"Processed data not found on Drive: {src}")

    # Creates the intermediate "data" directory as well.
    os.makedirs(repo_proc, exist_ok=True)

    # Clean destination. lexists() is also true for a dangling symlink, which
    # exists() alone would miss.
    if os.path.lexists(dst):
        try:
            if os.path.islink(dst):
                os.unlink(dst)
            else:
                shutil.rmtree(dst)
        except OSError as e:
            print("Warn when removing:", e)

    # Link only when the destination is really gone. Linking onto a surviving
    # directory would place the symlink inside it and report a false success.
    if os.path.lexists(dst):
        print("Destination could not be cleared, data NOT linked:", dst)
    else:
        os.symlink(src, dst)
        print("Data linked:", dst, "->", src)


In [None]:
# 4) Run FL training
# Runs the dataset-specific training script as a child process, times it, and
# prints a bounded excerpt of its output.
import subprocess, time

# Pick the script and the results location for the selected dataset.
# NOTE(review): the results paths are assumed to match what the training
# script writes -- confirm against train_fl.py.
if DATASET == "sleep-edf":
    script_path = "src/train/sleep-edf/federated-learning/train_fl.py"
    results_dir = f"models/sleep-edf/fl/fl_clients{NUM_CLIENTS}"
    results_json = f"{results_dir}/results_sleep_edf_fl.json"
else:
    script_path = "src/train/wesad/federated-learning/train_fl.py"
    results_dir = f"models/wesad/fl/fl_clients{NUM_CLIENTS}"
    results_json = f"{results_dir}/results_wesad_fl.json"

print("Running:", script_path)
t0 = time.time()
# script_path is relative, so this relies on the current working directory
# being the repo root. The child inherits os.environ from this kernel.
proc = subprocess.run([sys.executable, script_path], text=True, capture_output=True)
t1 = time.time()

print("Return code:", proc.returncode, "| Time:", f"{t1-t0:.1f}s")
print("STDOUT:\n", proc.stdout[:2000])
if proc.stderr:
    # Show the END of stderr: tracebacks and final error messages are emitted
    # last, so truncating to the head would hide the actual failure.
    print("\nSTDERR (tail):\n", proc.stderr[-2000:])


In [None]:
# 5) Load results and visualize
# Reads the results JSON, prints whichever summary metrics are present, and
# draws the confusion matrix as an annotated heatmap.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

results_path = Path(results_json)

if not results_path.exists():
    print("Results not found:", results_json)
else:
    with results_path.open("r") as fh:
        res = json.load(fh)

    print("Final metrics:")
    summary_keys = (
        "accuracy",
        "f1_score",
        "precision",
        "recall",
        "num_clients",
        "rounds",
        "training_time",
    )
    # Summary metrics are optional: print only the ones the file contains.
    for key in summary_keys:
        if key in res:
            print(f"  {key}: {res[key]}")

    # Unlike the metrics above, these two keys are required for the plot.
    cm = np.array(res["confusion_matrix"])
    class_names = res["class_names"]

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_title(f"Confusion Matrix - {DATASET.upper()} FL ({NUM_CLIENTS} clients)")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    fig.tight_layout()
    plt.show()
