# Federated Learning Testing (Colab)

This notebook clones the repository and runs a Federated Learning (FL) simulation on the Sleep-EDF or WESAD dataset.

## Features
- Choose dataset (Sleep-EDF or WESAD)
- Configure FL (clients, rounds, seed)
- Optional: link data from Google Drive
- Run training and visualize results

Run all cells from top to bottom.


In [None]:
# =========================================
# Federated Learning Simulation (Colab)
# =========================================

# --- 0) Install dependencies ---
!pip install -q "protobuf==5.29.1" "cryptography<44"
!pip install -q "flwr==1.7.0" "ray==2.12.0" seaborn scikit-learn matplotlib

# --- 1) Clone repo ---
import os
from pathlib import Path
import shutil

repo_path = Path("/content/mhealth-data-privacy")

if not repo_path.exists():
    !git clone https://github.com/vasco-fernandes21/mhealth-data-privacy.git {repo_path}

%cd {repo_path}
print("Repo ready:", os.getcwd())


In [None]:
# --- 2) Configuration ---
# Edit the values below, then re-run this cell and every cell after it.
SUPPORTED_DATASETS = ("sleep-edf", "wesad")

DATASET = "sleep-edf"   # "sleep-edf" or "wesad"
NUM_CLIENTS = 3         # FL clients
NUM_ROUNDS = 5          # FL rounds
TRAIN_SEED = 42         # seed
USE_DRIVE_DATA = True   # Link dataset from Google Drive

# Fail fast on a typo: the later cells treat every value other than
# "sleep-edf" as WESAD, so an unchecked misspelling would silently run the
# wrong dataset.
if DATASET not in SUPPORTED_DATASETS:
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; expected one of {SUPPORTED_DATASETS}"
    )

# Export the FL settings as environment variables (values must be strings).
# NOTE(review): presumably the training entry point reads these, since it is
# called without arguments later in the notebook - confirm against train_fl.
os.environ["NUM_CLIENTS"] = str(NUM_CLIENTS)
os.environ["NUM_ROUNDS"] = str(NUM_ROUNDS)
os.environ["TRAIN_SEED"] = str(TRAIN_SEED)

print("Configuration:", DATASET, NUM_CLIENTS, NUM_ROUNDS, TRAIN_SEED)


In [None]:
# --- 3) Optional: link Google Drive data ---
# When USE_DRIVE_DATA is set, mount Google Drive and expose the selected
# dataset's processed folder inside the repo as a symlink under
# data/processed/.
if USE_DRIVE_DATA:
    from google.colab import drive
    drive.mount('/content/drive')

    drive_base = "/content/drive/MyDrive/mhealth-data/data/processed"
    repo_proc = repo_path / "data/processed"
    os.makedirs(repo_proc, exist_ok=True)

    # Any value other than "sleep-edf" selects the WESAD folder.
    dataset_dir = "sleep-edf" if DATASET == "sleep-edf" else "wesad"
    src = f"{drive_base}/{dataset_dir}"
    dst = repo_proc / dataset_dir

    # Verify the Drive source BEFORE touching the destination: otherwise a
    # wrong Drive path would delete the existing local data and leave a
    # dangling symlink behind.
    if not os.path.isdir(src):
        raise FileNotFoundError(
            f"Dataset folder not found on Google Drive: {src}"
        )

    # Remove existing folder/symlink. The symlink test comes first because
    # exists() is False for a dangling link and rmtree() refuses symlinks.
    if dst.is_symlink():
        dst.unlink()
    elif dst.is_dir():
        shutil.rmtree(dst)
    elif dst.exists():
        # A plain file in the way; rmtree() cannot remove it.
        dst.unlink()

    os.symlink(src, dst)
    print("Data linked:", dst, "->", src)


In [None]:
# --- 4) Run FL training (direct import) ---
# Imports the dataset-specific training entry point from the cloned repo,
# runs it in-process and keeps its return value in `results`.
import sys
import time
repo_root = repo_path

# Put the checkout first on sys.path so `src....` resolves to the cloned repo.
# Insert only when it is not already in front, so re-running this cell does
# not pile up duplicate entries.
repo_root_str = str(repo_root)
if sys.path[:1] != [repo_root_str]:
    sys.path.insert(0, repo_root_str)

# Each dataset has its own entry point; any value other than "sleep-edf"
# selects the WESAD one.
if DATASET == "sleep-edf":
    from src.train.sleep_edf.federated_learning.train_fl import main as fl_main
else:
    from src.train.wesad.federated_learning.train_fl import main as fl_main

print("\nStarting Federated Learning...")
# perf_counter() is monotonic, so the measured duration is not affected by
# system clock adjustments during a long run.
# NOTE(review): fl_main() is called without arguments; presumably it picks up
# NUM_CLIENTS / NUM_ROUNDS / TRAIN_SEED from the environment - confirm.
t0 = time.perf_counter()
results = fl_main()
t1 = time.perf_counter()
print(f"FL training finished in {t1-t0:.1f}s")


In [None]:
# --- 5) Load results and visualize ---
# Prints the summary metrics present in `results` and, when a confusion
# matrix is available, renders it as an annotated heatmap.
import json
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

if results:
    print("\nFinal metrics:")
    # Only report the metrics that are actually present in the results.
    for k in ["accuracy", "f1_score", "precision", "recall", "num_clients", "rounds", "training_time"]:
        if k in results:
            print(f"  {k}: {results[k]}")

    # Guard the plot inputs the same way as the metrics above, so a missing
    # key skips the figure instead of raising KeyError after the summary.
    if "confusion_matrix" in results:
        cm = np.array(results["confusion_matrix"])
        # Without class names, fall back to seaborn's automatic tick labels.
        class_names = results["class_names"] if "class_names" in results else "auto"

        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names, ax=ax)
        ax.set_title(f"Confusion Matrix - {DATASET.upper()} FL ({NUM_CLIENTS} clients)")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        fig.tight_layout()
        plt.show()
    else:
        print("No confusion matrix in results; skipping plot.")
else:
    print("No results found.")
