# Colab Setup Template

Este notebook contém o setup inicial para todos os outros notebooks do projeto.
**Execute sempre este setup antes de qualquer outro notebook.**

## O que este setup faz:
1. Clona o repositório GitHub
2. Instala o projeto como package
3. Monta o Google Drive
4. Configura paths para dados
5. Verifica o setup (imports dos módulos do projeto, GPU e estrutura de diretórios de dados)


In [None]:
# ============================================================================
# STEP 1: Clone GitHub Repository
# ============================================================================

print("="*70)
print("CLONING REPOSITORY")
print("="*70)

# Clone the repository 
!git clone https://github.com/vasco-fernandes21/mhealth-data-privacy.git
%cd mhealth-data-privacy

print("\nRepository cloned successfully!")
print(f"Current directory: {os.getcwd()}")


In [None]:
# ============================================================================
# STEP 2: Install Project as Package
# ============================================================================

print("="*70)
print("INSTALLING PROJECT PACKAGE")
print("="*70)

# First, ensure we have pre-compiled wheels for core packages
print("Installing core dependencies with pre-compiled wheels...")
!pip install --only-binary=all --upgrade numpy pandas scipy scikit-learn

# Install the project as an editable package (without dependencies first)
print("\nInstalling project package...")
!pip install -e . --no-deps

# Install remaining dependencies
print("\nInstalling remaining dependencies...")
!pip install tensorflow-privacy flwr[simulation] mne pyedflib matplotlib seaborn joblib

print("\nPackage installed successfully!")
print("You can now import modules with: from src.models import lstm_baseline")


In [None]:
# ============================================================================
# STEP 3: Mount Google Drive
# ============================================================================

# Section banner: rule, title, rule - one per line.
print("="*70, "MOUNTING GOOGLE DRIVE", "="*70, sep="\n")

# Mount the user's Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Confirmation plus a hint about where the Drive contents are located.
print(
    "\nGoogle Drive mounted successfully!",
    "Drive contents available at: /content/drive/MyDrive/",
    sep="\n",
)


In [None]:
# ============================================================================
# STEP 4: Setup Data Paths
# ============================================================================

print("="*70)
print("SETTING UP DATA PATHS")
print("="*70)

import os

# Define paths
DRIVE_BASE = '/content/drive/MyDrive/mhealth-data'
RAW_DATA_PATH = f'{DRIVE_BASE}/raw'
PROCESSED_DATA_PATH = f'{DRIVE_BASE}/processed'
MODELS_PATH = f'{DRIVE_BASE}/models'
RESULTS_PATH = f'{DRIVE_BASE}/results'

# Create directories if they don't exist
os.makedirs(RAW_DATA_PATH, exist_ok=True)
os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)
os.makedirs(MODELS_PATH, exist_ok=True)
os.makedirs(RESULTS_PATH, exist_ok=True)

# Create symlinks for easy access
!ln -sf {DRIVE_BASE} /content/mhealth-data-privacy/data

print(f"\nData paths configured:")
print(f"  Raw data: {RAW_DATA_PATH}")
print(f"  Processed data: {PROCESSED_DATA_PATH}")
print(f"  Models: {MODELS_PATH}")
print(f"  Results: {RESULTS_PATH}")
print(f"\nSymlink created: /content/mhealth-data-privacy/data -> {DRIVE_BASE}")


In [None]:
# ============================================================================
# STEP 5: Verify Setup
# ============================================================================
# Relies on `os` and DRIVE_BASE being defined already (both come from STEP 4).

print("="*70)
print("VERIFYING SETUP")
print("="*70)

import sys
import importlib

# --- Project modules: report whether the package imports cleanly -------------
try:
    from src.preprocessing import sleep_edf, wesad
    from src.models import lstm_baseline
    from src.privacy import dp_training, fl_training
    from src.evaluation import metrics, visualization
except ImportError as import_err:
    print(f"❌ Import error: {import_err}")
else:
    print("✅ All modules imported successfully!")

# --- Interpreter version -----------------------------------------------------
print(f"\nPython version: {sys.version}")

# --- GPU: any TensorFlow failure is reported, never raised -------------------
try:
    import tensorflow as tf
    gpu_devices = tf.config.list_physical_devices('GPU')
    if not gpu_devices:
        print("⚠️  No GPU available - training will be slower")
    else:
        first_gpu = gpu_devices[0]
        print(f"✅ GPU available: {first_gpu.name}")
        # Enable memory growth on the first GPU only.
        tf.config.experimental.set_memory_growth(first_gpu, True)
except Exception as tf_err:
    print(f"❌ TensorFlow error: {tf_err}")

# --- Data directory tree: indented listing, at most 5 files per folder -------
print(f"\nData directory structure:")
if not os.path.exists(DRIVE_BASE):
    print(f"⚠️  Data directory not found: {DRIVE_BASE}")
    print("   Make sure to create the directory structure in Google Drive")
else:
    for current_dir, _subdirs, filenames in os.walk(DRIVE_BASE):
        # Depth below DRIVE_BASE = number of separators left after stripping it.
        depth = current_dir.replace(DRIVE_BASE, '').count(os.sep)
        dir_indent = ' ' * 2 * depth
        file_indent = ' ' * 2 * (depth + 1)
        print(f"{dir_indent}{os.path.basename(current_dir)}/")
        for filename in filenames[:5]:  # Show first 5 files
            print(f"{file_indent}{filename}")
        hidden_count = len(filenames) - 5
        if hidden_count > 0:
            print(f"{file_indent}... and {hidden_count} more files")

# --- Closing banner and pointers to the follow-up notebooks ------------------
for closing_line in (
    "\n" + "="*70,
    "SETUP COMPLETE!",
    "="*70,
    "\nYou can now run other notebooks or start working with the data.",
    "\nNext steps:",
    "1. Run notebook 01_preprocess_sleep_edf.ipynb (if not done yet)",
    "2. Run notebook 02_preprocess_wesad.ipynb (if not done yet)",
    "3. Run training notebooks (03, 04, 05)",
    "4. Run analysis notebook (06)",
):
    print(closing_line)
