# Deepnote Setup Template

Este notebook contém o setup inicial para todos os outros notebooks do projeto.
**Execute sempre este setup antes de qualquer outro notebook.**

## O que este setup faz:
1. Instala o projeto como package
2. Configura paths para dados locais
3. Cria estrutura de diretórios
4. Verifica dependências
5. Configura o TensorFlow: GPU (se disponível) ou threading em CPU


In [None]:
# ============================================================================
# STEP 0: Clone Repository (if needed)
# ============================================================================
# Ensures the working directory is the project root. Three situations are
# handled, in order:
#   1. We are already inside the project      -> nothing to do.
#   2. A previous run already cloned the repo -> just `cd` into it.
#   3. Neither                                 -> `git clone`, then `cd`.
# The cell is safe to re-run: it never tries to clone on top of an existing
# checkout, which would make `git clone` fail.

import os
import subprocess

REPO_URL = 'https://github.com/vasco-fernandes21/mhealth-data-privacy.git'
REPO_DIR = 'mhealth-data-privacy'


def _looks_like_project_root(path):
    """Return True when *path* contains both `src` and `requirements.txt`."""
    return (
        os.path.exists(os.path.join(path, 'src'))
        and os.path.exists(os.path.join(path, 'requirements.txt'))
    )


print("="*70)
print("CLONING REPOSITORY")
print("="*70)

if _looks_like_project_root('.'):
    print("✅ Already in project directory!")
elif _looks_like_project_root(REPO_DIR):
    # The repository was cloned by an earlier run; cloning again would fail
    # because the destination directory already exists.
    print("✅ Repository already cloned")
    os.chdir(REPO_DIR)
    print("📁 Changed to project directory")
else:
    print("📥 Cloning repository...")

    try:
        subprocess.run(
            ['git', 'clone', REPO_URL],
            capture_output=True, text=True, check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"❌ Error cloning repository: {e}")
        # Output is captured, so surface git's own message explicitly.
        if e.stderr:
            print(e.stderr.strip())
        print("Please clone manually or check the repository URL")
    except FileNotFoundError:
        # Raised by subprocess when the `git` executable cannot be found.
        print("❌ Git not available. Please clone manually:")
        print("git clone https://github.com/vasco-fernandes21/mhealth-data-privacy.git")
        print("cd mhealth-data-privacy")
    else:
        print("✅ Repository cloned successfully!")

        # Kept outside the `try` so a chdir failure is not misreported as
        # "Git not available".
        os.chdir(REPO_DIR)
        print("📁 Changed to project directory")

print(f"Current directory: {os.getcwd()}")


In [None]:
# ============================================================================
# STEP 1: Environment Setup
# ============================================================================
# Finds the project root (a directory containing both `src` and
# `requirements.txt`). The current directory is checked first; otherwise the
# immediate sub-directories whose name contains "mhealth" are tried. When the
# root is a sub-directory, the working directory is switched to it.
# Leaves `current_dir`, `project_found` and `project_dir` defined.

import os
import sys
from pathlib import Path

print("="*70)
print("DEEPNOTE ENVIRONMENT SETUP (CPU)")
print("="*70)

# Directory the cell was started from
current_dir = os.getcwd()
print(f"Current directory: {current_dir}")

# Search result: flag plus the absolute path of the root, if any
project_found = False
project_dir = None

if os.path.exists('src') and os.path.exists('requirements.txt'):
    # The current directory already is the project root
    project_found, project_dir = True, current_dir
    print("✅ Project directory detected in current location!")
else:
    # Fall back to scanning sub-directories named like the repository
    for entry in os.listdir('.'):
        if not os.path.isdir(entry):
            continue
        if 'mhealth' not in entry.lower():
            continue

        candidate = os.path.join(current_dir, entry)
        has_src = os.path.exists(os.path.join(candidate, 'src'))
        has_requirements = os.path.exists(os.path.join(candidate, 'requirements.txt'))
        if has_src and has_requirements:
            project_found, project_dir = True, candidate
            print(f"✅ Project directory found: {entry}")
            break

if not project_found:
    print("❌ Project directory not found!")
    print("Please ensure you're in the mhealth-data-privacy folder or clone the repository.")
    print("Expected files: src/, requirements.txt, setup.py")
else:
    # Move into the project root when it is not where we started
    if project_dir != current_dir:
        os.chdir(project_dir)
        print(f"📁 Changed to project directory: {project_dir}")

    # Show the top-level entries, marking directories and files
    print("\nProject structure:")
    for entry in os.listdir('.'):
        line = f"  📁 {entry}/" if os.path.isdir(entry) else f"  📄 {entry}"
        print(line)


In [None]:
# ============================================================================
# STEP 2: Install Dependencies
# ============================================================================
# Installs requirements.txt and then the project itself in editable mode.
# pip is invoked as `<kernel python> -m pip` so packages land in the same
# environment the notebook kernel runs in, and each exit code is checked so
# the success message is only printed when both installs actually worked.

import subprocess
import sys


def _run_pip(*args):
    """Run ``python -m pip <args>`` with the kernel interpreter.

    pip's combined stdout/stderr is echoed line by line as it is produced.

    Returns:
        The pip process exit code (0 means success).
    """
    process = subprocess.Popen(
        [sys.executable, '-m', 'pip', *args],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    # The Popen context manager closes the pipe and waits for the process,
    # so `returncode` is populated once the block exits.
    with process:
        for line in process.stdout:
            print(line, end='')
    return process.returncode


print("="*70)
print("INSTALLING DEPENDENCIES")
print("="*70)

# Install from requirements.txt
print("Installing dependencies from requirements.txt...")
requirements_ok = _run_pip('install', '-r', 'requirements.txt') == 0

# Install the project as an editable package
print("\nInstalling project package...")
package_ok = _run_pip('install', '-e', '.') == 0

if requirements_ok and package_ok:
    print("\n✅ Dependencies installed successfully!")
    print("You can now import modules with: from src.models import lstm_baseline")
else:
    # Report which step failed instead of claiming success.
    print("\n❌ Installation failed - check the pip output above")
    if not requirements_ok:
        print("   Failed step: pip install -r requirements.txt")
    if not package_ok:
        print("   Failed step: pip install -e .")


In [None]:
# ============================================================================
# STEP 3: Setup Data Paths
# ============================================================================
# Defines the data path constants (relative to the current working directory)
# and creates the directory tree: one folder per category, each with one
# sub-folder per dataset. Existing directories are left untouched.

import os  # imported here so the cell does not depend on an earlier cell

print("="*70)
print("SETTING UP DATA PATHS")
print("="*70)

# Define local data paths (Deepnote storage).
# Assigning at cell top level already makes these notebook globals, so no
# explicit globals() registration is needed.
DATA_BASE = './data'
RAW_DATA_PATH = f'{DATA_BASE}/raw'
PROCESSED_DATA_PATH = f'{DATA_BASE}/processed'
MODELS_PATH = f'{DATA_BASE}/models'
RESULTS_PATH = f'{DATA_BASE}/results'

# Datasets that get their own sub-folder under every category
DATASETS = ('sleep-edf', 'wesad')

_category_paths = (RAW_DATA_PATH, PROCESSED_DATA_PATH, MODELS_PATH, RESULTS_PATH)

# Create directories if they don't exist
for _category_path in _category_paths:
    os.makedirs(_category_path, exist_ok=True)

print("\nData paths configured:")
print(f"  Raw data: {RAW_DATA_PATH}")
print(f"  Processed data: {PROCESSED_DATA_PATH}")
print(f"  Models: {MODELS_PATH}")
print(f"  Results: {RESULTS_PATH}")

# Create subdirectories for datasets
for dataset in DATASETS:
    for _category_path in _category_paths:
        os.makedirs(f'{_category_path}/{dataset}', exist_ok=True)

print("\n✅ Directory structure created successfully!")


In [None]:
# ============================================================================
# STEP 4: Configure CPU Environment
# ============================================================================
# Configures TensorFlow for the available hardware:
#   * GPU(s) present -> enable memory growth on every GPU.
#   * CPU only       -> set the inter-op / intra-op thread pools to 0.
# All failures are reported as messages; the cell never raises, so it can be
# run before TensorFlow is installed.

import os  # imported here so the cell does not depend on an earlier cell

print("="*70)
print("CONFIGURING CPU ENVIRONMENT")
print("="*70)

# Filled in once the import succeeds; avoids probing locals() afterwards.
tf_version = 'Not installed'

try:
    import tensorflow as tf
    tf_version = tf.__version__

    # Check GPU availability
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            print(f"✅ GPU available: {gpu.name}")
        print("   Using GPU for faster training")

        # Configure GPU memory growth. The setting is applied to every GPU:
        # TensorFlow requires memory growth to be the same across GPUs, so
        # enabling it on the first device only breaks multi-GPU machines.
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            print("✅ GPU memory growth configured")
        except (RuntimeError, ValueError) as e:
            # Typically raised when the devices were already initialized
            # (e.g. the cell is re-run in a live kernel).
            print(f"⚠️  GPU memory growth configuration failed: {e}")
    else:
        print("🖥️  CPU-only environment detected")
        print("   Training will be slower but fully functional")
        print("   Expected times: Baseline ~30-60min, DP ~45-90min, FL ~20-40min")

        # Configure for CPU optimization. A value of 0 lets TensorFlow pick
        # an appropriate number of threads for the machine.
        try:
            tf.config.threading.set_inter_op_parallelism_threads(0)
            tf.config.threading.set_intra_op_parallelism_threads(0)
            print("✅ CPU threading optimized for all available cores")
        except RuntimeError as e:
            # Thread pools cannot be changed once the TensorFlow runtime has
            # been initialized; this is expected when re-running the cell.
            print(f"⚠️  CPU threading configuration skipped: {e}")

except ImportError:
    print("❌ TensorFlow not installed yet")
except Exception as e:
    # Best-effort setup: report anything unexpected instead of aborting.
    print(f"❌ TensorFlow configuration error: {e}")

print(f"\nTensorFlow version: {tf_version}")
# os.cpu_count() returns None when the count cannot be determined.
print(f"CPU cores available: {os.cpu_count() or 'Unknown'}")


In [None]:
# ============================================================================
# STEP 5: Verify Setup
# ============================================================================
# Final sanity check: imports the project modules, prints the Python version,
# lists the data directory tree (at most 5 files per folder), reports disk
# usage and prints the next steps. The closing banner reflects whether the
# module import check actually passed.

print("="*70)
print("VERIFYING SETUP")
print("="*70)

import os  # imported here so the cell does not depend on an earlier cell
import sys
import importlib
import shutil

# Check if we can import our modules
modules_ok = False
try:
    from src.preprocessing import sleep_edf, wesad
    from src.models import lstm_baseline
    from src.privacy import dp_training, fl_training
    from src.evaluation import metrics, visualization
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("   Make sure you ran the installation step above")
else:
    modules_ok = True
    print("✅ All modules imported successfully!")

# Check Python version
print(f"\nPython version: {sys.version}")

# Check data directories (fall back to the default if DATA_BASE is not defined,
# e.g. when the data-path cell has not been run in this kernel)
data_base = './data' if 'DATA_BASE' not in globals() else DATA_BASE
print("\nData directory structure:")
if os.path.exists(data_base):
    for root, dirs, files in os.walk(data_base):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()

        # Depth relative to data_base; relpath is robust to trailing slashes
        # and to the base name re-appearing deeper in the path.
        relative = os.path.relpath(root, data_base)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1

        indent = ' ' * 2 * level
        print(f"{indent}{os.path.basename(os.path.normpath(root))}/")
        subindent = ' ' * 2 * (level + 1)
        for file in sorted(files)[:5]:  # Show first 5 files
            print(f"{subindent}{file}")
        if len(files) > 5:
            print(f"{subindent}... and {len(files) - 5} more files")
else:
    print(f"⚠️  Data directory not found: {data_base}")
    print("   This is normal for first run - directories will be created when needed")

# Check available space (best effort: a failure here must not abort the cell)
try:
    total, used, free = shutil.disk_usage(".")
    print("\nStorage information:")
    print(f"  Total: {total // (1024**3)} GB")
    print(f"  Used: {used // (1024**3)} GB")
    print(f"  Free: {free // (1024**3)} GB")
except Exception as e:
    print(f"⚠️  Could not check storage: {e}")

print("\n" + "="*70)
if modules_ok:
    print("DEEPNOTE SETUP COMPLETE!")
else:
    # Do not claim success when the project modules could not be imported.
    print("DEEPNOTE SETUP FINISHED WITH ERRORS")
print("="*70)
if modules_ok:
    print("\nYou can now run other notebooks or start working with the data.")
else:
    print("\n⚠️  Project modules could not be imported - fix the error above before running other notebooks.")
print("\nNext steps:")
print("1. Upload raw data to ./data/raw/ directory")
print("2. Run notebook 01_preprocess_sleep_edf.ipynb")
print("3. Run notebook 02_preprocess_wesad.ipynb")
print("4. Run training notebooks (03, 04, 05)")
print("5. Run analysis notebook (06)")
print("\n💡 Tip: Use Deepnote's file upload feature to add your datasets!")
