
# Tutorial 01: From Scratch to Active Learning - Training Potentials for Interfaces

**Goal**: Demonstrate the "Aha! Moment" where the system automatically learns and improves.

This tutorial guides you through:
1.  **Setup**: Defining the system (MgO substrate, FePt cluster).
2.  **Phase A (Bulk)**: Training simple bulk potentials.
3.  **Phase B (Interface)**: Active Learning for interface configurations.
4.  **Validation**: Inspecting the results.

**Modes**:
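*   **CI Mode**: Uses mock components and tiny systems for fast verification. Selected when the `CI` environment variable is set to `true`.
*   **Real Mode**: Uses actual DFT (Quantum ESPRESSO), Pacemaker, and LAMMPS; the corresponding executables must be available on `PATH`.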


In [None]:

import os
import sys
from pathlib import Path

# Make the in-repo package importable when the notebook is launched from the
# tutorials directory; nothing is added when ../src does not exist.
src_dir = Path("../src")
if src_dir.exists():
    sys.path.append(str(src_dir.resolve()))

from mlip_autopipec.core.orchestrator import Orchestrator
from mlip_autopipec.domain_models.config import (
    GlobalConfig,
    OrchestratorConfig,
    GeneratorConfig,
    OracleConfig,
    TrainerConfig,
    DynamicsConfig,
    ValidatorConfig,
    SystemConfig,
    ActiveLearningConfig
)
from mlip_autopipec.domain_models.enums import (
    ExecutionMode,
    GeneratorType,
    OracleType,
    TrainerType,
    DynamicsType,
    ValidatorType,
    DFTCode
)


In [None]:

# Detect Mode
# CI mode is selected only when the CI environment variable equals "true"
# (compared case-insensitively); unset or any other value means real mode.
ci_flag = os.environ.get("CI", "false")
IS_CI_MODE = ci_flag.lower() == "true"
print(f"Running in CI Mode: {IS_CI_MODE}")

# Working directory handed to the orchestrator; created up front if missing.
WORK_DIR = Path("outputs") / "01_MgO_FePt"
WORK_DIR.mkdir(parents=True, exist_ok=True)


In [None]:

# Step 1: Define Configuration
#
# Both modes fill in the same six sections of GlobalConfig; only the section
# contents differ. The sections are therefore collected per mode first and the
# GlobalConfig is assembled once at the end.

if IS_CI_MODE:
    # MOCK CONFIGURATION (Fast, no external dependencies)
    config_sections = {
        # A single cycle keeps the CI run short.
        "orchestrator": OrchestratorConfig(
            max_cycles=1,
            work_dir=WORK_DIR,
            execution_mode=ExecutionMode.MOCK,
            cleanup_on_exit=False,
        ),
        "generator": GeneratorConfig(
            type=GeneratorType.MOCK,
            mock_count=2,
        ),
        "oracle": OracleConfig(
            type=OracleType.MOCK,
        ),
        "trainer": TrainerConfig(
            type=TrainerType.MOCK,
            mock_potential_content="MOCK_POTENTIAL_CONTENT_YACE",
        ),
        "dynamics": DynamicsConfig(
            type=DynamicsType.MOCK,
            halt_on_uncertainty=True,
        ),
        "validator": ValidatorConfig(
            type=ValidatorType.MOCK,
        ),
    }
else:
    # REAL CONFIGURATION (Requires QE, Pacemaker, LAMMPS)
    # Note: This configuration assumes external binaries are in PATH.
    config_sections = {
        "orchestrator": OrchestratorConfig(
            max_cycles=5,
            work_dir=WORK_DIR,
            execution_mode=ExecutionMode.PRODUCTION,
            cleanup_on_exit=False,
        ),
        "generator": GeneratorConfig(
            type=GeneratorType.RANDOM,  # or M3GNET if available
        ),
        "oracle": OracleConfig(
            type=OracleType.DFT,
            dft_code=DFTCode.QUANTUM_ESPRESSO,
            command="mpirun -np 4 pw.x",
        ),
        "trainer": TrainerConfig(
            type=TrainerType.PACEMAKER,
            max_epochs=100,
        ),
        "dynamics": DynamicsConfig(
            type=DynamicsType.LAMMPS,
            halt_on_uncertainty=True,
        ),
        "validator": ValidatorConfig(
            type=ValidatorType.PHYSICS,
        ),
    }

config = GlobalConfig(**config_sections)

print("Configuration defined.")


In [None]:

# Step 2: Initialize Orchestrator
# Build the orchestrator from the configuration defined in Step 1 (mock or
# real, depending on the detected mode). The workflow itself is not started
# here; that happens when run() is called in the next step.
orchestrator = Orchestrator(config)
print("Orchestrator initialized.")


In [None]:

# Step 3: Run the Workflow
# This triggers the Active Learning Loop:
# Explore -> Detect Uncertainty -> Oracle (DFT) -> Train -> Validate

import traceback

try:
    orchestrator.run()
except Exception as e:
    # Top-level notebook boundary: always report the failure.
    print(f"Workflow failed: {e}")
    # In CI mode the failure must propagate so the notebook run is marked as
    # failed; the re-raise also surfaces the full traceback.
    if IS_CI_MODE:
        raise
    # In real mode the error is reported but not re-raised, so later cells can
    # still run. Print the traceback explicitly, because the message alone
    # drops the exception type and the location of the failure.
    traceback.print_exc()
else:
    print("Workflow completed successfully.")


In [None]:

# Step 4: Inspect Results
# Check the state of the workflow
import shutil

state = orchestrator.state_manager.state
print(f"Final Cycle: {state.current_cycle}")
print(f"Active Potential: {state.active_potential_path}")
print(f"Dataset Path: {state.dataset_path}")

# Copy the active potential to a fixed location for Tutorial 02
potential_path = state.active_potential_path
if not potential_path or not potential_path.exists():
    # Either no potential was recorded in the state or its file is missing.
    print("No active potential found to copy.")
else:
    destination = WORK_DIR / "active_potential.yace"
    shutil.copy(potential_path, destination)
    print(f"Copied active potential to {destination}")
