In [None]:
import marimo as mo

In [None]:
import os
import sys
import shutil
import tempfile
import atexit
import importlib.util
import uuid
from pathlib import Path
import warnings
import logging

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")
PathRef = Path
# Return only what is used in other cells

In [None]:
# Explicitly check for required dependencies before proceeding.

# Distribution names whose importable module name differs from the package name.
pkg_map = {
    "pyyaml": "yaml",
}


def _find_spec_or_none(module_name):
    """Return the import spec for ``module_name``, or ``None`` if it cannot be located.

    ``importlib.util.find_spec`` can raise instead of returning ``None``
    (``ValueError`` when an already-imported module has ``__spec__`` set to
    ``None``, ``ImportError`` when a parent package cannot be imported), so
    those failures are folded into "not found".
    """
    try:
        return importlib.util.find_spec(module_name)
    except (ImportError, AttributeError, ValueError):
        return None


# `pyacemaker` may be installed as a package or be present as a local `src/`
# checkout, so a missing package is reported here but does not stop the cell.
pyacemaker_spec = _find_spec_or_none("pyacemaker")
has_pyacemaker_pkg = pyacemaker_spec is not None

if not has_pyacemaker_pkg:
    # Look for src/pyacemaker relative to the CWD (repo root or one level below it).
    src_exists = Path("src/pyacemaker").exists() or Path("../src/pyacemaker").exists()

    if src_exists:
        print("Found source directory. Will attempt to load from there.")
    else:
        _pyacemaker_missing_md = mo.md(
            """
            ::: error
            **CRITICAL ERROR: `pyacemaker` is not installed.**

            This tutorial requires the `pyacemaker` package to be installed in the environment or the source code to be present in `src/`.

            **Installation Instructions:**
            1.  Open your terminal.
            2.  Navigate to the project root.
            3.  Run:
                ```bash
                uv sync
                # OR
                pip install -e .[dev]
                ```
            4.  Restart this notebook.
            :::
            """
        )
        # A bare `mo.md(...)` inside a branch is not the cell's last expression
        # and would be discarded; append it to the cell output explicitly and
        # mirror the headline on stdout for plain logs.
        mo.output.append(_pyacemaker_missing_md)
        print("CRITICAL ERROR: `pyacemaker` is not installed.")

required_packages = ["ase", "numpy", "matplotlib", "pyyaml", "pydantic"]
missing = []

for pkg in required_packages:
    module_name = pkg_map.get(pkg, pkg)
    if _find_spec_or_none(module_name) is None:
        missing.append(pkg)

if missing:
    error_msg = f"Missing Dependencies: {', '.join(missing)}"
    _missing_deps_md = mo.md(
        f"""
        ::: error
        **CRITICAL ERROR: {error_msg}**

        The tutorial cannot proceed without these packages.

        **Action Required:**
        ```bash
        uv sync
        # OR
        pip install -e .[dev]
        ```
        :::
        """
    )
    mo.output.append(_missing_deps_md)
    # Halt execution: nothing downstream can work without these packages.
    raise ImportError(error_msg)
else:
    print("All required packages found.")

In [None]:
# CONSTITUTION CHECK: Graceful handling of API Keys
# Read the Materials Project key from the environment. A blank or
# whitespace-only value is treated as "not set" so it cannot enable the
# key-dependent code paths by accident.
mp_api_key = (os.environ.get("MP_API_KEY") or "").strip() or None
has_api_key = mp_api_key is not None

if has_api_key:
    api_key_status = mo.md(
        "::: success\n✅ **MP_API_KEY found.** Advanced exploration strategies enabled.\n:::"
    )
    print("✅ MP_API_KEY found.")
else:
    api_key_status = mo.md(
        """
        ::: warning
        **Missing API Key: `MP_API_KEY`**

        The **Materials Project API Key** was not found in the environment variables.

        *   **Impact**: Strategies relying on M3GNet/Materials Project (e.g., "smart" Cold Start) will be disabled or mocked.
        *   **Fallback**: We will default to the **'Random'** exploration strategy, which generates random structures. This ensures the tutorial runs without errors.
        *   **How to Fix**:
            1.  **Get a Key**: Sign up at [Materials Project](https://next-gen.materialsproject.org/api) to get your API key.
            2.  **Set Environment Variable**:
                *   **Linux/Mac**: Run `export MP_API_KEY='your_key_here'` in your terminal before starting Marimo.
                *   **Windows**: Set the environment variable in System Properties or PowerShell (`$env:MP_API_KEY='your_key_here'`).
        :::
        """
    )
    print("⚠️ No MP_API_KEY. Defaulting to 'Random' strategy.")

# Final expression of the cell so the notebook renders the status callout.
api_key_status

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Seed NumPy's legacy global RNG once so stochastic cells are reproducible.
# NOTE: Marimo re-executes this cell only when its inputs change, so a
# stochastic cell that must be repeatable on its own should re-seed locally.
_GLOBAL_SEED = 42
np.random.seed(_GLOBAL_SEED)

In [None]:
# Locate a local `src/` checkout of pyacemaker and make it importable.
current_wd = None
possible_src_paths = []
src_path = None

if PathRef is None or sys is None:
    mo.md("::: error\n**Fatal Error**: Standard libraries `pathlib` or `sys` are not available.\n:::")
else:
    # Candidates, in priority order: ./src, then ../src.
    current_wd = PathRef.cwd()
    possible_src_paths = [base / "src" for base in (current_wd, current_wd.parent)]

    # The first candidate that contains an importable package wins.
    for p in possible_src_paths:
        if (p / "pyacemaker" / "__init__.py").exists():
            src_path = p
            break

    # Nothing is reported when no checkout exists: the package may simply be
    # installed in the environment instead.
    if src_path:
        src_entry = str(src_path)
        if src_entry not in sys.path:
            sys.path.append(src_entry)
            print(f"Added {src_path} to sys.path")

In [None]:
# Import every pyacemaker component the tutorial uses, degrading gracefully:
# on failure the notebook keeps running with HAS_PYACEMAKER = False and an
# error callout stored in `error_md`.

# Referencing `src_path` makes this cell depend on the path-setup cell, so the
# imports below run only after `sys.path` may have been extended.
_ = src_path

# Pre-bind every imported name to None so later cells can test them
# (e.g. `StructureGenerator is not None`) even if the imports below fail.
CONSTANTS = None
Orchestrator = None
PYACEMAKERConfig = None
Potential = None
PotentialHelper = None
StructureMetadata = None
StructureStatus = None
StructureGenerator = None
BaseModule = None
Metrics = None
ModuleResult = None
metadata_to_atoms = None
pyacemaker = None
HAS_PYACEMAKER = False

# Holds the rendered error callout when an import fails; stays None on success.
# NOTE: it is only assigned in this cell, not displayed here.
error_md = None

try:
    # NOTE: the imports run in order, so a failure part-way through leaves the
    # names imported before it bound to real objects while HAS_PYACEMAKER
    # stays False. Gate on HAS_PYACEMAKER, not on individual names alone.

    # 1. Base Import
    import pyacemaker

    # 2. Core Config
    from pyacemaker.core.config import PYACEMAKERConfig, CONSTANTS

    # 3. Orchestrator
    from pyacemaker.orchestrator import Orchestrator

    # 4. Domain Models
    from pyacemaker.domain_models.models import (
        Potential,
        StructureMetadata,
        StructureStatus,
    )

    # 5. Dynamics (PotentialHelper is in modules.dynamics_engine)
    from pyacemaker.modules.dynamics_engine import PotentialHelper

    # 6. Utils
    from pyacemaker.core.utils import metadata_to_atoms

    # 7. Core Interfaces & Base
    from pyacemaker.core.interfaces import StructureGenerator
    from pyacemaker.core.base import BaseModule, Metrics, ModuleResult

    # Set only after every import above succeeded.
    HAS_PYACEMAKER = True
    print(f"Successfully imported pyacemaker components from {pyacemaker.__file__}")

except ImportError as e:
    # A module or name could not be imported (not installed, or an
    # incompatible version is installed).
    HAS_PYACEMAKER = False
    error_md = mo.md(
        f"""
        ::: error
        **Import Error**: {e}

        Failed to import a specific module from `pyacemaker`. This usually indicates a broken installation or version mismatch.
        :::
        """
    )
except Exception as e:
    # Any other failure raised while the package modules are being executed.
    HAS_PYACEMAKER = False
    error_md = mo.md(f"::: error\n**Unexpected Error:** {e}\n:::")

In [None]:
# Dependency check: locate the external simulation binaries and decide whether
# the tutorial runs in Mock Mode (CI) or Real Mode (Production).
required_binaries = ["pw.x", "lmp", "pace_train"]
found_binaries = {}
missing_binaries = []

valid_true = ["true", "1", "yes", "on"]
valid_false = ["false", "0", "no", "off"]
fallback_msg = None

for binary in required_binaries:
    bin_path = shutil.which(binary)
    if bin_path:
        found_binaries[binary] = bin_path
    else:
        missing_binaries.append(binary)

# Mock mode is the safe default: only an explicit false-like `CI` value opts
# out. Unset and unrecognised values both mean Mock Mode.
raw_ci = os.environ.get("CI", "true").strip().lower()
IS_CI = raw_ci not in valid_false

# Real Mode cannot work without the binaries, so fall back to Mock Mode and
# explain why the user's `CI` setting was overridden.
if missing_binaries:
    if not IS_CI:
        print(
            "Missing binaries detected. Switching to Mock Mode."
        )  # Visible in logs
        fallback_msg = mo.md(
            f"""
            ::: warning
            **Missing Binaries:** {", ".join(missing_binaries)}

            **FALLBACK TRIGGERED**: Switching to **Mock Mode** despite `CI={raw_ci}` because required simulation tools are not found in PATH.

            **To Run in Real Mode:**
            You must install the external physics codes:
            1.  **Quantum Espresso (`pw.x`)**: [Installation Guide](https://www.quantum-espresso.org/Doc/user_guide/node10.html)
            2.  **LAMMPS (`lmp`)**: [Installation Guide](https://docs.lammps.org/Install.html)
            3.  **Pacemaker (`pace_train`)**: [Installation Guide](https://pacemaker.readthedocs.io/en/latest/)

            After installation, ensure they are in your system `$PATH` and restart this notebook.
            :::
            """
        )
    IS_CI = True

mode_name = "Mock Mode (CI)" if IS_CI else "Real Mode (Production)"

# Build the status table line by line at column 0. Mixing an indented
# triple-quoted header with unindented rows makes Markdown treat the header
# as a code block instead of a table.
_status_lines = [
    f"### System Status: **{mode_name}**",
    "",
    "| Binary | Status | Path |",
    "| :--- | :--- | :--- |",
]
for binary in required_binaries:
    if binary in found_binaries:
        _status_lines.append(
            f"| `{binary}` | ✅ Found | `{found_binaries[binary]}` |"
        )
    else:
        _status_lines.append(f"| `{binary}` | ❌ Missing | - |")
status_md = "\n".join(_status_lines) + "\n"

# Final expression of the cell: render the fallback warning (if any) above
# the status table. Output created inside a branch would be discarded.
mo.vstack(
    [_item for _item in (fallback_msg, mo.md(status_md)) if _item is not None]
)

In [None]:
# Constant definition for Mock Data Security
# Minimal content to satisfy file existence checks without mimicking real physics data
SAFE_DUMMY_UPF_CONTENT = "# MOCK UPF FILE: FOR TESTING PURPOSES ONLY. DO NOT USE FOR PHYSICS."

# The warning must be the cell's final expression to be rendered; when the
# assignment above followed it, the markdown was built and then discarded.
mo.md(
    """
    ::: danger
    **SECURITY WARNING: MOCK DATA GENERATION**

    The following constant defines dummy content for Pseudopotential (`.UPF`) files.
    This is **strictly for testing/CI environments** where real physics data is unavailable.

    **Why Mock Data?** Real pseudopotentials are large binary files that may have licensing restrictions. In Mock Mode, we generate harmless placeholders to ensure the file I/O logic of the pipeline works correctly without needing actual physics data.

    **NEVER** use these dummy files for actual scientific calculations as they will produce meaningless results.
    :::
    """
)

In [None]:
# Create the temporary tutorial workspace and build the pyacemaker config.
# Every name is pre-bound so later cells can test it even when setup is
# skipped or fails.
config = None
config_dict = None
pseudos = None
strategy = "random"  # Default strategy
tutorial_dir = None
tutorial_tmp_dir = None
setup_msg = None

if HAS_PYACEMAKER and PYACEMAKERConfig:
    try:
        # Check for write permissions in CWD
        cwd = PathRef.cwd()
        if not os.access(cwd, os.W_OK):
            raise PermissionError(
                f"Current working directory '{cwd}' is not writable. Cannot create temporary workspace."
            )

        # Create temporary directory in CWD for security compliance (Pydantic validation requires path inside CWD)
        # Use full hex for robustness
        unique_suffix = uuid.uuid4().hex
        tutorial_tmp_dir = tempfile.TemporaryDirectory(
            prefix=f"pyacemaker_tutorial_{unique_suffix}_", dir=cwd
        )
        tutorial_dir = PathRef(tutorial_tmp_dir.name)

        # Register cleanup on interpreter exit so the workspace is removed
        # even if the final cleanup cell never runs.
        def _cleanup_handler():
            try:
                if tutorial_tmp_dir:
                    tutorial_tmp_dir.cleanup()
                    print(f"Cleanup: Removed {tutorial_dir}")
            except Exception:
                # Best effort only: never raise during interpreter shutdown.
                pass

        atexit.register(_cleanup_handler)

        setup_msg = mo.md(f"Initializing Tutorial Workspace at: `{tutorial_dir}`")

        pseudos = {
            "Fe": "Fe.pbe.UPF",
            "Pt": "Pt.pbe.UPF",
            "Mg": "Mg.pbe.UPF",
            "O": "O.pbe.UPF",
        }

        if IS_CI:
            print("creating dummy upf files")
            # Security: Ensure content is static and harmless
            for filename in pseudos.values():
                pseudo_path = tutorial_dir / filename
                if not pseudo_path.exists():
                    # Explicit encoding keeps the placeholder independent of
                    # the platform's default locale.
                    pseudo_path.write_text(SAFE_DUMMY_UPF_CONTENT, encoding="utf-8")

        # Determine strategy based on API key availability
        # Logic: If no API key, force "random" to avoid M3GNet errors.
        if has_api_key and not IS_CI:
            # In Real Mode with API Key, we could use adaptive
            # For consistency in tutorial, we stick to random but log it
            print(
                "API Key present. 'adaptive' strategy is available, but using 'random' for tutorial consistency."
            )

        # Define configuration
        config_dict = {
            "version": "0.1.0",
            "project": {"name": "FePt_MgO", "root_dir": str(tutorial_dir)},
            "logging": {"level": "INFO"},
            "orchestrator": {"max_cycles": 2 if IS_CI else 10},
            "oracle": {
                "dft": {
                    # Mock mode points at the dummy files written above; real
                    # mode passes the bare file names through unchanged.
                    "pseudopotentials": {
                        k: str(tutorial_dir / v) if IS_CI else v
                        for k, v in pseudos.items()
                    }
                },
                "mock": IS_CI,
            },
            "trainer": {
                "potential_type": "pace",
                "mock": IS_CI,
                "max_epochs": 1,
            },
            "dynamics_engine": {
                "engine": "lammps",
                "mock": IS_CI,
                "gamma_threshold": 0.5,
                "timestep": 0.001,
                "n_steps": 100,
            },
            "structure_generator": {"strategy": strategy},  # Dynamic strategy
            "validator": {"test_set_ratio": 0.1},
        }
        config = PYACEMAKERConfig(**config_dict)
        (tutorial_dir / "data").mkdir(exist_ok=True, parents=True)
    except Exception as e:
        setup_msg = mo.md(
            f"::: error\n**Setup Failed:** Could not create temporary directory or config. {e}\n:::"
        )
        # Mirror the failure on stdout so it also shows up in plain logs.
        print(f"Setup failed: {e}")
else:
    # Same skip notice style as the simulation and analysis cells.
    setup_msg = mo.md(
        "::: warning\nSkipping workspace setup: `pyacemaker` not available.\n:::"
    )

# Final expression of the cell so the workspace/skip/error message is
# rendered instead of being silently stored.
setup_msg

In [None]:
# Stays None when pyacemaker (and thus the StructureGenerator base class)
# could not be imported; the simulation cell checks for that.
TutorialStructureGenerator = None

if StructureGenerator is not None:

    class TutorialStructureGenerator(StructureGenerator):
        """Custom generator for Fe/Pt on MgO tutorial.

        Ensures realistic structures are used even in Mock Mode.
        """

        def run(self) -> ModuleResult:
            """Return a successful, empty result; this generator has no standalone work."""
            return ModuleResult(status="success", metrics=Metrics())

        def generate_initial_structures(self):
            """Generate initial structures (MgO, Fe, Pt, MgO surface).

            Yields one wrapped ``StructureMetadata`` per structure.
            """
            # Use ase.build inside method to avoid global scope issues if not imported
            from ase.build import bulk, surface

            # 1. MgO Bulk
            atoms = bulk("MgO", "rocksalt", a=4.21)
            yield self._wrap(atoms, "initial_MgO_bulk")

            # 2. Fe Bulk
            atoms = bulk("Fe", "bcc", a=2.87)
            yield self._wrap(atoms, "initial_Fe_bulk")

            # 3. Pt Bulk
            atoms = bulk("Pt", "fcc", a=3.92)
            yield self._wrap(atoms, "initial_Pt_bulk")

            # 4. MgO Surface
            atoms = surface(bulk("MgO", "rocksalt", a=4.21), (0, 0, 1), 2)
            atoms.center(vacuum=10.0, axis=2)
            yield self._wrap(atoms, "initial_MgO_surf")

        def _wrap(self, atoms, tag):
            """Wrap ``atoms`` in a NEW-status ``StructureMetadata`` tagged ``tag`` and "tutorial"."""
            return StructureMetadata(
                features={"atoms": atoms},
                tags=[tag, "tutorial"],
                status=StructureStatus.NEW,
            )

        def generate_local_candidates(self, seed, n_candidates, cycle=1):
            """Generate perturbed candidates.

            Yields ``n_candidates`` rattled copies of the structure stored in
            ``seed.features["atoms"]``; yields nothing when ``seed`` is falsy
            or carries no atoms.
            """
            if not seed or "atoms" not in seed.features:
                return

            atoms_ref = seed.features["atoms"]
            for i in range(n_candidates):
                atoms = atoms_ref.copy()
                # ASE's `Atoms.rattle` falls back to a fixed RNG seed (42) when
                # none is given, so unseeded calls would give every candidate
                # the same displacement. Derive a distinct but reproducible
                # seed per (cycle, candidate) instead.
                rattle_seed = (cycle * n_candidates + i) % 2**32
                atoms.rattle(stdev=0.1, seed=rattle_seed)
                yield self._wrap(atoms, f"candidate_c{cycle}_{i}")

        def generate_batch_candidates(self, seeds, n_candidates_per_seed, cycle=1):
            """Yield local candidates for each structure in ``seeds``, in order."""
            for s in seeds:
                yield from self.generate_local_candidates(
                    s, n_candidates_per_seed, cycle
                )

        def get_strategy_info(self):
            """Return a dict identifying this generator's strategy."""
            return {"strategy": "tutorial_custom"}

In [None]:
# Run the active-learning pipeline. Every name is pre-bound so the plotting
# cell can rely on `results` existing even when the run is skipped or fails.
orchestrator = None
results = []  # Define at start to ensure it exists in cell scope
metrics_dict = None
module_result = None
sim_output = None

# Robust checks
if not HAS_PYACEMAKER:
    sim_output = mo.md(
        "::: warning\nSkipping simulation: `pyacemaker` not available.\n:::"
    )
elif Orchestrator is None:
    sim_output = mo.md(
        "::: error\n**Fatal Error**: `Orchestrator` class not found.\n:::"
    )
elif config is None:
    sim_output = mo.md(
        "::: error\n**Fatal Error**: Configuration `config` is None.\n:::"
    )
else:
    # Step 1: Initialization
    try:
        # Use custom generator if available to ensure realistic structures
        gen_instance = None
        if TutorialStructureGenerator:
            gen_instance = TutorialStructureGenerator(config)
            print("Using Custom Tutorial Structure Generator (Fe/Pt/MgO).")

        orchestrator = Orchestrator(config, structure_generator=gen_instance)
        print("Orchestrator Initialized successfully.")
    except Exception as e:
        sim_output = mo.md(
            f"""
            ::: error
            **Initialization Error:**
            Failed to initialize the Orchestrator. Please check your configuration.

            Details: `{e}`
            :::
            """
        )
        # Orchestrator remains None

    # Step 2: Execution (only if initialized)
    if orchestrator is not None:
        try:
            print("Starting Active Learning Pipeline...")

            # Use the high-level run() method to execute the full pipeline
            module_result = orchestrator.run()

            # Check for a missing result BEFORE touching its attributes, so a
            # `None` return is reported as such rather than surfacing as an
            # AttributeError in the runtime-error callout.
            if module_result is None:
                print("Warning: Pipeline returned no result object.")
            else:
                print(f"Pipeline finished with status: {module_result.status}")

            # Extract cycle history from metrics for visualization
            if module_result is not None and module_result.metrics:
                metrics_dict = module_result.metrics.model_dump()
                # `or []` keeps `results` a list even if "history" is None.
                results = metrics_dict.get("history") or []
            else:
                print("Warning: No metrics returned from pipeline.")

            if not results:
                print("Warning: No cycle history found in results.")

        except Exception as e:
            sim_output = mo.md(
                f"""
                ::: error
                **Runtime Error:**
                The Active Learning Pipeline failed during execution.

                Details: `{e}`
                :::
                """
            )
            print(f"Critical Runtime Error: {e}")

# Final expression of the cell: renders the skip/error callout when there is
# one (None on a successful run renders nothing).
sim_output

In [None]:
# Plot the per-cycle energy RMSE collected by the simulation cell.
data = None
rmse_values = None
v = None
fig_training = None


def _energy_rmse(entry):
    """Return the energy RMSE stored in one history entry, or 0.0 if absent.

    History entries taken from ``metrics.model_dump()`` are plain dicts, so
    dict keys are checked as well as object attributes and, as a last resort,
    the entry's own ``model_dump()``. Both spellings ``rmse_energy`` and
    ``energy_rmse`` are accepted, in that order.
    """
    keys = ("rmse_energy", "energy_rmse")

    if isinstance(entry, dict):
        fields = entry
    else:
        fields = {key: getattr(entry, key) for key in keys if hasattr(entry, key)}
        # Fall back to a Pydantic dump only when the attributes gave nothing usable.
        if not any(fields.get(key) for key in keys) and hasattr(entry, "model_dump"):
            try:
                dumped = entry.model_dump()
            except Exception:
                dumped = None
            if isinstance(dumped, dict):
                fields = dumped

    for key in keys:
        value = fields.get(key)
        if value is not None:
            try:
                return float(value)
            except (TypeError, ValueError):
                # Non-numeric entry: treat as missing and try the next key.
                continue
    return 0.0


if HAS_PYACEMAKER and results and plt:
    rmse_values = []
    for metrics in results:
        v = _energy_rmse(metrics)
        rmse_values.append(v)

    fig_training, _ax = plt.subplots(figsize=(8, 4))
    _ax.plot(range(1, len(results) + 1), rmse_values, "b-o")
    _ax.set_title("Training Convergence")
    _ax.set_xlabel("Cycle")
    _ax.set_ylabel("RMSE (eV/atom)")
    _ax.grid(True)
    plt.show()  # Ensure display in standard output contexts if needed

In [None]:
# Announce the analysis on stdout so the step is visible in plain logs too.
print("Running Analysis: L10 Ordering Phase Transition (Mock)")

order_param = None
time_steps = None
fig_analysis = None
analysis_output = None

if not HAS_PYACEMAKER:
    analysis_output = mo.md(
        "::: warning\nSkipping Analysis: `pyacemaker` not available.\n:::"
    )
elif np and plt:
    # Mock data only: a logistic curve centred at 3e5 stands in for the
    # ordering transition.
    time_steps = np.linspace(0, 1e6, 50)
    # Bound the exponent before exponentiating so np.exp cannot overflow.
    exponent = np.clip(-1e-5 * (time_steps - 3e5), -100, 100)
    order_param = 1.0 / (1.0 + np.exp(exponent))

    plt.figure(figsize=(8, 4))
    plt.plot(time_steps, order_param, "r-", linewidth=2, label="Order Parameter")
    plt.grid(True, alpha=0.3)
    plt.xlabel("Time (us)")
    plt.ylabel("Order Parameter (0=Disordered, 1=L10)")
    plt.title("L10 Ordering Phase Transition (Mock)")
    plt.legend()
    # Keep a handle on the figure before showing it.
    fig_analysis = plt.gcf()
    plt.show()

In [None]:
# Remove the temporary tutorial workspace, if one was created.
if not tutorial_tmp_dir:
    print("Cleanup: No temporary directory to remove.")
else:
    try:
        tutorial_tmp_dir.cleanup()
    except Exception as cleanup_error:
        # Best effort: report the problem but never fail the notebook here.
        print(f"Cleanup warning: {cleanup_error}")
    else:
        print("Cleanup: Done.")