# Tutorial 01: Training Base Potentials (MgO & FePt)

**Goal**: Create the "Base" potentials for the substrate (MgO) and the deposit (FePt) separately.

This tutorial guides you through the process of:
1.  Configuring the `StructureGenerator` to create initial atomic structures.
2.  Running the **Active Learning Loop** to train a Machine Learning Interatomic Potential (MLIP).
3.  Validating the resulting potentials against physical properties.

We will execute this in two parts: first for the oxide substrate, then for the metallic alloy.



In [None]:
import os
import shutil
import sys
from pathlib import Path

import numpy as np
import pytest
from ase.build import bulk
from ase.calculators.emt import EMT

from mlip_autopipec.core.orchestrator import Orchestrator
from mlip_autopipec.domain_models.config import (
    GlobalConfig,
    GeneratorConfig,
    OracleConfig,
    TrainerConfig,
    DynamicsConfig,
    ValidatorConfig,
    AdaptiveGeneratorConfig,
    QEOracleConfig,
    PacemakerTrainerConfig,
    LAMMPSDynamicsConfig,
    StandardValidatorConfig,
    MockGeneratorConfig,
    MockOracleConfig,
    MockTrainerConfig,
    MockDynamicsConfig,
    MockValidatorConfig,
    PhysicsBaselineConfig
)
from mlip_autopipec.domain_models.enums import (
    GeneratorType,
    OracleType,
    TrainerType,
    DynamicsType,
    ValidatorType,
)



## 1. Environment Setup

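We detect whether we are running in a CI environment or on a machine with the full DFT/MD codes installed. Setting the environment variable `IS_CI_MODE=true` requests CI mode explicitly; CI mode is also selected automatically when any of `pw.x`, `lmp`, or `pace_train` cannot be found on the `PATH`.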

- **CI Mode**: Uses Mock components. Fast, verifies logic, but no real physics.
- **Real Mode**: Uses Quantum ESPRESSO, LAMMPS, and Pacemaker. Slow, but produces real potentials.



In [None]:
# Detect external tools
def has_command(cmd: str) -> bool:
    """Return True when ``shutil.which`` can resolve ``cmd`` to an executable."""
    return shutil.which(cmd) is not None


def is_truthy_flag(value: str) -> bool:
    """Interpret an environment-variable string as an on/off switch.

    The spellings ``1``, ``true``, ``yes`` and ``on`` count as enabled
    (case-insensitive, surrounding whitespace ignored). Every other value,
    including the empty string, counts as disabled.
    """
    return value.strip().lower() in {"1", "true", "yes", "on"}


HAS_QE = has_command("pw.x")
HAS_LAMMPS = has_command("lmp")
HAS_PACEMAKER = has_command("pace_train")

# Force CI mode if tools are missing or explicitly requested.
# Accepting "1"/"yes"/"on" as well as "true" keeps a CI job that exports
# IS_CI_MODE=1 from silently falling through to the real (slow) pipeline.
IS_CI_MODE = is_truthy_flag(os.environ.get("IS_CI_MODE", "False"))

# Report only the executables that were actually not found.
missing_tools = [
    tool
    for tool, found in (
        ("pw.x", HAS_QE),
        ("lmp", HAS_LAMMPS),
        ("pace_train", HAS_PACEMAKER),
    )
    if not found
]
if missing_tools:
    print(f"Missing external tools ({', '.join(missing_tools)}). Forcing CI Mode (Mocks).")
    IS_CI_MODE = True

print(f"Running in mode: {'CI / MOCK' if IS_CI_MODE else 'REAL / PHYSICS'}")

# Setup Workdirs
WORKDIR_MGO = Path("workdirs/01_mgo")
WORKDIR_FEPT = Path("workdirs/01_fept")

# Start each run from an empty directory: drop whatever a previous run left
# behind, then recreate the working directory (including missing parents).
for _workdir in (WORKDIR_MGO, WORKDIR_FEPT):
    if _workdir.exists():
        shutil.rmtree(_workdir)
    _workdir.mkdir(parents=True, exist_ok=True)



## 2. Part A: Training MgO Potential

We configure the pipeline for Magnesium Oxide.

- **Generator**: Adaptive, targeting "rocksalt" structure.
- **Oracle**: Quantum ESPRESSO (PBE functional).
- **Trainer**: Pacemaker (ACE potential).
- **Dynamics**: LAMMPS (for exploration).
- **Validator**: Phonons & Elastic constants.



In [None]:
def create_mgo_config(workdir: Path, is_ci: bool) -> GlobalConfig:
    """Assemble the pipeline configuration for the MgO run.

    Args:
        workdir: Directory handed to ``GlobalConfig`` as ``workdir``.
        is_ci: When true, every component uses its Mock* config and two
            cycles are requested. Otherwise the adaptive generator, QE
            oracle, Pacemaker trainer, LAMMPS dynamics and standard
            validator are configured for three cycles with a ``"zbl"``
            physics baseline.

    Returns:
        The populated ``GlobalConfig``.
    """
    if not is_ci:
        # REAL CONFIGURATION
        generator = AdaptiveGeneratorConfig(
            name=GeneratorType.ADAPTIVE,
            n_structures=10,
            element="MgO",
            crystal_structure="rocksalt",
            policy_ratios={"cycle0_bulk": 0.5, "cycle0_surface": 0.5},
        )
        oracle = QEOracleConfig(
            name=OracleType.QE,
            ecutwfc=40.0,
            ecutrho=200.0,
            kspacing=0.05,
            pseudopotentials={
                "Mg": "Mg.pbe-n-kjpaw_psl.1.0.0.UPF",
                "O": "O.pbe-n-kjpaw_psl.1.0.0.UPF",
            },
            max_workers=4,
        )
        trainer = PacemakerTrainerConfig(
            name=TrainerType.PACEMAKER,
            cutoff=5.0,
            max_num_epochs=50,
        )
        dynamics = LAMMPSDynamicsConfig(
            name=DynamicsType.LAMMPS,
            n_steps=5000,
            uncertainty_threshold=5.0,  # Halt if Gamma > 5
            max_workers=2,
        )
        validator = StandardValidatorConfig(
            name=ValidatorType.STANDARD,
            phonon_supercell=[2, 2, 2],
        )
        return GlobalConfig(
            workdir=workdir,
            max_cycles=3,  # Short run for tutorial
            physics_baseline=PhysicsBaselineConfig(type="zbl"),  # Safety net
            components={
                "generator": generator,
                "oracle": oracle,
                "trainer": trainer,
                "dynamics": dynamics,
                "validator": validator,
            },
        )

    # MOCK CONFIGURATION
    mock_generator = MockGeneratorConfig(
        name=GeneratorType.MOCK,
        n_structures=5,
        cell_size=4.21,
        n_atoms=8,
        atomic_numbers=[12, 8] * 4,  # Mg4O4
    )
    mock_dynamics = MockDynamicsConfig(
        name=DynamicsType.MOCK,
        selection_rate=0.5,
        simulated_uncertainty=10.0,  # Force active learning
    )
    return GlobalConfig(
        workdir=workdir,
        max_cycles=2,
        components={
            "generator": mock_generator,
            "oracle": MockOracleConfig(name=OracleType.MOCK),
            "trainer": MockTrainerConfig(name=TrainerType.MOCK),
            "dynamics": mock_dynamics,
            "validator": MockValidatorConfig(name=ValidatorType.MOCK),
        },
    )

# Build the MgO configuration for the working directory and mode chosen above.
config_mgo = create_mgo_config(workdir=WORKDIR_MGO, is_ci=IS_CI_MODE)
print("MgO Configuration Ready.")



### Run the Orchestrator (MgO)
This step starts the autonomous loop. In real mode, this can take hours. In CI mode, seconds.



In [None]:
# Build the orchestrator from the MgO configuration and run it.
# Per the tutorial text, this starts the autonomous loop: expect hours in
# real mode and seconds in CI mode.
orchestrator_mgo = Orchestrator(config_mgo)
orchestrator_mgo.run()

# Printed once run() has returned.
print("MgO Training Complete.")



## 3. Part B: Training FePt Potential

Now we repeat the process for the FePt alloy, targeting the L1_0 phase.



In [None]:
def create_fept_config(workdir: Path, is_ci: bool) -> GlobalConfig:
    """Assemble the pipeline configuration for the FePt run.

    Args:
        workdir: Directory handed to ``GlobalConfig`` as ``workdir``.
        is_ci: When true, every component uses its Mock* config and two
            cycles are requested. Otherwise the adaptive generator
            (``"L1_0"`` structure), QE oracle, Pacemaker trainer, LAMMPS
            dynamics and standard validator are configured for three cycles.

    Returns:
        The populated ``GlobalConfig``.
    """
    if not is_ci:
        generator = AdaptiveGeneratorConfig(
            name=GeneratorType.ADAPTIVE,
            n_structures=10,
            element="FePt",
            crystal_structure="L1_0",
            policy_ratios={"cycle0_bulk": 0.7, "cycle0_surface": 0.3},
        )
        oracle = QEOracleConfig(
            name=OracleType.QE,
            ecutwfc=50.0,
            ecutrho=400.0,
            kspacing=0.04,
            pseudopotentials={
                "Fe": "Fe.pbe-spn-kjpaw_psl.1.0.0.UPF",
                "Pt": "Pt.pbe-n-kjpaw_psl.1.0.0.UPF",
            },
        )
        trainer = PacemakerTrainerConfig(
            name=TrainerType.PACEMAKER,
            cutoff=5.0,
            max_num_epochs=50,
        )
        dynamics = LAMMPSDynamicsConfig(
            name=DynamicsType.LAMMPS,
            n_steps=5000,
            uncertainty_threshold=10.0,
        )
        validator = StandardValidatorConfig(
            name=ValidatorType.STANDARD,
            phonon_supercell=[2, 2, 2],
        )
        return GlobalConfig(
            workdir=workdir,
            max_cycles=3,
            components={
                "generator": generator,
                "oracle": oracle,
                "trainer": trainer,
                "dynamics": dynamics,
                "validator": validator,
            },
        )

    # Mock components only.
    mock_generator = MockGeneratorConfig(
        name=GeneratorType.MOCK,
        n_structures=5,
        cell_size=3.8,
        n_atoms=4,
        atomic_numbers=[26, 78] * 2,
    )
    mock_dynamics = MockDynamicsConfig(
        name=DynamicsType.MOCK,
        selection_rate=0.5,
        simulated_uncertainty=10.0,
    )
    return GlobalConfig(
        workdir=workdir,
        max_cycles=2,
        components={
            "generator": mock_generator,
            "oracle": MockOracleConfig(name=OracleType.MOCK),
            "trainer": MockTrainerConfig(name=TrainerType.MOCK),
            "dynamics": mock_dynamics,
            "validator": MockValidatorConfig(name=ValidatorType.MOCK),
        },
    )

# Build the FePt configuration for the working directory and mode chosen
# during environment setup, then construct the orchestrator and run it.
config_fept = create_fept_config(WORKDIR_FEPT, IS_CI_MODE)
orchestrator_fept = Orchestrator(config_fept)
orchestrator_fept.run()

# Printed once run() has returned.
print("FePt Training Complete.")



## 4. Validation & Analysis

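We verify that the potentials were produced.

- **CI Mode**: we assert that the (dummy) potential files written by the mock components exist.
- **Real Mode**: this tutorial only checks that the `.yace` files exist. A full validation would load them (e.g. with `pyace` or LAMMPS) and compute properties such as the formation energy.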



In [None]:
def latest_potential(workdir: Path, filename: str = "potential.yace") -> Path:
    """Locate the potential written by the highest-numbered cycle.

    The number of cycles differs between modes (the CI configs request 2,
    the real configs request 3), so the last cycle directory is discovered
    on disk instead of being hard-coded.

    Args:
        workdir: Run directory expected to contain ``cycle_*`` subdirectories.
        filename: Name of the potential file inside a cycle directory.

    Returns:
        Path to ``filename`` in the ``cycle_*`` directory with the largest
        numeric suffix that contains it. When no cycle directory contains
        the file, ``workdir / "cycle_02" / filename`` is returned so the
        caller's existence checks report the potential as missing.
    """

    def cycle_index(path: Path) -> int:
        # "cycle_03/potential.yace" -> 3; non-numeric suffixes sort first.
        suffix = path.parent.name.rpartition("_")[2]
        return int(suffix) if suffix.isdecimal() else -1

    candidates = sorted(
        workdir.glob(f"cycle_*/{filename}"),
        key=lambda path: (cycle_index(path), path.parent.name),
    )
    if candidates:
        return candidates[-1]
    return workdir / "cycle_02" / filename


potential_mgo = latest_potential(WORKDIR_MGO)
potential_fept = latest_potential(WORKDIR_FEPT)

if IS_CI_MODE:
    # In Mock mode, check if the "potential" file exists (it's a dummy file)
    assert potential_mgo.exists(), "MgO potential not generated"
    assert potential_fept.exists(), "FePt potential not generated"
    print("CI Validation: Potential files exist.")
else:
    if potential_mgo.exists() and potential_fept.exists():
        print("Real Validation: Potentials found.")
        # Here we could load them with `pyace` or `lammps` to calculate properties
        # For this tutorial, existence is the primary success criterion
    else:
        print("Warning: Potentials not found. Did the training converge?")
