# UAT for Cycle 01: Core Engine Workflow

This notebook verifies the core functionality of the MLIP-AutoPipe system for Cycle 01. It tests the linear workflow of populating a database, labeling structures with a mocked DFT call, and training a model.

In [None]:
from pathlib import Path

import ase.db
import yaml
from ase import Atoms
from ase.io import write
from click.testing import CliRunner

from mlip_autopipec.cli import main

# Start from a clean state: delete a pre-existing 'model.ace' if one is present
stale_model = Path('model.ace')
if stale_model.exists():
    stale_model.unlink()

## GIVEN: Set Up Input Files

In [None]:
# GIVEN the paths of every artifact this test works with
config_path = Path("exec_config_dump_c01.yaml")
structures_path = Path("initial_structures.xyz")
db_path = Path("uat_c01.db")

# GIVEN a YAML configuration file, assembled section by section
dft_settings = {
    "code": "quantum_espresso",
    "command": "pw.x -v",
    "pseudopotentials": "SSSP",
    "ecutwfc": 60.0,
    "ecutrho": 240.0,
    "kpoints_density": 1.0,
    "smearing": "mv",
    "degauss": 0.01,
}
training_settings = {
    "model_type": "ace",
    "r_cut": 5.0,
    "delta_learning": True,
    "base_potential": "lj_auto",
    "loss_weights": {"energy": 1.0, "force": 100.0},
}
config_data = {
    "system": {"elements": ["Si"]},
    "dft_compute": dft_settings,
    "mlip_training": training_settings,
}
# Serialize to a string first, then write it out in one go
with config_path.open("w") as config_file:
    config_file.write(yaml.dump(config_data))

# GIVEN an XYZ file with initial structures:
# a single-atom periodic Si cell plus a rattled copy of it
atoms1 = Atoms("Si", cell=[5.4, 5.4, 5.4], pbc=True)
atoms2 = atoms1.copy()
atoms2.rattle(0.1)
write(structures_path, [atoms1, atoms2], format='extxyz')

# GIVEN an empty ASE database: remove any existing file at db_path
if db_path.exists():
    db_path.unlink()

print("Input files created successfully:")
for created_file in (config_path, structures_path):
    print(f"- {created_file}")
print(f"- {db_path} (to be created by the CLI)")

## WHEN: The CLI is executed

In [None]:
# The external subprocess call is mocked for the UAT, so `subprocess.run`
# returns canned output while the CLI is invoked.
import subprocess
from unittest.mock import patch

# Canned text handed back as the mocked process's stdout.
SAMPLE_QE_OUTPUT = """ 
!    total energy              =     -15.85217439 Ry
Forces acting on atoms (cartesian axes, Ry/au):
     atom    1   force =     -0.00000014    -0.00000014    -0.00000014
total   stress  (Ry/bohr**3)                (kbar)     P=      -0.34
      -0.00000215    -0.00000000     0.00000000
      -0.00000000    -0.00000215     0.00000000
       0.00000000     0.00000000    -0.00000215
"""

# While the patch is active, every call to `subprocess.run` returns a
# successful CompletedProcess carrying SAMPLE_QE_OUTPUT on stdout.
with patch("subprocess.run", return_value=subprocess.CompletedProcess(args=[], returncode=0, stdout=SAMPLE_QE_OUTPUT, stderr="")) as mock_run:
    runner = CliRunner()
    result = runner.invoke(
        main,
        [
            "--config-file", str(config_path),
            "--database-file", str(db_path),
            "--input-file", str(structures_path),
        ],
    )

print("--- CLI Output ---")
print(result.output)
print("--------------------\n")
# Include the exit code and captured exception so a failure is diagnosable
# without re-running the cell.
assert result.exit_code == 0, (
    f"CLI exited with code {result.exit_code}; exception: {result.exception!r}"
)
print("✅ CLI command executed successfully.")

## THEN: Verify the Outputs

In [None]:
# THEN the ASE database file should have been created by the CLI
assert db_path.exists(), f"Database file '{db_path}' was not created"
print(f"✅ Database file '{db_path}' was created.")

db = ase.db.connect(db_path)
rows = list(db.select())

# AND querying the database should show exactly 2 rows
assert len(rows) == 2, f"Expected 2 rows in the database, found {len(rows)}"
print(f"✅ Database contains {len(rows)} rows as expected.")

# AND each row should have the state 'labeled' and contain data
for row in rows:
    # .get() turns a missing 'state' key into a readable assertion failure
    state = row.key_value_pairs.get('state')
    assert state == 'labeled', f"Row {row.id} has state {state!r}, expected 'labeled'"
    assert 'energy' in row.data, f"Row {row.id} has no 'energy' entry in its data"
    assert 'forces' in row.data, f"Row {row.id} has no 'forces' entry in its data"
print("✅ All rows are marked as 'labeled' and contain energy/force data.")

# AND a new model file should be created
model_path = Path('model.ace')
assert model_path.exists(), f"Model file '{model_path}' was not created"
print(f"✅ Trained model file '{model_path}' was created.")

# Clean up the generated files (reached only when every check above passed,
# so artifacts of a failed run stay on disk)
for artifact in (config_path, structures_path, db_path, model_path):
    artifact.unlink()
print("\n🧼 Cleanup complete.")