# UAT-C2-022: Verify Farthest Point Sampling (FPS)

In [None]:
import numpy as np
from ase import Atoms

# 1. Build a small hand-made dataset with a known diversity pattern:
#    three near-identical H2 molecules plus one clear outlier.

# Reference molecule: the two H atoms sit 0.74 apart along z.
atoms1 = Atoms("H2", positions=[(0, 0, 0), (0, 0, 0.74)])

# Outlier: same molecule, but with the H-H separation stretched to 2.0.
atoms4 = Atoms("H2", positions=[(0, 0, 0), (0, 0, 2.0)])

# Two slightly perturbed copies of the reference. Fixed seeds keep the
# random displacements reproducible from run to run.
atoms2, atoms3 = atoms1.copy(), atoms1.copy()
atoms2.rattle(stdev=0.01, seed=42)
atoms3.rattle(stdev=0.01, seed=123)

# Order matters for later cells: reference, two perturbed copies, outlier.
manual_structures = [atoms1, atoms2, atoms3, atoms4]
print(f"Created {len(manual_structures)} manual structures for testing.")

In [None]:
# 2. Import and run the FPSSampler
# FPSSampler is a project-local class; its algorithm is not shown in this notebook.
from mlip_autopipec.sampling.samplers import FPSSampler

# Ask for 2 samples out of the 4 structures in `manual_structures`.
# The cell that builds `manual_structures` must have been executed first.
sampler = FPSSampler(num_samples=2)
selected_structures = sampler.sample(manual_structures)

# Report how many structures came back; which ones were picked is checked
# in the verification step below.
print(f"Selected {len(selected_structures)} structures using FPS.")

In [None]:
# 3. Verify the selection
# Expectation for this UAT: the sampler returns exactly two structures,
# the original molecule (H-H distance 0.74) and the highly stretched one
# (H-H distance 2.0).
selected_distances = [s.get_distance(0, 1) for s in selected_structures]
print("Interatomic distances in selected structures:", selected_distances)

expected_distances = [0.74, 2.0]

# Check the count explicitly. A set-based comparison collapses duplicates, so
# a sampler that returned extra near-identical structures would go unnoticed.
assert len(selected_distances) == len(expected_distances), (
    f"FPS returned {len(selected_distances)} structures, "
    f"expected {len(expected_distances)}."
)

# Sort both sides so the check does not depend on selection order, then
# compare element-wise. atol=0.005 with rtol=0 accepts the same window as
# rounding to two decimals, i.e. it only absorbs floating point noise.
assert np.allclose(
    np.sort(selected_distances),
    np.sort(expected_distances),
    rtol=0.0,
    atol=0.005,
), (
    "FPS did not select the most diverse structures! "
    f"Got distances {selected_distances}, expected {expected_distances}."
)

print("\nSuccess: FPS correctly selected the most structurally diverse configurations.")