# Unconstrained geometric minimisation of ROY

In this example notebook, we perform an unconstrained minimisation on the molecule ROY using the MACE-off23 large model as an ASE calculator.

We begin by importing some relevant modules:

In [1]:
from pyzmat import ParseUtils, ZMatrix
import numpy as np

Now, we extract the Z-matrix values (zmat) and connectivity (zmat_conn) from a Gaussian input file. Constraints are also extracted but since the Gaussian input file did not specify any constant DOF, a dummy empty Constraints object is formed. 

In [2]:
zmat, zmat_conn, constraints = ParseUtils.parse_gaussian_input('roy_struc_1.com')

In [3]:
zmat = [['S', None, None, None],
 ['C', 1.7237677157533808, None, None],
 ['C', 1.3680078692775188, 110.54989260706004, None],
 ['C', 1.428797422073703, 112.98693728169714, 359.7734292633197],
 ['C', 1.3571823320693388, 112.92178130887692, 359.34324087405025],
 ['N', 1.3892721711066787, 128.29263165731334, 179.722214897143],
 ['C', 1.3753800394036426, 124.3477799671404, 63.1035858483037],
 ['C', 1.4080208617517107, 122.87361164547156, 187.40152037600996],
 ['C', 1.3930356711983611, 121.46854988642971, 180.16237659456095],
 ['C', 1.3734620260944317, 120.63890346479282, 0.29062864373648795],
 ['C', 1.3938420491916428, 118.72538846891526, 0.025385461880342842],
 ['C', 1.3748204004778162, 121.06971690398859, 359.8158893349512],
 ['N', 1.4617116637063212, 122.17019067834883, 0.12189139964247105],
 ['O', 1.2112955136729107, 118.41705361166251, 174.0115097369849],
 ['C', 1.4955981251316952, 128.10145335430119, 181.52789578088073],
 ['C', 1.4278370433621552, 122.57754874016096, 0.53352137303095],
 ['N', 1.1487638922573373, 152.6494838358117, 180.2343912860882],
 ['O', 1.2283902272239586, 118.76825628984636, 353.6955334590304],
 ['H', 1.077461984471422, 117.98080777338424, 180.29594588757092],
 ['H', 1.0783179228048554, 120.32967072656383, 179.94862717757417],
 ['H', 1.0801923454796103, 119.96741098851925, 179.7843808391676],
 ['H', 1.0784259776358858, 119.94957137852107, 179.80821755229596],
 ['H', 1.007687173412292, 115.80495013666037, 4.51327831370903],
 ['H', 1.0783179055245362, 123.74476572962804, 0.06099150201119088],
 ['H', 1.0884779164802634, 111.31177884506131, 117.58512389929491],
 ['H', 1.0885070587372774, 111.45314534230039, 238.17809321561964],
 ['H', 1.0867475849606212, 109.57500649868533, 357.9282479338791]]

Let's examine the structure of zmat and zmat_conn.
* zmat: list consisting of lists ['element_name', bond_val, angle_val, dih_val]
* zmat_conn: list consisting of tuples ('element_name', ref_i, ref_j, ref_k) where ref_i, ref_j, and ref_k are the line numbers (0-indices) of the reference atoms for bond length, bond angle, and dihedral angle respectively. 

In [4]:
print('zmat:', zmat)
print('')
print('zmat_conn:', zmat_conn)

zmat: [['S', None, None, None], ['C', 1.7237677157533808, None, None], ['C', 1.3680078692775188, 110.54989260706004, None], ['C', 1.428797422073703, 112.98693728169714, 359.7734292633197], ['C', 1.3571823320693388, 112.92178130887692, 359.34324087405025], ['N', 1.3892721711066787, 128.29263165731334, 179.722214897143], ['C', 1.3753800394036426, 124.3477799671404, 63.1035858483037], ['C', 1.4080208617517107, 122.87361164547156, 187.40152037600996], ['C', 1.3930356711983611, 121.46854988642971, 180.16237659456095], ['C', 1.3734620260944317, 120.63890346479282, 0.29062864373648795], ['C', 1.3938420491916428, 118.72538846891526, 0.025385461880342842], ['C', 1.3748204004778162, 121.06971690398859, 359.8158893349512], ['N', 1.4617116637063212, 122.17019067834883, 0.12189139964247105], ['O', 1.2112955136729107, 118.41705361166251, 174.0115097369849], ['C', 1.4955981251316952, 128.10145335430119, 181.52789578088073], ['C', 1.4278370433621552, 122.57754874016096, 0.53352137303095], ['N', 1.1487

We can now form a ZMatrix object using these variables.

In [5]:
roy = ZMatrix(zmat, zmat_conn, constraints, name = 'roy')

Let's visualise this molecule:

In [6]:
roy.view_ase()

To perform the actual minimisation, we must attach an ASE calculator to the ZMatrix object. In this case, we use the MACE-off23 force field ('mace'). 

In [7]:
roy.attach_calculator('mace-omol', model_size = 'large')
# model size defaults to large 



  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))


cuequivariance or cuequivariance_torch is not available. Cuequivariance acceleration will be disabled.
Using float64 for MACECalculator, recommended for geometry optimization.


  torch.load(f=model_path, map_location=device)


Using head omol out of ['omol']


We now run the minimisation routine. The routine defaults to a BFGS line search algorithm, as implemented in ASE (recommended). 

In [8]:
roy.optimise_ase(calc_hess = True)

Initialising minimisation routine
Model used: <mace.calculators.mace.MACECalculator object at 0x79a310a591c0> large
Input Z-matrix:
S
C    1    bnd2
C    2    bnd3    1    ang3
C    3    bnd4    2    ang4    1    dih4
C    4    bnd5    3    ang5    2    dih5
N    2    bnd6    3    ang6    4    dih6
C    6    bnd7    2    ang7    3    dih7
C    7    bnd8    6    ang8    2    dih8
C    8    bnd9    7    ang9    6    dih9
C    9    bnd10    8    ang10    7    dih10
C    10    bnd11    9    ang11    8    dih11
C    11    bnd12    10    ang12    9    dih12
N    8    bnd13    7    ang13    6    dih13
O    13    bnd14    8    ang14    7    dih14
C    5    bnd15    4    ang15    3    dih15
C    3    bnd16    2    ang16    6    dih16
N    16    bnd17    2    ang17    3    dih17
O    13    bnd18    8    ang18    7    dih18
H    9    bnd19    8    ang19    7    dih19
H    10    bnd20    9    ang20    8    dih20
H    11    bnd21    10    ang21    9    dih21
H    12    bnd22    11    ang22    10   

([['S', None, None, None],
  ['C', 1.7215415384725903, None, None],
  ['C', 1.368654310718832, 110.59669894181114, None],
  ['C', 1.4283707113128978, 112.85609807396081, 359.6591882814368],
  ['C', 1.3567240623729835, 113.06224188542232, 359.1834506558814],
  ['N', 1.389419684597024, 128.54580920662556, 179.7266065872635],
  ['C', 1.377085340466978, 124.70045324013479, 59.104600307590665],
  ['C', 1.4075690611337892, 122.85162196381324, 188.73491580221508],
  ['C', 1.3928600406377967, 121.61211778987945, 179.45879681814853],
  ['C', 1.3743136372031863, 120.55322496941, 0.4930956584840358],
  ['C', 1.3933330051951474, 118.69580457396938, 0.12323369082624826],
  ['C', 1.3757120860182561, 121.14509266781148, 359.6720582042043],
  ['N', 1.4594055534298578, 122.178839201791, 359.6047885840238],
  ['O', 1.2111797256757442, 118.43682571641493, 171.42027369292083],
  ['C', 1.4953229911109471, 128.03704141624453, 182.40544776050666],
  ['C', 1.4273691839527756, 122.60895286716502, 0.81947331514

In [9]:
len(roy.zmat)

27

In [11]:
from mace.calculators import mace_omol
calc = mace_omol(model_size = 'extra_large', device = 'cpu')
hess_cart = roy.calculator.get_hessian(atoms = roy.get_atoms())

Using float64 for MACECalculator, recommended for geometry optimization.


  torch.load(f=model_path, map_location=device)


Using head omol out of ['omol']


In [14]:
from mace.calculators import mace_off
calc_off= mace_off(model_size = 'extra_large', device = 'cpu')
hess_cart_off = calc_off.get_hessian(atoms = roy.get_atoms())

Using MACE-OFF23 MODEL for MACECalculator with /root/.cache/mace/MACE-OFF23_medium.model
Using float64 for MACECalculator, which is slower but more accurate. Recommended for geometry optimization.
Using head Default out of ['Default']


  torch.load(f=model_path, map_location=device)


In [13]:
hess_cart.shape

(81, 27, 3)

In [15]:
hess_cart_off.shape

(81, 27, 3)

In [10]:
roy.dump_json()
roy.dump_json(filename = 'test.json')
roy.dump_json('test.ligma.json')

In [2]:
roy_load = ZMatrix.load_json('test.json')

In [3]:
roy_load.forces

array([-1.19364587e-06, -2.07875238e-06, -4.79959244e-07, -7.88711762e-07,
       -1.63396473e-07,  1.74073528e-07,  4.69222268e-07,  3.39656202e-08,
       -1.47886822e-07, -6.59789529e-07,  5.13131318e-07,  6.60532489e-07,
        3.66792129e-07,  1.36243058e-07, -5.24463852e-08,  2.61760768e-07,
        4.67650674e-07, -7.82428441e-08,  1.10833903e-07,  1.04263913e-06,
       -2.75933954e-08,  1.69078832e-07,  2.24766885e-06, -6.62557488e-07,
        9.26383811e-07,  1.82103611e-06, -8.43979312e-07, -6.42040543e-07,
        1.51928055e-06, -5.20050094e-07,  6.44532102e-07,  1.99230143e-07,
       -5.03982532e-07,  6.96436471e-07,  4.87784783e-07,  1.01771267e-07,
       -9.26076342e-07,  1.07303592e-07, -1.03045861e-06,  1.17362019e-06,
        1.56153174e-08, -3.80072848e-07, -3.03223854e-06, -2.42464505e-07,
        1.66743086e-07, -1.54258168e-06, -2.58716239e-07, -1.81473530e-07,
       -3.78924394e-08,  7.08618216e-07,  4.39018460e-07, -2.13230126e-07,
       -4.77586208e-07, -

In [7]:
roy_load.attach_calculator('mace')
hess = roy_load.get_hessian()

  _Jd, _W3j_flat, _W3j_indices = torch.load(os.path.join(os.path.dirname(__file__), 'constants.pt'))


Using MACE-OFF23 MODEL for MACECalculator with /root/.cache/mace/MACE-OFF23_large.model
Using float64 for MACECalculator, which is slower but more accurate. Recommended for geometry optimization.


  torch.load(f=model_path, map_location=device)


In [9]:
hess

array([[ 2.97498471e+01,  5.23944736e+00,  4.38480866e+00, ...,
        -1.33032829e-01, -4.41512455e-01, -3.47856899e-03],
       [ 5.23944736e+00,  7.42895123e+01,  4.51797829e+01, ...,
        -1.30865423e-01,  1.64291120e-01, -5.50581179e-04],
       [ 4.38480866e+00,  4.51797829e+01,  8.68454293e+01, ...,
        -2.23850712e-01,  3.21335220e-01, -1.74415614e-02],
       ...,
       [-1.33032829e-01, -1.30865423e-01, -2.23850712e-01, ...,
         3.39865081e+01, -7.18668788e-02, -2.45091736e-03],
       [-4.41512455e-01,  1.64291120e-01,  3.21335220e-01, ...,
        -7.18668788e-02,  5.96871487e+00, -3.08536266e-03],
       [-3.47856899e-03, -5.50581179e-04, -1.74415614e-02, ...,
        -2.45091736e-03, -3.08536266e-03,  4.47560071e+00]])

In [44]:
import numpy as np

def print_hessian(hessian, zmat, constraints=None, block_size=5):
    """
    Print the lower‐triangular part of a Hessian matrix, but reorder so that
    any DOFs listed in constraints appear at the end.  Variable names are
    built internally from `zmat`.  Columns are delimited by two spaces,
    and the first digit of every number (not the sign) lines up in the same
    column.
    """
    H = np.asarray(hessian)
    m = H.shape[0]
    if H.shape[1] != m:
        raise ValueError("Hessian must be square")

    # default empty constraints
    if constraints is None:
        class _C:
            bonds = []
            angles = []
            dihedrals = []
        constraints = _C()

    # 1) build the original variable‐name list in zmat order
    orig_names = []
    n = len(zmat)
    for i in range(1, n):
        if zmat[i][1] is not None:
            orig_names.append(f"bnd{i+1}")
        if i >= 2 and zmat[i][2] is not None:
            orig_names.append(f"ang{i+1}")
        if i >= 3 and zmat[i][3] is not None:
            orig_names.append(f"dih{i+1}")

    if len(orig_names) != m:
        raise ValueError(f"zmat yields {len(orig_names)} DOFs, but Hessian is {m}×{m}")

    # 2) collect the constant DOFs
    const_names = [f"bnd{idx+1}" for idx, _ in constraints.bonds] + \
                  [f"ang{idx+1}" for idx, _ in constraints.angles] + \
                  [f"dih{idx+1}" for idx, _ in constraints.dihedrals]

    # 3) split into non‐const then const
    nonconst = [nm for nm in orig_names if nm not in const_names]
    new_order = nonconst + const_names

    # 4) permute H to match the new order
    idx_map = [orig_names.index(nm) for nm in new_order]
    H2 = H[np.ix_(idx_map, idx_map)]

    # 5) determine field widths so digits align at col 1
    example = f"{0.0:.8E}"         # e.g. "0.00000000E+00" (14 chars)
    digit_width = len(example)     # 14
    fw = digit_width + 1           # reserve 1 char for sign or leading space -> 15

    # 6) print in blocks of columns
    for block_start in range(0, m, block_size):
        block_end = min(block_start + block_size, m)

        # header row
        print(" " * (fw + 1), end=" ")
        for j in range(block_start, block_end):
            print(f"{new_order[j]:{fw}s}", end=" ")
        print()

        # data rows
        for i in range(block_start, m):
            # row label
            print(f"{new_order[i]:{fw}s}", end=" ")
            for j in range(block_start, block_end):
                val = H2[i, j]
                # build abs‐value string
                sig = "-" if val < 0 else " "
                body = f"{abs(val):.8E}"       # always starts with a digit
                entry = (sig + body).ljust(fw)  # pad on right
                print(entry, end=" ")
            print()


In [54]:
import numpy as np

def print_hessian(hessian, zmat, constraints=None, block_size=5):
    """
    As before, but the variable-name column is only 7 characters wide;
    numeric columns remain 15 chars (14 digits + sign) and use a single-space delimiter.
    """
    H = np.asarray(hessian)
    m = H.shape[0]
    if H.shape[1] != m:
        raise ValueError("Hessian must be square")

    if constraints is None:
        class _C:
            bonds = []; angles = []; dihedrals = []
        constraints = _C()

    # 1) Build orig_names
    orig_names = []
    for i in range(1, len(zmat)):
        if zmat[i][1] is not None:       orig_names.append(f"bnd{i+1}")
        if i >= 2 and zmat[i][2] is not None: orig_names.append(f"ang{i+1}")
        if i >= 3 and zmat[i][3] is not None: orig_names.append(f"dih{i+1}")
    if len(orig_names) != m:
        raise ValueError(f"zmat yields {len(orig_names)} DOFs, but Hessian is {m}")

    # 2) Constant DOFs
    consts = [f"bnd{idx+1}" for idx,_ in constraints.bonds] + \
             [f"ang{idx+1}" for idx,_ in constraints.angles] + \
             [f"dih{idx+1}" for idx,_ in constraints.dihedrals]

    # 3) Reorder
    nonconst = [nm for nm in orig_names if nm not in consts]
    new_order = nonconst + consts
    idx_map   = [orig_names.index(nm) for nm in new_order]
    H2        = H[np.ix_(idx_map, idx_map)]

    # 4) Field widths
    name_fw = 6
    num_fw  = len(f"{0.0:.8E}") + 1  # 14 digits + sign = 15

    # 5) Print in blocks
    for block_start in range(0, m, block_size):
        block_end = min(block_start + block_size, m)

        # Header: blank name-slot + single space, then column labels
        print(" " * (name_fw + 4), end=" ")
        for j in range(block_start, block_end):
            print(f"{new_order[j]:{num_fw}s}", end=" ")
        print()

        # Rows: name in width=7, then each numeric or blank
        for i in range(block_start, m):
            print(f"{new_order[i]:{name_fw}s}", end=" ")
            for j in range(block_start, block_end):
                if j > i:
                    # blank for upper triangle
                    print(" " * num_fw, end=" ")
                else:
                    val = H2[i, j]
                    sig  = "-" if val < 0 else " "
                    body = f"{abs(val):.8E}"
                    entry = (sig + body).ljust(num_fw)
                    print(entry, end=" ")
            print()


In [55]:

print_hessian(hess, roy_load.zmat)

           bnd2            bnd3            ang3            bnd4            ang4            
bnd2    2.97498471E+01                                                                 
bnd3    5.23944736E+00  7.42895123E+01                                                 
ang3    4.38480866E+00  4.51797829E+01  8.68454293E+01                                 
bnd4   -8.02122946E+00  1.67205080E+01  1.82285458E+01  5.43876369E+01                 
ang4   -8.10263837E+00  5.44077684E+01  8.86495041E+01  4.04726313E+01  1.36933504E+02 
dih4    2.16283000E-01 -7.57130265E-01 -1.37014999E+00 -4.77105676E-01 -1.50406161E+00 
bnd5   -1.44501779E+00 -9.24275715E+00 -1.25593991E+01  5.59334729E+00 -1.06322128E+01 
ang5   -1.06981183E+01  2.17780871E+01  3.94893784E+01  3.01355550E+01  7.15377257E+01 
dih5    3.54804522E-02 -4.91840328E-01 -8.82240872E-01 -3.94801724E-01 -1.40526519E+00 
bnd6    2.18739978E+00  2.52397449E+00 -2.89234634E+00  8.46641781E-01  1.18317196E+00 
ang6   -8.09359271E-01  2.14

In [1]:
from ase.neighborlist import natural_cutoffs, neighbor_list

In [40]:
def get_zmat_def(atoms, cutoff_scale=1.2):
    """
    Generate a list of potentially sensible zmat connectivities from cartesian coordinates. Very under tested. Use at own risk.
    """
    import numpy as np

    # Build connectivity graph B
    cutoffs = natural_cutoffs(atoms)
    cutoffs = [c * cutoff_scale for c in cutoffs]
    i_list, j_list = neighbor_list('ij', atoms, cutoff=cutoffs)
    N = len(atoms)
    B = {i: set() for i in range(N)}
    for i, j in zip(i_list, j_list):
        if i != j:
            B[i].add(j)
            B[j].add(i)

    # Atom closest to centroid
    coords = atoms.get_positions()
    centroid = coords.mean(axis=0)
    origin = int(np.argmin(np.linalg.norm(coords - centroid, axis=1)))

    # Initialize
    R = {origin: {'b': None, 'a': None, 'd': None}}
    visited = {origin}
    parent = {nbr: origin for nbr in B[origin]}
    work = {nbr: B[nbr] - visited for nbr in B[origin]}

    def max_valent(neigh):
        return max(neigh, key=lambda x: len(B[x]))

    # Main loop
    while work:
        new_work = {}
        # Sort by valency of frontier
        for i in sorted(work, key=lambda x: len(B[x])):
            if i in visited:
                continue
            b = parent[i]
            keys = list(R.keys())
            # Case: b in first three entries of R
            if b in keys[:3]:
                if len(R) == 1:
                    a = None; d = None
                elif len(R) == 2:
                    a = max_valent(B[b] & set(R.keys())); d = None
                else:
                    # len(R) >= 3
                    if parent.get(b) is not None:
                        a = parent[b]
                    else:
                        a = max_valent(B[b] & set(R.keys()))
                    # determine d
                    if parent.get(a) is not None and parent[a] not in {b, a}:
                        d = parent[a]
                    else:
                        neighbors_a = (B[a] & set(R.keys())) - {b, a}
                        if neighbors_a:
                            d = max_valent(neighbors_a)
                        else:
                            d = max_valent((B[b] & set(R.keys())) - {b, a})
            else:
                # fallback
                a = R[b]['b']; d = R[b]['a']

            R[i] = {'b': b, 'a': a, 'd': d}
            visited.add(i)
            # expand frontier
            for j in sorted(work[i], key=lambda x: len(B[x])):
                if j not in visited:
                    new_work[j] = B[j] - visited
                    parent[j] = i
        work = new_work

    # Construct list in insertion order
    zmat_def = []
    for atom, refs in R.items():
        zmat_def.append((atom, refs['b'], refs['a'], refs['d']))
    return zmat_def

In [45]:
from ase.io import read
atoms = read('bzamid.xyz')
zmat_def = get_construction_table(atoms)

In [46]:
from pyzmat import ZmatUtils
zmat, zmat_conn = ZmatUtils.atoms_2_zmat_init(atoms, zmat_def)

In [47]:
zmat

[['C', None, None, None],
 ['C', 1.5015, None, None],
 ['C', 1.396400288755699, 116.5006989535442, None],
 ['C', 1.395600453409929, 124.6023447903844, 178.5261160405038],
 ['O', 1.2071005300379916, 120.61670140083324, 0.0],
 ['H', 1.0836995176971334, 117.78290936576172, 358.40588555983953],
 ['H', 1.0853991271477048, 121.46440888750422, 2.2395380191977137],
 ['N', 1.4126998516277265, 119.07921112996397, 184.00771165976886],
 ['C', 1.3849994078255055, 120.66838585286276, 178.10679783888457],
 ['C', 1.388799560133499, 120.51315707737834, 181.65543163074824],
 ['H', 1.0177003289239914, 108.99562711405436, 359.9999762366983],
 ['H', 1.0193001608834367, 106.85799980592463, 104.99999398418589],
 ['H', 1.0845999930153976, 119.88595696064819, 180.32475076645377],
 ['H', 1.0845008141960981, 119.77525125531147, 180.21877589756667],
 ['C', 1.3888020977468318, 120.06280095994408, 0.26094140669913096],
 ['H', 1.0847997516062589, 119.9807883453473, 179.75238521905777]]

In [48]:
zmat_conn

[('C', None, None, None),
 ('C', 0, None, None),
 ('C', 0, 1, None),
 ('C', 0, 1, 2),
 ('O', 1, 0, 2),
 ('H', 2, 0, 1),
 ('H', 3, 0, 1),
 ('N', 1, 0, 2),
 ('C', 2, 0, 1),
 ('C', 3, 0, 1),
 ('H', 7, 1, 0),
 ('H', 7, 1, 0),
 ('H', 8, 2, 0),
 ('H', 9, 3, 0),
 ('C', 9, 3, 0),
 ('H', 14, 9, 3)]

In [1]:
from pyzmat import *
from ase.io import read
 
xyzfile = 'bzamid.xyz'
atoms = read(xyzfile)
zmat_def = ZmatUtils.get_zmat_def(atoms)
zmat, zmat_conn = ZmatUtils.atoms_2_zmat_init(atoms, zmat_def)
 
mol = ZMatrix(zmat, zmat_conn)
 
preamble = '''your preamble here'''
com_file = 'bzamid.com'
 
mol.save_gaussian_com(com_file, preamble)