In [1]:
import xml.etree.ElementTree as etree

# Location of the protein.ff14SB.xml force-field file to read residue bonds from.
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted
# before the notebook can run anywhere else.
FFXML_PATH = '/home/zhangi/choderalab/openmmforcefields/amber/ffxml/protein.ff14SB.xml'

# Parse the file and keep a handle on its root element for the lookups below.
tree = etree.parse(FFXML_PATH)
root = tree.getroot()

# Get bonds: build d_residues, mapping each residue name in `names` to a sorted
# list of [atom, atom] bond pairs (each pair itself in sorted order).
names = ['ASH', 'GLH', 'HID', 'HIE', 'HIP', 'LYN']
# CYM is deprotonated or bound-to-metal-ions CYS -- decided not to support this because PyMOL cannot generate a template PDB for it
# HYP is hydroxyproline -- decided not to support this because PyMOL cannot generate a template PDB for it
# NHE is NH2 cap

# Bonds added to every residue, because the standard amino acids in residues.xml have them.
# Each pair is already written in sorted order, like the pairs read from the file below.
common_bonds = [['-C', 'N'], ['C', 'OXT'], ['HXT', 'OXT'], ['H2', 'N'], ['H3', 'N']]

residues = root.find('Residues')
if residues is None:
    # find() returns None when there is no matching child; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("No 'Residues' element found under the root of the parsed force field file")

d_residues = {}
for residue in residues.findall('Residue'):
    name = residue.attrib['name']
    if name not in names:
        continue
    # Start from a fresh copy so the shared base list is never mutated.
    bonds = [list(pair) for pair in common_bonds]
    if name == 'LYN':
        # Add this as a potential bond, as the deprotonated LYS may involve HZ1 and HZ2
        # or HZ2 and HZ3, depending on how hydrogens were added/removed.
        bonds.append(['HZ1', 'NZ'])
    for bond in residue.findall('Bond'):
        bonds.append(sorted((bond.attrib['atomName1'], bond.attrib['atomName2'])))
    d_residues[name] = sorted(bonds)

# Every requested residue should have been found; otherwise the template written
# later would be silently incomplete.
missing_names = [wanted for wanted in names if wanted not in d_residues]
if missing_names:
    raise ValueError("Residues not found in the force field file: %s" % ', '.join(missing_names))
        
    

In [27]:
# NOTE(review): the saved output below contains a 'CYM' entry although 'CYM' is not
# in `names`, and this cell's execution count (27) is out of sequence with the
# other cells -- the output looks stale; confirm with Restart Kernel -> Run All.
d_residues

{'ASH': [['-C', 'N'],
  ['C', 'CA'],
  ['C', 'O'],
  ['C', 'OXT'],
  ['CA', 'CB'],
  ['CA', 'HA'],
  ['CA', 'N'],
  ['CB', 'CG'],
  ['CB', 'HB2'],
  ['CB', 'HB3'],
  ['CG', 'OD1'],
  ['CG', 'OD2'],
  ['H', 'N'],
  ['H2', 'N'],
  ['H3', 'N'],
  ['HD2', 'OD2'],
  ['HXT', 'OXT']],
 'CYM': [['-C', 'N'],
  ['C', 'CA'],
  ['C', 'O'],
  ['C', 'OXT'],
  ['CA', 'CB'],
  ['CA', 'HA'],
  ['CA', 'N'],
  ['CB', 'HB2'],
  ['CB', 'HB3'],
  ['CB', 'SG'],
  ['H', 'N'],
  ['H2', 'N'],
  ['H3', 'N'],
  ['HXT', 'OXT']],
 'GLH': [['-C', 'N'],
  ['C', 'CA'],
  ['C', 'O'],
  ['C', 'OXT'],
  ['CA', 'CB'],
  ['CA', 'HA'],
  ['CA', 'N'],
  ['CB', 'CG'],
  ['CB', 'HB2'],
  ['CB', 'HB3'],
  ['CD', 'CG'],
  ['CD', 'OE1'],
  ['CD', 'OE2'],
  ['CG', 'HG2'],
  ['CG', 'HG3'],
  ['H', 'N'],
  ['H2', 'N'],
  ['H3', 'N'],
  ['HE2', 'OE2'],
  ['HXT', 'OXT']],
 'HID': [['-C', 'N'],
  ['C', 'CA'],
  ['C', 'O'],
  ['C', 'OXT'],
  ['CA', 'CB'],
  ['CA', 'HA'],
  ['CA', 'N'],
  ['CB', 'CG'],
  ['CB', 'HB2'],
  ['CB', 'HB3'],
 

In [2]:
# Build the output document: a <Residues> root holding one <Residue name="...">
# per entry of d_residues, each with one <Bond from="..." to="..."/> child per pair.
residues = etree.Element('Residues')
for residue_name, bond_pairs in d_residues.items():
    residue_element = etree.SubElement(residues, 'Residue', {'name': residue_name})
    for pair in bond_pairs:
        # 'from' is a Python keyword, so the attributes are passed as a dict.
        etree.SubElement(residue_element, 'Bond', {'from': pair[0], 'to': pair[1]})

# Serialize the tree to a file in the current working directory.
tree = etree.ElementTree(residues)
tree.write("residues_nonstandard.xml")
