In [1]:
from perses.tests.test_topology_proposal import generate_dipeptide_top_pos_sys, generate_atp
from pkg_resources import resource_filename
import os
from perses.rjmc.topology_proposal import PolymerProposalEngine, PointMutationEngine

INFO:rdkit:Enabling RDKit 2021.03.4 jupyter extensions


In [2]:
import logging
_logger = logging.getLogger()
from simtk.openmm import app
from perses.rjmc.topology_proposal import append_topology

In [5]:
class PointMutationEngine2(PointMutationEngine):
    def propose(self,
                current_system,
                current_topology,
                current_metadata=None):
        """
        Generate a TopologyProposal
        Parameters
        ----------
        current_system : simtk.openmm.System object
            The current system object
        current_topology : simtk.openmm.app.Topology object
            The current topology
        current_metadata : dict -- OPTIONAL
        Returns
        -------
        topology_proposal : TopologyProposal
            NamedTuple of type TopologyProposal containing forward and reverse
            probabilities, as well as old and new topologies and atom
            mapping
        local_atom_map_stereo_sidechain : dict
            chirality-corrected map of new_oemol_res to old_oemol_res
        old_oemol_res : openeye.oechem.oemol object
            oemol of the old residue sidechain
        new_oemol_res : openeye.oechem.oemol object
            oemol of the new residue sidechain
        """
        _logger.info(f"\tConducting polymer point mutation proposal...")
        old_topology = app.Topology()
        append_topology(old_topology, current_topology)

        # new_topology : simtk.openmm.app.Topology
        new_topology = app.Topology()
        append_topology(new_topology, current_topology)

        # Check that old_topology and old_system have same number of atoms.
        old_system = current_system
        old_topology_natoms = old_topology.getNumAtoms()  # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        # metadata : dict, key = 'chain_id' , value : str
        metadata = current_metadata
        if metadata is None:
            metadata = dict()

        # old_chemical_state_key : str
        _logger.debug(f"\tcomputing state key of old topology...")
        old_chemical_state_key = self.compute_state_key(old_topology)
        _logger.debug(f"\told chemical state key for chain {self._chain_id}: {old_chemical_state_key}")

        # index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue)
        _logger.debug(f"\tchoosing mutant...")
        index_to_new_residues, metadata = self._choose_mutant(old_topology, metadata)
        _logger.debug(f"\t\tindex to new residues: {index_to_new_residues}")

        # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter name of proposed residue))
        _logger.debug(f"\tgenerating residue map...")
        residue_map = self._generate_residue_map(old_topology, index_to_new_residues)
        _logger.debug(f"\t\tresidue map: {residue_map}")

        for (res, new_name) in residue_map:
            if res.name == new_name:
                #remove the index_to_new_residues entries where the topology is already mutated
                del(index_to_new_residues[res.index])
        if len(index_to_new_residues) == 0:
            _logger.debug(f"\t\tno mutation detected in this proposal; generating old proposal")
            atom_map = dict()
            for atom in old_topology.atoms():
                atom_map[atom.index] = atom.index
            _logger.debug('PolymerProposalEngine: No changes to topology proposed, returning old system and topology')
            topology_proposal = TopologyProposal(new_topology=old_topology, new_system=old_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=old_chemical_state_key, logp_proposal=0.0, new_to_old_atom_map=atom_map)
            return topology_proposal

        elif len(index_to_new_residues) > 1:
            raise Exception("Attempting to mutate more than one residue at once: ", index_to_new_residues, " The geometry engine cannot handle this.")

        chosen_res_index = list(index_to_new_residues.keys())[0]
        # Add modified_aa property to residues in old topology
        for res in old_topology.residues():
            res.modified_aa = True if res.index in index_to_new_residues.keys() else False

        _logger.debug(f"\tfinal index_to_new_residues: {index_to_new_residues}")
        _logger.debug(f"\tfinding excess and missing atoms/bonds...")
        # Identify differences between old topology and proposed changes
        # excess_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue
        # excess_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) bonds from existing residue not in new residue
        # missing_bonds : list(tuple (simtk.openmm.app.topology._TemplateAtomData, simtk.openmm.app.topology._TemplateAtomData)) bonds from new residue not in existing residue
        excess_atoms, excess_bonds, missing_atoms, missing_bonds = self._identify_differences(old_topology, residue_map)

        # Delete excess atoms and bonds from old topology
        excess_atoms_bonds = excess_atoms + excess_bonds
        _logger.debug(f"\t excess atoms bonds: {excess_atoms_bonds}")
        new_topology = self._delete_atoms(old_topology, excess_atoms_bonds)

        # Add missing atoms and bonds to new topology
        new_topology = self._add_new_atoms(new_topology, missing_atoms, missing_bonds, residue_map)
        
        return new_topology
#         # index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue)
#         _logger.debug(f"\tconstructing atom map for TopologyProposal...")
#         atom_map, old_res_to_oemol_map, new_res_to_oemol_map, local_atom_map_stereo_sidechain, current_oemol_sidechain, proposed_oemol_sidechain, old_oemol_res_copy, new_oemol_res_copy  = self._construct_atom_map(residue_map, old_topology, index_to_new_residues, new_topology)

#         _logger.debug(f"\tadding indices of the 'C' backbone atom in the next residue and the 'N' atom in the previous")
#         _logger.debug(f"\t{list(index_to_new_residues.keys())[0]}")
#         extra_atom_map = self._find_adjacent_residue_atoms(old_topology, new_topology, list(index_to_new_residues.keys())[0])
#         _logger.debug(f"\tfound extra atom map: {extra_atom_map}")

#         #now to add all of the other residue atoms to the atom map...
#         all_other_residues_new = [res for res in new_topology.residues() if res.index != list(index_to_new_residues.keys())[0]]
#         all_other_residues_old = [res for res in old_topology.residues() if res.index != list(index_to_new_residues.keys())[0]]

#         all_other_atoms_map = {}
#         for res_new, res_old in zip(all_other_residues_new, all_other_residues_old):
#             assert res_new.name == res_old.name, f"all other residue names do not match"
#             all_other_atoms_map.update({atom_new.index: atom_old.index for atom_new, atom_old in zip(res_new.atoms(), res_old.atoms())})

#         # new_chemical_state_key : str
#         new_chemical_state_key = self.compute_state_key(new_topology)
#         # new_system : simtk.openmm.System

#         # Copy periodic box vectors from current topology
#         new_topology.setPeriodicBoxVectors(current_topology.getPeriodicBoxVectors())

#         # Build system
#         # TODO: Remove build_system() branch once we convert entirely to new openmm-forcefields SystemBuilder
#         if hasattr(self._system_generator, 'create_system'):
#             new_system = self._system_generator.create_system(new_topology)
#         else:
#             new_system = self._system_generator.build_system(new_topology)

#         # Explicitly de-map any atoms involved in constraints that change length
#         atom_map = SmallMoleculeSetProposalEngine._constraint_repairs(atom_map, old_system, new_system, old_topology, new_topology)
#         _logger.debug(f"\tafter constraint repairs, the atom map is as such: {atom_map}")

#         _logger.debug(f"\tadding all env atoms to the atom map...")
#         atom_map.update(all_other_atoms_map)

#         old_res_names = [res.name for res in old_topology.residues() if res.index == list(index_to_new_residues.keys())[0]]
#         assert len(old_res_names) == 1, f"no old res name match found"
#         old_res_name = old_res_names[0]
#         _logger.debug(f"\told res name: {old_res_name}")
#         new_res_name = list(index_to_new_residues.values())[0]

#         # Create TopologyProposal.
#         current_res = [res for res in current_topology.residues() if res.index == chosen_res_index][0]
#         proposed_res = [res for res in new_topology.residues() if res.index == chosen_res_index][0]
#         augment_openmm_topology(topology = old_topology, residue_oemol = old_oemol_res_copy, residue_topology = current_res, residue_to_oemol_map = old_res_to_oemol_map)
#         augment_openmm_topology(topology = new_topology, residue_oemol = new_oemol_res_copy, residue_topology = proposed_res, residue_to_oemol_map = new_res_to_oemol_map)

#         topology_proposal = TopologyProposal(logp_proposal = 0.,
#                                              new_to_old_atom_map = atom_map,
#                                              old_topology = old_topology,
#                                              new_topology  = new_topology,
#                                              old_system = old_system,
#                                              new_system = new_system,
#                                              old_alchemical_atoms = [atom.index for atom in current_res.atoms()] + list(extra_atom_map.values()),
#                                              old_chemical_state_key = old_chemical_state_key,
#                                              new_chemical_state_key = new_chemical_state_key,
#                                              old_residue_name = old_res_name,
#                                              new_residue_name = new_res_name)

#         # Check that old_topology and old_system have same number of atoms.
#         old_topology_natoms = old_topology.getNumAtoms()  # number of topology atoms
#         old_system_natoms = old_system.getNumParticles()
#         if old_topology_natoms != old_system_natoms:
#             msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms)
#             raise Exception(msg)

#         # Check that new_topology and new_system have same number of atoms.
#         new_topology_natoms = new_topology.getNumAtoms()  # number of topology atoms
#         new_system_natoms = new_system.getNumParticles()
#         if new_topology_natoms != new_system_natoms:
#             msg = 'PolymerProposalEngine: new_topology has %d atoms, while new_system has %d atoms' % (new_topology_natoms, new_system_natoms)
#             raise Exception(msg)

#         # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
#         natoms_old = topology_proposal.old_system.getNumParticles()
#         natoms_new = topology_proposal.new_system.getNumParticles()
#         if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
#             msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of new atoms (1..%d):\n" % natoms_new
#             msg += str(topology_proposal.new_to_old_atom_map)
#             raise Exception(msg)
#         if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
#             msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new
#             msg += str(topology_proposal.new_to_old_atom_map)
#             raise Exception(msg)

#         #validate the old/new system matches
#         # TODO: create more rigorous checks for this validation either in TopologyProposal or in the HybridTopologyFactory
#         #assert PolymerProposalEngine.validate_core_atoms_with_system(topology_proposal)


#         return topology_proposal

In [3]:
ala, system_generator = generate_atp()

DEBUG:openmmforcefields.system_generators:Trying GAFFTemplateGenerator to load gaff-2.11


In [6]:
point_mutation_engine = PointMutationEngine2(wildtype_topology=ala.topology,
                                            system_generator=system_generator,
                                            chain_id='1', # Denote the chain id allowed to mutate (it's always a string variable)
                                            max_point_mutants=1,
                                            residues_allowed_to_mutate=['2'], # The residue ids allowed to mutate
                                            allowed_mutations=[('2', 'LYN')], # The residue ids allowed to mutate with the three-letter code allowed to change
                                            aggregate=True) # Always allow aggregation

# # Create a top proposal
# new_topology = point_mutation_engine.propose(current_system=ala.system, current_topology=ala.topology)


In [16]:
point_mutation_engine._templates['LYN'].atomIndices

{'N': 0,
 'H': 1,
 'CA': 2,
 'HA': 3,
 'CB': 4,
 'HB2': 5,
 'HB3': 6,
 'CG': 7,
 'HG2': 8,
 'HG3': 9,
 'CD': 10,
 'HD2': 11,
 'HD3': 12,
 'CE': 13,
 'HE2': 14,
 'HE3': 15,
 'NZ': 16,
 'HZ2': 17,
 'HZ3': 18,
 'C': 19,
 'O': 20}

In [14]:
for i, atom in enumerate(point_mutation_engine._templates['LYN'].atoms):
    if atom.name == 'HZ3':
        atom.name = 'HZ1'
    print(i, atom.name)

0 N
1 H
2 CA
3 HA
4 CB
5 HB2
6 HB3
7 CG
8 HG2
9 HG3
10 CD
11 HD2
12 HD3
13 CE
14 HE2
15 HE3
16 NZ
17 HZ2
18 HZ1
19 C
20 O


In [12]:
for bond in point_mutation_engine._templates['LYN'].bonds:
    print(bond)

(0, 1)
(0, 2)
(2, 3)
(2, 4)
(2, 19)
(4, 5)
(4, 6)
(4, 7)
(7, 8)
(7, 9)
(7, 10)
(10, 11)
(10, 12)
(10, 13)
(13, 14)
(13, 15)
(13, 16)
(16, 17)
(16, 18)
(19, 20)


In [5]:
generate_dipeptide_top_pos_sys(ala.topology, "LYN", ala.system, ala.positions, system_generator, conduct_htf_prop=True)

INFO:proposal_generator:	Conducting polymer point mutation proposal...


making topology proposal


AttributeError: 'NoneType' object has no attribute 'GetIdx'

In [32]:
old_res_name = 'ALA'
new_res_name = 'LYN'

current_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{old_res_name}.pdb"))
proposed_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{new_res_name}.pdb"))

current_oemol = PolymerProposalEngine.generate_oemol_from_pdb_template(current_residue_pdb_filename)
proposed_oemol = PolymerProposalEngine.generate_oemol_from_pdb_template(proposed_residue_pdb_filename)



In [34]:
for res in new_topology.residues():
    if res.name == 'LYN':
        for atom in res.atoms():
            print(atom)

<Atom 6 (N) of chain 0 residue 1 (LYN)>
<Atom 7 (H) of chain 0 residue 1 (LYN)>
<Atom 8 (CA) of chain 0 residue 1 (LYN)>
<Atom 9 (HA) of chain 0 residue 1 (LYN)>
<Atom 10 (CB) of chain 0 residue 1 (LYN)>
<Atom 11 (HB2) of chain 0 residue 1 (LYN)>
<Atom 12 (HB3) of chain 0 residue 1 (LYN)>
<Atom 13 (C) of chain 0 residue 1 (LYN)>
<Atom 14 (O) of chain 0 residue 1 (LYN)>
<Atom 15 (CG) of chain 0 residue 1 (LYN)>
<Atom 16 (HG2) of chain 0 residue 1 (LYN)>
<Atom 17 (HG3) of chain 0 residue 1 (LYN)>
<Atom 18 (CD) of chain 0 residue 1 (LYN)>
<Atom 19 (HD2) of chain 0 residue 1 (LYN)>
<Atom 20 (HD3) of chain 0 residue 1 (LYN)>
<Atom 21 (CE) of chain 0 residue 1 (LYN)>
<Atom 22 (HE2) of chain 0 residue 1 (LYN)>
<Atom 23 (HE3) of chain 0 residue 1 (LYN)>
<Atom 24 (NZ) of chain 0 residue 1 (LYN)>
<Atom 25 (HZ2) of chain 0 residue 1 (LYN)>
<Atom 26 (HZ3) of chain 0 residue 1 (LYN)>


In [36]:
_logger.debug(f"\t\t\told_oemol_res names: {[(atom.GetIdx(), atom.GetName()) for atom in current_oemol.GetAtoms()]}")
_logger.debug(f"\t\t\tnew_oemol_res names: {[(atom.GetIdx(), atom.GetName()) for atom in proposed_oemol.GetAtoms()]}")


DEBUG:root:			old_oemol_res names: [(0, 'N'), (1, 'CA'), (2, 'C'), (3, 'O'), (4, 'CB'), (5, 'H'), (6, 'HA'), (7, 'HB1'), (8, 'HB2'), (9, 'HB3'), (10, "H'")]
DEBUG:root:			new_oemol_res names: [(0, 'N'), (1, 'CA'), (2, 'C'), (3, 'O'), (4, 'CB'), (5, 'CG'), (6, 'CD'), (7, 'CE'), (8, 'NZ'), (9, 'H'), (10, 'HA'), (11, 'HB2'), (12, 'HB3'), (13, 'HG2'), (14, 'HG3'), (15, 'HD2'), (16, 'HD3'), (17, 'HE2'), (18, 'HE3'), (19, 'HZ1'), (20, 'HZ2'), (21, "H'")]


In [41]:
for atom in point_mutation_engine._templates['LYN'].atoms:
    print(atom.name)

N
H
CA
HA
CB
HB2
HB3
CG
HG2
HG3
CD
HD2
HD3
CE
HE2
HE3
NZ
HZ2
HZ3
C
O


In [None]:

#create bookkeeping dictionaries
new_res_to_oemol_map = {atom.index: proposed_oemol.GetAtom(oechem.OEHasAtomName(atom.name)).GetIdx() for atom in new_res.atoms()}
