In [1]:
import pickle
import os
from perses.app.relative_point_mutation_setup import PointMutationExecutor
import argparse
from simtk import openmm, unit
from simtk.openmm import app
from simtk.openmm.app.amberinpcrdfile import AmberInpcrdFile
from simtk.openmm.app.amberprmtopfile import AmberPrmtopFile
from tqdm import tqdm_notebook

INFO:numexpr.utils:Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
INFO:numexpr.utils:Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
INFO:rdkit:Enabling RDKit 2020.09.4 jupyter extensions


In [2]:
from __future__ import absolute_import

from perses.utils.openeye import createOEMolFromSDF, extractPositionsFromOEMol, oechem
from perses.annihilation.relative import HybridTopologyFactory, RepartitionedHybridTopologyFactory
from perses.rjmc.topology_proposal import PointMutationEngine, PolymerProposalEngine
from perses.rjmc.geometry import FFAllAngleGeometryEngine

import simtk.openmm as openmm
import simtk.openmm.app as app
import simtk.unit as unit
import numpy as np
from openmoltools import forcefield_generators
import mdtraj as md
from openmmtools.constants import kB
from perses.tests.utils import validate_endstate_energies
from openff.toolkit.topology import Molecule
from openmmforcefields.generators import SystemGenerator
from simtk.openmm import XmlSerializer

ENERGY_THRESHOLD = 1e-2
temperature = 300 * unit.kelvin
kT = kB * temperature
beta = 1.0/kT
ring_amino_acids = ['TYR', 'PHE', 'TRP', 'PRO', 'HIS']

# Set up logger
import logging
_logger = logging.getLogger()
_logger.setLevel(logging.INFO)

from perses.rjmc.topology_proposal import append_topology, deepcopy_topology, SmallMoleculeSetProposalEngine, augment_openmm_topology, TopologyProposal, AtomMapper

from simtk.openmm import app

import copy
import logging
import itertools
import os
import openeye.oechem as oechem
import numpy as np
import networkx as nx
import openmoltools.forcefield_generators as forcefield_generators
from perses.storage import NetCDFStorageView
from perses.rjmc.geometry import NoTorsionError
from functools import partial
try:
    from subprocess import getoutput  # If python 3
except ImportError:
    from commands import getoutput  # If python 2

import tempfile
import re
from math import floor

from pkg_resources import resource_filename
import shutil


In [9]:
# Differences between system generated by PointMutationExecutor and RBD:ACE2 systems:
## ewaldErrorTolerance is 0.0005 in RBD:ACE2
## temperature is 310 K in RBD:ACE2

In [5]:
class PointMutationEngineRBD(PointMutationEngine):
    """
    ProposalEngine for generating point mutation variants of RBD:ACE2. Uses tleap to parametrize the glycosylated systems.
   
    """
    def propose(self,
                current_system,
                current_topology,
                current_positions,
                tleap_prefix,
                is_complex,
                debug_dir,
                current_metadata=None):
        """
        Generate a TopologyProposal
        Parameters
        ----------
        current_system : simtk.openmm.System object
            The current system object
        current_topology : simtk.openmm.app.Topology object
            The current topology
        current_positions : np.array
            The current positions
        tleap_prefix : str
            Prefix for tleap input and output files
        is_complex : boolean
            Indicates whether the current system is apo or complex
        debug_dir : str
            if specified, debug output files will be saved here
            
        current_metadata : dict -- OPTIONAL
        Returns
        -------
        topology_proposal : TopologyProposal
            NamedTuple of type TopologyProposal containing forward and reverse
            probabilities, as well as old and new topologies and atom
            mapping
        local_atom_map_stereo_sidechain : dict
            chirality-corrected map of new_oemol_res to old_oemol_res
        old_oemol_res : openeye.oechem.oemol object
            oemol of the old residue sidechain
        new_oemol_res : openeye.oechem.oemol object
            oemol of the new residue sidechain
        """
        
        _logger.info(f"\tConducting polymer point mutation proposal...")
        old_topology = app.Topology()
        append_topology(old_topology, current_topology)

        # new_topology : simtk.openmm.app.Topology
        new_topology = app.Topology()
        append_topology(new_topology, current_topology)
                
        # Check that old_topology and old_system have same number of atoms.
        old_system = current_system
        old_topology_natoms = old_topology.getNumAtoms()  # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        # metadata : dict, key = 'chain_id' , value : str
        metadata = current_metadata
        if metadata is None:
            metadata = dict()

        # old_chemical_state_key : str
        _logger.debug(f"\tcomputing state key of old topology...")
        old_chemical_state_key = self.compute_state_key(old_topology)
        _logger.debug(f"\told chemical state key for chain {self._chain_id}: {old_chemical_state_key}")

        # index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue)
        _logger.debug(f"\tchoosing mutant...")
        index_to_new_residues, metadata = self._choose_mutant(old_topology, metadata)
        _logger.debug(f"\t\tindex to new residues: {index_to_new_residues}")

        # residue_map : list(tuples : simtk.openmm.app.topology.Residue (existing residue), str (three letter name of proposed residue))
        _logger.debug(f"\tgenerating residue map...")
        residue_map = self._generate_residue_map(old_topology, index_to_new_residues)
        _logger.debug(f"\t\tresidue map: {residue_map}")

        for (res, new_name) in residue_map:
            if res.name == new_name:
                #remove the index_to_new_residues entries where the topology is already mutated
                del(index_to_new_residues[res.index])
        if len(index_to_new_residues) == 0:
            _logger.debug(f"\t\tno mutation detected in this proposal; generating old proposal")
            atom_map = dict()
            for atom in old_topology.atoms():
                atom_map[atom.index] = atom.index
            _logger.debug('PolymerProposalEngine: No changes to topology proposed, returning old system and topology')
            topology_proposal = TopologyProposal(new_topology=old_topology, new_system=old_system, old_topology=old_topology, old_system=old_system, old_chemical_state_key=old_chemical_state_key, new_chemical_state_key=old_chemical_state_key, logp_proposal=0.0, new_to_old_atom_map=atom_map)
            return topology_proposal

        elif len(index_to_new_residues) > 1:
            raise Exception("Attempting to mutate more than one residue at once: ", index_to_new_residues, " The geometry engine cannot handle this.")

        chosen_res_index = list(index_to_new_residues.keys())[0]
        # Add modified_aa property to residues in old topology
        for res in old_topology.residues():
            res.modified_aa = True if res.index in index_to_new_residues.keys() else False

        _logger.debug(f"\tfinal index_to_new_residues: {index_to_new_residues}")
        _logger.debug(f"\tfinding excess and missing atoms/bonds...")
        # Identify differences between old topology and proposed changes
        # excess_atoms : list(simtk.openmm.app.topology.Atom) atoms from existing residue not in new residue
        # excess_bonds : list(tuple (simtk.openmm.app.topology.Atom, simtk.openmm.app.topology.Atom)) bonds from existing residue not in new residue
        # missing_bonds : list(tuple (simtk.openmm.app.topology._TemplateAtomData, simtk.openmm.app.topology._TemplateAtomData)) bonds from new residue not in existing residue
        excess_atoms, excess_bonds, missing_atoms, missing_bonds = self._identify_differences(old_topology, residue_map)

        # Delete excess atoms and bonds from old topology
        excess_atoms_bonds = excess_atoms + excess_bonds
        _logger.debug(f"\t excess atoms bonds: {excess_atoms_bonds}")
        new_topology = self._delete_atoms(old_topology, excess_atoms_bonds)

        # Add missing atoms and bonds to new topology
        new_topology = self._add_new_atoms(new_topology, missing_atoms, missing_bonds, residue_map)

        # index_to_new_residues : dict, key : int (index) , value : str (three letter name of proposed residue)
        _logger.debug(f"\tconstructing atom map for TopologyProposal...")
        atom_map, old_res_to_oemol_map, new_res_to_oemol_map, local_atom_map_stereo_sidechain, current_oemol_sidechain, proposed_oemol_sidechain, old_oemol_res_copy, new_oemol_res_copy  = self._construct_atom_map(residue_map, old_topology, index_to_new_residues, new_topology)

        _logger.debug(f"\tadding indices of the 'C' backbone atom in the next residue and the 'N' atom in the previous")
        _logger.debug(f"\t{list(index_to_new_residues.keys())[0]}")
        extra_atom_map = self._find_adjacent_residue_atoms(old_topology, new_topology, list(index_to_new_residues.keys())[0])
        _logger.debug(f"\tfound extra atom map: {extra_atom_map}")

        #now to add all of the other residue atoms to the atom map...
        all_other_residues_new = [res for res in new_topology.residues() if res.index != list(index_to_new_residues.keys())[0]]
        all_other_residues_old = [res for res in old_topology.residues() if res.index != list(index_to_new_residues.keys())[0]]

        all_other_atoms_map = {}
        for res_new, res_old in zip(all_other_residues_new, all_other_residues_old):
            assert res_new.name == res_old.name, f"all other residue names do not match"
            all_other_atoms_map.update({atom_new.index: atom_old.index for atom_new, atom_old in zip(res_new.atoms(), res_old.atoms())})

        # new_chemical_state_key : str
        new_chemical_state_key = self.compute_state_key(new_topology)
        # new_system : simtk.openmm.System

        # Copy periodic box vectors from current topology
        new_topology.setPeriodicBoxVectors(current_topology.getPeriodicBoxVectors())
        
        # Build system
        new_positions, new_system = self._generate_new_tleap_system(tleap_prefix, debug_dir, old_topology, new_topology, current_positions, is_complex)

        _logger.info("Finishing up topology proposal")
        
        #make constraint repairs
        atom_map = SmallMoleculeSetProposalEngine._constraint_repairs(atom_map, old_system, new_system, old_topology, new_topology)
        _logger.debug(f"\tafter constraint repairs, the atom map is as such: {atom_map}")

        _logger.debug(f"\tadding all env atoms to the atom map...")
        atom_map.update(all_other_atoms_map)

        old_res_names = [res.name for res in old_topology.residues() if res.index == list(index_to_new_residues.keys())[0]]
        assert len(old_res_names) == 1, f"no old res name match found"
        old_res_name = old_res_names[0]
        _logger.debug(f"\told res name: {old_res_name}")
        new_res_name = list(index_to_new_residues.values())[0]

        # Adjust logp_propose based on HIS presence
        # his_residues = ['HID', 'HIE']
        # old_residue = residue_map[0][0]
        # proposed_residue = residue_map[0][1]
        # if old_residue.name in his_residues and proposed_residue not in his_residues:
        #     logp_propose = math.log(2)
        # elif old_residue.name not in his_residues and proposed_residue in his_residues:
        #     logp_propose = math.log(0.5)
        # else:
        #     logp_propose = 0.0

        #we should be able to check the system to make sure that all of the core atoms
        
        # Create TopologyProposal.
        current_res = [res for res in current_topology.residues() if res.index == chosen_res_index][0]
        proposed_res = [res for res in new_topology.residues() if res.index == chosen_res_index][0]
        augment_openmm_topology(topology = old_topology, residue_oemol = old_oemol_res_copy, residue_topology = current_res, residue_to_oemol_map = old_res_to_oemol_map)
        augment_openmm_topology(topology = new_topology, residue_oemol = new_oemol_res_copy, residue_topology = proposed_res, residue_to_oemol_map = new_res_to_oemol_map)
        
        topology_proposal = TopologyProposal(logp_proposal = 0.,
                                             new_to_old_atom_map = atom_map,
                                             old_topology = old_topology,
                                             new_topology  = new_topology,
                                             old_system = old_system,
                                             new_system = new_system,
                                             old_alchemical_atoms = [atom.index for atom in current_res.atoms()] + list(extra_atom_map.values()),
                                             old_chemical_state_key = old_chemical_state_key,
                                             new_chemical_state_key = new_chemical_state_key,
                                             old_residue_name = old_res_name,
                                             new_residue_name = new_res_name)
                
        # Check that old_topology and old_system have same number of atoms.
        old_topology_natoms = old_topology.getNumAtoms()  # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'PolymerProposalEngine: old_topology has %d atoms, while old_system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        # Check that new_topology and new_system have same number of atoms.
        new_topology_natoms = new_topology.getNumAtoms()  # number of topology atoms
        new_system_natoms = new_system.getNumParticles()
        if new_topology_natoms != new_system_natoms:
            msg = 'PolymerProposalEngine: new_topology has %d atoms, while new_system has %d atoms' % (new_topology_natoms, new_system_natoms)
            raise Exception(msg)

        # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
        natoms_old = topology_proposal.old_system.getNumParticles()
        natoms_new = topology_proposal.new_system.getNumParticles()
        if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
            msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of new atoms (1..%d):\n" % natoms_new
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)
        if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
            msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)

        #validate the old/new system matches
        # TODO: create more rigorous checks for this validation either in TopologyProposal or in the HybridTopologyFactory
        #assert PolymerProposalEngine.validate_core_atoms_with_system(topology_proposal)

        
        return topology_proposal, new_positions
    
    def _add_new_atoms(self, topology, missing_atoms, missing_bonds, residue_map):
        """
        Add new atoms (and corresponding bonds) to new residues
        Parameters
        ----------
        topology : simtk.openmm.app.Topology
            extra atoms from old residue have been deleted, missing atoms in new residue not yet added
        missing_atoms : dict
            key : simtk.openmm.app.topology.Residue
            value : list(simtk.openmm.app.topology._TemplateAtomData)
        missing_bonds : list(tuple (simtk.openmm.app.topology._TemplateAtomData, simtk.openmm.app.topology._TemplateAtomData))
            bonds from new residue not in existing residue
        residue_map : list(tuples)
            simtk.openmm.app.topology.Residue, str (three letter residue name of new residue)
        Returns
        -------
        topology : simtk.openmm.app.Topology
            new residues have all correct atoms and bonds for desired mutation
        """
        _logger.info("Adding new atoms")
        old_residue = residue_map[0][0]
        new_residue_name = residue_map[0][1]
        template = self._templates[residue_map[0][1]] # Assume that residue_map has only one mutation
        template_atoms = list(template.atoms)
        
        new_topology = app.Topology()
        new_topology.setPeriodicBoxVectors(topology.getPeriodicBoxVectors())
        # new_atoms : dict, key : simtk.openmm.app.topology.Atom, value : simtk.openmm.app.topology.Atom maps old atoms to the corresponding Atom in the new residue
        new_atoms = {}
        # new_atom_names : dict, key : str new atom name, value : simtk.openmm.app.topology.Atom maps name of new atom to the corresponding Atom in the new residue (only contains map for missing residue)
        new_atom_names = {}
        # old_residues : list(simtk.openmm.app.topology.Residue)
        old_residues = [old.index for old, new in residue_map]
        for chain in topology.chains():
            new_chain = new_topology.addChain(chain.id)
            for residue in chain.residues():
                new_residue = new_topology.addResidue(residue.name, new_chain, residue.id)
                # Add modified property to residues in new topology
                new_residue.modified_aa = True if residue.index in old_residues else False
                # Copy over atoms from old residue to new residue
                if self._is_residue_equal(residue, old_residue):
                    old_atom_map = {atom.name : atom for atom in residue.atoms()}
                    for atom in template_atoms:
                        if atom in missing_atoms[old_residue]:
                            new_atom = new_topology.addAtom(atom.name, atom.element, new_residue)
                            new_atoms[atom] = new_atom
                            new_atom_names[new_atom.name] = new_atom
                        else:
                            old_atom = old_atom_map[atom.name]
                            new_atom = new_topology.addAtom(old_atom.name, old_atom.element, new_residue)
                            new_atom.old_index = old_atom.old_index
                            new_atoms[old_atom] = new_atom
                            if new_residue.modified_aa:
                                new_atom_names[new_atom.name] = new_atom
                    new_residue.name = residue_map[0][1]
                else:
                    for atom in residue.atoms():
                        # new_atom : simtk.openmm.app.topology.Atom
                        new_atom = new_topology.addAtom(atom.name, atom.element, new_residue)
                        new_atom.old_index = atom.old_index
                        new_atoms[atom] = new_atom
                        if new_residue.modified_aa:
                            new_atom_names[new_atom.name] = new_atom

        # Copy over bonds from topology to new topology
        for bond in topology.bonds():
            new_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]])
        
        for bond in missing_bonds:
            new_topology.addBond(new_atom_names[bond[0].name], new_atom_names[bond[1].name])

        return new_topology
    
    def _construct_atom_map(self,
                            residue_map,
                            old_topology,
                            index_to_new_residues,
                            new_topology):
        """
        Construct atom map (key: index to new residue, value: index to old residue) to supply as an argument to the TopologyProposal.
        Parameters
        ----------
        residue_map : list(tuples)
            simtk.openmm.app.topology.Residue, str (three letter residue name of new residue)
        old_topology : simtk.openmm.app.Topology
            topology of old system
        index_to_new_residues : dict
            key : int (index) , value : str (three letter name of proposed residue)
        new_topology : simtk.openmm.app.Topology
            topology of new system
        Returns
        -------
        adjusted_atom_map : dict, key: int (index
            new residues have all correct atoms and bonds for desired mutation
        old_res_to_oemol_map : dict
            key: int (index);  value: int (index)
        new_res_to_oemol_map : dict
            key: int (index);  value: int (index)
        local_atom_map_stereo_sidechain : dict
            chirality-corrected map of new_oemol_res to old_oemol_res
        current_oemol : openeye.oechem.oemol object
            copy of modified old oemol sidechain
        proposed_oemol : openeye.oechem.oemol object
            copy of modified new oemol sidechain
        old_oemol_res_copy : openeye.oechem.oemol object
            copy of modified old oemol
        new_oemol_res_copy : openeye.oechem.oemol object
            copy of modified new oemol
        """
        from pkg_resources import resource_filename
        import openeye.oechem as oechem #must this be explicit?

        # atom_map : dict, key : int (index of atom in old topology) , value : int (index of same atom in new topology)
        atom_map = dict()

        # atoms with an old_index attribute should be mapped
        # k : int
        # atom : simtk.openmm.app.topology.Atom

        # old_to_new_residues : dict, key : str old residue name, key : simtk.openmm.app.topology.Residue new residue
        old_to_new_residues = {}
        new_residues = [residue for residue in new_topology.residues()] # Assumes all residue indices start from 0 and are contiguous
        for old_residue in old_topology.residues():
            old_to_new_residues[old_residue] = new_residues[old_residue.index]
        #_logger.debug(f"\t\told_to_new_residues: {old_to_new_residues}")

        # modified_residues : dict, key : index of old residue, value : proposed residue
        modified_residues = dict()

        for map_entry in residue_map:
            old_residue = map_entry[0]
            modified_residues[old_residue.index] = old_to_new_residues[old_residue]
        _logger.debug(f"\t\tmodified residues: {modified_residues}")

        # old_residues : dict, key : index of old residue, value : old residue
        old_residues = dict()
        for residue in old_topology.residues():
            if residue.index in index_to_new_residues.keys():
                old_residues[residue.index] = residue
        _logger.debug(f"\t\t\told residues: {old_residues}")

        # Update atom map with atom mappings for residues that have been modified
        assert len(index_to_new_residues) == 1, f"index_to_new_residues is not of length 1"
        index = list(index_to_new_residues.keys())[0]
        #old_res = old_residues[index]
        old_res = old_residues[index]
        new_res = modified_residues[index]
        _logger.debug(f"\t\t\told res: {old_res.name}; new res: {new_res.name}")

        new_res_index_to_name = {atom.index: atom.name for atom in new_res.atoms()}
        old_res_index_to_name = {atom.index: atom.name for atom in old_res.atoms()}

        _logger.debug(f"\t\t\told topology res names: {old_res_index_to_name}")
        _logger.debug(f"\t\t\tnew topology res names: {new_res_index_to_name}")

        old_res_name = old_res.name
        new_res_name = new_res.name

        #make correction for HIS
        his_templates = ['HIE', 'HID']
        if old_res_name in his_templates:
            old_res_name = 'HIS'
        if new_res_name in his_templates:
            new_res_name = 'HIS'
        else:
            pass

        current_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{old_res_name}.pdb"))
        proposed_residue_pdb_filename = resource_filename('perses', os.path.join('data', 'amino_acid_templates', f"{new_res_name}.pdb"))

        current_oemol = PolymerProposalEngine.generate_oemol_from_pdb_template(current_residue_pdb_filename)
        proposed_oemol = PolymerProposalEngine.generate_oemol_from_pdb_template(proposed_residue_pdb_filename)

        old_oemol_res_copy = copy.deepcopy(current_oemol)
        new_oemol_res_copy = copy.deepcopy(proposed_oemol)


        _logger.debug(f"\t\t\told_oemol_res names: {[(atom.GetIdx(), atom.GetName()) for atom in current_oemol.GetAtoms()]}")
        _logger.debug(f"\t\t\tnew_oemol_res names: {[(atom.GetIdx(), atom.GetName()) for atom in proposed_oemol.GetAtoms()]}")

        #create bookkeeping dictionaries
        old_res_to_oemol_map = {atom.index: current_oemol.GetAtom(oechem.OEHasAtomName(atom.name)).GetIdx() for atom in old_res.atoms()}
        new_res_to_oemol_map = {atom.index: proposed_oemol.GetAtom(oechem.OEHasAtomName(atom.name)).GetIdx() for atom in new_res.atoms()}

        old_oemol_name_idx = {atom.GetName(): atom.GetIdx() for atom in current_oemol.GetAtoms()}
        new_oemol_name_idx = {atom.GetName(): atom.GetIdx() for atom in proposed_oemol.GetAtoms()}

        _logger.debug(f"\t\t\told_res_to_oemol_map: {old_res_to_oemol_map}")
        _logger.debug(f"\t\t\tnew_res_to_oemol_map: {new_res_to_oemol_map}")

        old_oemol_to_res_map = {val: key for key, val in old_res_to_oemol_map.items()}
        new_oemol_to_res_map = {val: key for key, val in new_res_to_oemol_map.items()}

        # HBM - these don't seem to be used anywhere
        #old_res_to_oemol_molecule_map = {atom.index: current_oemol.GetAtom(oechem.OEHasAtomName(atom.name)) for atom in old_res.atoms()}
        #new_res_to_oemol_molecule_map = {atom.index: proposed_oemol.GetAtom(oechem.OEHasAtomName(atom.name)) for atom in new_res.atoms()}



        #initialize_the atom map
        local_atom_map = {}

        #now remove backbones in both molecules and map them separately
        backbone_atoms = ['C', 'CA', 'N', 'O', 'H', 'HA', "H'"]
        # TODO dom make this a seperate function
        old_atoms_to_delete, new_atoms_to_delete = [], []
        for atom in proposed_oemol.GetAtoms():
            if atom.GetName() in backbone_atoms:
                try: #to get the backbone atom with the same naem in the old_oemol_res
                    old_corresponding_backbones = [_atom for _atom in current_oemol.GetAtoms() if _atom.GetName() == atom.GetName()]
                    if old_corresponding_backbones == []:
                        #this is an exception when the old oemol res is a glycine.  if this is the case, then we do not map HA2 or HA3
                        assert set(['HA2', 'HA3']).issubset([_atom.GetName() for _atom in current_oemol.GetAtoms()]), f"old oemol residue is not a GLY template"
                        #we have to map HA3 to HA (old, new)
                        old_corresponding_backbones = [_atom for _atom in current_oemol.GetAtoms() if _atom.GetName() == 'HA3' and atom.GetName() == 'HA']
                    assert len(old_corresponding_backbones) == 1, f"there can only be one corresponding backbone in the old molecule; corresponding backbones: {[atom.GetName() for atom in old_corresponding_backbones]}"
                    old_corresponding_backbone = old_corresponding_backbones[0]
                    if not atom.GetName() == "H'": #throw out the extra H
                        local_atom_map[atom.GetIdx()] = old_corresponding_backbone.GetIdx()
                    old_atoms_to_delete.append(old_corresponding_backbone)
                    new_atoms_to_delete.append(atom)
                    assert proposed_oemol.DeleteAtom(atom), f"failed to delete new_oemol atom {atom}"
                    assert current_oemol.DeleteAtom(old_corresponding_backbone), f"failed to delete old_oemol atom {old_corresponding_backbone}"
                except Exception as e:
                    raise Exception(f"failed to map the backbone separately: {e}")


        _logger.debug(f"\t\t\told_oemol_res names: {[(atom.GetIdx(), atom.GetName()) for atom in current_oemol.GetAtoms()]}")
        _logger.debug(f"\t\t\tnew_oemol_res names: {[(atom.GetIdx(), atom.GetName()) for atom in proposed_oemol.GetAtoms()]}")

        old_sidechain_oemol_indices_to_name = {atom.GetIdx(): atom.GetName() for atom in current_oemol.GetAtoms()}
        new_sidechain_oemol_indices_to_name = {atom.GetIdx(): atom.GetName() for atom in proposed_oemol.GetAtoms()}


        #now we can get the mol atom map of the sidechain
        #NOTE: since the sidechain oemols are NOT zero-indexed anymore, we need to match by name (since they are unique identifiers)
        break_bool = False if old_res_name == 'TRP' or new_res_name == 'TRP' else True # Set allow_ring_breaking to be False if the transformation involves TRP
        _logger.debug(f"\t\t\t allow ring breaking: {break_bool}")
        local_atom_map_nonstereo_sidechain = AtomMapper._get_mol_atom_map(current_oemol, proposed_oemol, map_strength='strong', matching_criterion='name', map_strategy='matching_criterion', allow_ring_breaking=break_bool)

        #check the atom map thus far:
        _logger.debug(f"\t\t\tlocal atom map nonstereo sidechain: {local_atom_map_nonstereo_sidechain}")

        #preserve chirality of the sidechain
        # _logger.warning(f"\t\t\told oemols: {[atom.GetIdx() for atom in self.current_molecule.GetAtoms()]}")
        # _logger.warning(f"\t\t\tnew oemols: {[atom.GetIdx() for atom in new_oemol_res.GetAtoms()]}")
        if local_atom_map_nonstereo_sidechain is not None:
            local_atom_map_stereo_sidechain = AtomMapper.preserve_chirality(current_oemol, proposed_oemol, local_atom_map_nonstereo_sidechain)
        else:
            local_atom_map_stereo_sidechain = {}

        _logger.debug(f"\t\t\tlocal atom map stereo sidechain: {local_atom_map_stereo_sidechain}")

        #fix the sidechain indices w.r.t. full oemol
        sidechain_fixed_map = {}
        mapped_names = []
        for new_sidechain_idx, old_sidechain_idx in local_atom_map_stereo_sidechain.items():
            new_name, old_name = new_sidechain_oemol_indices_to_name[new_sidechain_idx], old_sidechain_oemol_indices_to_name[old_sidechain_idx]
            mapped_names.append((new_name, old_name))
            new_full_oemol_idx, old_full_oemol_idx = new_oemol_name_idx[new_name], old_oemol_name_idx[old_name]
            sidechain_fixed_map[new_full_oemol_idx] = old_full_oemol_idx

        _logger.debug(f"\t\t\toemol sidechain fixed map: {sidechain_fixed_map}")


        #make sure that CB is mapped; otherwise the residue will not be contiguous
        found_CB = False
        if any(item[0] == 'CB' and item[1] == 'CB' for item in mapped_names):
            found_CB = True

        if not found_CB:
            _logger.debug(f"\t\t\tno 'CB' found!!!.  removing local atom map stereo sidechain...")
            sidechain_fixed_map = {}

        _logger.debug(f"\t\t\tthe local atom map (backbone) is {local_atom_map}")
        #update the local map
        local_atom_map.update(sidechain_fixed_map)
        _logger.debug(f"\t\t\tthe local atom map (total) is {local_atom_map}")

        #correct the map
        #now we have to update the atom map indices
        _logger.debug(f"\t\t\tadjusting the atom map with topology indices...")
        topology_index_map = {}
        for new_oemol_idx, old_oemol_idx in local_atom_map.items():
            topology_index_map[new_oemol_to_res_map[new_oemol_idx]] = old_oemol_to_res_map[old_oemol_idx]


        _logger.debug(f"\t\t\ttopology_atom_map: {topology_index_map}")

        mapped_atoms = [(new_res_index_to_name[new_idx], old_res_index_to_name[old_idx]) for new_idx, old_idx in topology_index_map.items()]
        _logger.debug(f"\t\t\tthe mapped atom names are: {mapped_atoms}")

            #and all of the environment atoms should already be handled
        return topology_index_map, old_res_to_oemol_map, new_res_to_oemol_map, local_atom_map, current_oemol, proposed_oemol, old_oemol_res_copy, new_oemol_res_copy

    def _generate_new_tleap_system(self, tleap_prefix, debug_dir, old_topology, new_topology, current_positions, is_complex):
        """
        Generates new system by: 1) mutating in pymol to get the new positions, 2) rearranging positions to match
        the atom order in the new_topology and copying solvent atoms from the old positions, 3) parametrizing the
        new system using tleap.
        
        Parameters
        ----------
        tleap_prefix : str
            Prefix for tleap input and output files
        debug_dir : str
            if specified, debug output files will be saved here
        old_topology : simtk.openmm.app.Topology object
            The old topology
        new_topology : simtk.openmm.app.Topology object
            The new topology
        current_positions : np.array
            The current positions
        is_complex : boolean
            Indicates whether the current system is apo or complex
        Returns
        -------
        new_positions : np.array
            The new positions
        new_system : simtk.openmm.System object
            The new system object        
        """
        
        # Prepare PDB for mutation by removing solvent and renumbering the tleap coordinates
        _logger.info("Prepping for mutation")
        name = 'rbd_ace2' if is_complex else 'rbd'
        prepped_pdb = os.path.join(debug_dir, f"2_{name}_for_mutation.pdb")
        self._prep_for_mutation(tleap_prefix, prepped_pdb, is_complex)

        # Generate PDB of new topology/positions using pymol
        _logger.info("Mutating")
        mutant_position = self._allowed_mutations[0][0] # assume only allowed_mutations only has one mutation
        mutant_residue = self._allowed_mutations[0][1] # assume only allowed_mutations only has one mutation
        mutant_pdb = os.path.join(debug_dir, f"3_{name}_mutant.pdb")
        if os.path.exists(mutant_pdb):
            os.system(f"rm {mutant_pdb}") # Otherwise, pymol will load this file in
#         self._mutate(f"2_{name}_for_mutation.pdb", f'{self._chain_id}/{mutant_position}/', mutant_residue, name)
        mutate_script = resource_filename('perses', 'data/rbd-ace2/3_mutate.py')
        os.system(f"python {mutate_script} {prepped_pdb} {mutant_pdb} {self._chain_id}/{mutant_position}/ {mutant_residue}")
        
        # Prep PDBs for tleap
        _logger.info("Prepping PDBs for tleap")
        new_positions = self._prep_for_tleap(debug_dir, old_topology, new_topology, current_positions, int(mutant_position), mutant_residue, is_complex)
        
        # Edit tleap in file
        tleap_prefix = os.path.join(debug_dir, f"5_{name}_mutant_tleap")
        mutant_template = resource_filename('perses', f'data/rbd-ace2/5_{name}_mutant_template_tleap.in')
        edit_tleap_in_inputs(mutant_template, tleap_prefix, debug_dir)
#         edit_tleap_in_ions(tleap_prefix)
        
        # Generate system using tleap 
        _logger.info("Generating new system")
        _, _, new_system = generate_tleap_system(os.path.join(debug_dir, f"5_{name}_mutant_tleap"))
       
        return new_positions, new_system
    
    def _prep_for_mutation(self, tleap_prefix, output_pdb, is_complex):
        """
        Prepare a PDB for mutation in PyMOL: 1) Load the tleap files for the old system, 2) Rename the chains/residues
        to match the canonical renumbering, 3) Remove solvent
        
        Parameters
        ----------
        tleap_prefix : str
            Prefix for tleap input and output files
        output_pdb : str
            Path to output PDB (prepped for mutation)
        is_complex : boolean
            Indicates whether the current system is apo or complex
        """
        
        import MDAnalysis as mda
        
        prmtop_file = f"{tleap_prefix}.prmtop"
        inpcrd_file = f"{tleap_prefix}.inpcrd"
        ref_file = f"{tleap_prefix}.pdb"

        # Load in the topology from tleap output files
        u = mda.Universe(prmtop_file, inpcrd_file)

        u_dim = mda.Universe(ref_file)
        dimensions = u_dim.dimensions

        # RBD
        rbd = u.select_atoms("index 0-3000")
        new_rbd_resids = [i for i in range(332, 528)]
        rbd.residues.resids = new_rbd_resids

        rbd_glycans = u.select_atoms("index 3001-3234")
        new_rbd_glycan_resids = [527 + i for i in range(1, len(rbd_glycans.residues.resids) + 1)]
        rbd_glycans.residues.resids = new_rbd_glycan_resids
        
        if is_complex:
            # ACE2
            ace2 = u.select_atoms("index 3235-14584")
            new_ace2_resids = [i for i in range(18, 727)]
            ace2.residues.resids = new_ace2_resids

            ace2_glycans = u.select_atoms("index 14585-15971")
            new_ace2_glycan_resids = [726 + i for i in range(1, len(ace2_glycans.residues.resids) + 1)]
            ace2_glycans.residues.resids = new_ace2_glycan_resids

            ace2_ions = u.select_atoms("index 15972-15973")
            new_ace2_ion_resids = [i for i in range(1, len(ace2_ions.residues.resids) + 1)]
            ace2_ions.residues.resids = new_ace2_ion_resids

            # Create the new system by merging each universe
            new_system = mda.Merge(rbd, rbd_glycans, ace2, ace2_glycans, ace2_ions)
    
            # Name each chain
            new_system.segments.segids = ['R', 'X', 'C', 'D', 'E']
            
            name = "rbd_ace2"
        else:
            # Create the new system by merging each universe
            new_system = mda.Merge(rbd, rbd_glycans)
    
            # Name each chain
            new_system.segments.segids = ['R', 'X']
            
            name = "rbd"
            
        new_system.dimensions = dimensions

        # Write out the new system
        new_system.atoms.write(output_pdb)
        
#     def _mutate(self, input_pdb, output_pdb, mutation_selection, mutant_residue, name):
#         """
#         Given a WT PDB and a desired mutation, mutate the PDB in pymol.
        
#         Parameters
#         ----------
#         input_pdb : str
#             Path to PDB to be mutated
#         output_pdb : str
#             Path to PDB after mutation
#         mutation_selection : str
#             Pymol selection string for the residue to be mutated. Example: For Chain R Residue 439, use 'R/439/'
#         mutant_residue : str
#             Three-letter code for the residue to mutate to. Example: For lysine, use 'LYS'
#         name : str
#             Name of the system to be used in the output file. Example: 'rbd_ace2'
   
#         """
        
#         import pymol
#         from pymol import cmd
#         import sys
        
#         d = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
#              'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
#              'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
#              'ALA': 'A', 'VAL':'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}

#         # Launch pymol session
#         pymol.pymol_argv = ["pymol", "-qc"] + sys.argv[1:]
#         pymol.finish_launching()

#         # Load RBD (no solvent)
#         cmd.load(input_pdb)

#         # Mutate
#         cmd.wizard("mutagenesis")
#         cmd.do("refresh_wizard")
#         cmd.get_wizard().set_mode(mutant_residue)
#         cmd.get_wizard().do_select(mutation_selection)

#         # Select rotamer
#         cmd.frame(1)
        
#         # Apply the mutation
#         cmd.get_wizard().apply()
#         cmd.set_wizard() # Equivalent to clicking "Done" in the GUI

#         # Save
#         cmd.save(output_pdb)
#         cmd.refresh()
    
    def _prep_for_tleap(self, debug_dir, old_topology, new_topology, current_positions, mutant_position, mutant_residue, is_complex):
        """
        Given a mutated PDB, prepare a PDB for tleap input: 1) Rearrange the mutated PDB positions such that they 
        match the atom ordering in new_topology, 2) Copy the solvent positions from current_positions, 3) Save apo 
        RBD, apo ACE2 (for complex), solvent as separate PDBs.
        
        Parameters
        ----------
        debug_dir : str
            If specified, debug output files will be saved here
        old_topology : simtk.openmm.app.Topology object
            The old topology
        new_topology : simtk.openmm.app.Topology object
            The new topology
        current_positions : np.array
            The current positions
        mutant_position : int
            Position to mutate
        mutant_residue : str
            Three-letter code for the residue to mutate to. Example: For lysine, use 'LYS'
        is_complex : boolean
            Indicates whether the current system is apo or complex
     
        """
        name = 'rbd_ace2' if is_complex else 'rbd'
        
        # Correct atom names in mutated PDB
        # Read lines
        mutant_pdb = os.path.join(debug_dir, f"3_{name}_mutant.pdb")
        with open(mutant_pdb, "r") as f:
            lines = f.readlines()

        # Iterate through lines, copying them over to new list of lines
        new_lines = []
        for line in lines:
            if 'TER' not in line and 'END' not in line and 'REMARK' not in line and 'TITLE' not in line and 'CRYST1' not in line and 'CONECT' not in line:
                current_res_name = line[17:20]
                current_res_id = int(line[23:26])
                if current_res_name == mutant_residue and current_res_id == mutant_position: # Fix atom names in mutant residue
                    atom = line[12:16]
                    if atom[0].isdigit():
                        atom_chars = len(atom[1:].strip(" "))
                        if atom_chars == 2: # if atom name is 2 characters
                            line = line[:12] + ' ' + line[13:15] + line[12] + line[16:]
                        elif atom_chars == 3:
                            line = line[:12] + line[13:16] + line[12] + " " + line[17:]  
                new_lines.append(line)

        # Update mutated PDB with corrected atom lines
        with open(mutant_pdb, 'w') as f:
            f.writelines(new_lines)
        
        # Load mutated (protonated) PDB
        mutated_pdb = app.PDBFile(mutant_pdb)
        mutated_n_atoms = mutated_pdb.topology.getNumAtoms()
        
        # Map atom indices from pymol PDB to atom indices in new_topology
        d_omm = {} # key: (atom name, residue id, chain id), value: atom index
        for atom_omm in tqdm_notebook(new_topology.atoms()):
            d_omm[(atom_omm.name, atom_omm.residue.id, atom_omm.residue.chain.id)] = atom_omm.index

        d_map = {} # key: atom index from pymol mutated PDB, value: atom index in new_topology
        for atom_pymol in tqdm_notebook(mutated_pdb.topology.atoms()):
            match_index = d_omm[(atom_pymol.name, atom_pymol.residue.id, atom_pymol.residue.chain.id)]
            d_map[atom_pymol.index] = match_index

        # Rearrange positions based on new_topology and add units to positions
        dim_1, dim_2 = np.array(mutated_pdb.positions).shape
        mutated_positions = unit.Quantity(np.zeros(shape=(dim_1, dim_2)), unit=unit.nanometers)
        positions = unit.quantity.Quantity(value = np.array([list(atom_pos) for atom_pos in mutated_pdb.positions.value_in_unit_system(unit.md_unit_system)]), unit = unit.nanometers)
        for k, v in d_map.items():
            mutated_positions[v] = positions[k]
        
        # Copy solvent positions from old positions
        solvent_atoms = [atom for atom in old_topology.atoms() if atom.residue.chain.id == 'Y']
        first_solvent_atom = solvent_atoms[0].index
        new_positions = unit.Quantity(np.zeros([mutated_n_atoms + len(solvent_atoms), 3]), unit=unit.nanometers)
        new_positions[:mutated_n_atoms, :] = mutated_positions
        new_positions[mutated_n_atoms:, :] = current_positions[first_solvent_atom:]
            
        def save_apo(output_pdb, topology, positions, chains_to_keep):
            modeller = app.Modeller(topology, positions)
            to_delete = []
            for chain in modeller.topology.chains():
                if chain.id not in chains_to_keep:
                    to_delete.append(chain)
            modeller.delete(to_delete)
            app.PDBFile.writeFile(modeller.topology, modeller.positions, open(output_pdb, "w"), keepIds=True)

        # Save apo solute PDBs and then correct for tleap
        rbd_pdb = os.path.join(debug_dir, f"3_{name}_mutant_rbd_tleap.pdb")
        rbd_pdb_final = os.path.join(debug_dir, f'4_{name}_mutant_rbd_tleap_final.pdb')
        save_apo(rbd_pdb, new_topology, new_positions, ['R', 'X'])
        edit_pdb_for_tleap(rbd_pdb, rbd_pdb_final)
        
        solvent_pdb = os.path.join(debug_dir, f"3_{name}_mutant_solvent_tleap.pdb")
        solvent_pdb_final = os.path.join(debug_dir, f'4_{name}_mutant_solvent_tleap_final.pdb')
        save_apo(solvent_pdb, new_topology, new_positions, ['Y'])
        edit_pdb_for_tleap(solvent_pdb, solvent_pdb_final)
        
        if is_complex:
            ace2_pdb = os.path.join(debug_dir, f"3_{name}_mutant_ace2_tleap.pdb")
            ace2_pdb_final = os.path.join(debug_dir, f'4_{name}_mutant_ace2_tleap_final.pdb')
            save_apo(ace2_pdb, new_topology, new_positions, ['C', 'D', 'E'])
            edit_pdb_for_tleap(ace2_pdb, ace2_pdb_final, is_ace2=True)
    
        return new_positions

In [6]:
def edit_pdb_for_tleap(input_pdb, output_pdb, is_ace2=False):
    """
    Edit a PDB file so that it can be loaded into tleap.

    Parameters
    ----------
    input_pdb : str
        Path to input PDB
    output_pdb : str
        Path to output (edited) PDB
    is_ace2 : boolean, default False
        Indicates whether the file is for ACE2
    """
    
    # Read lines
    with open(input_pdb, "r") as f:
        lines = f.readlines()

    # Iterate through lines, copying them over to new list of lines
    glycan_residue_names = ['UYB', '4YB', 'VMB', '2MA', '0YB', '0fA', '0LB']
    new_lines = []
    previous_res_id =  0
    previous_res_name = ''
    for line in lines:
        if 'CONECT' in line: # Skip CONECT lines
            continue
        if 'TER' in line and 'NME' in line:
            continue
        if 'TER' not in line and 'END' not in line and 'REMARK' not in line and 'TITLE' not in line and 'CRYST1' not in line:
            current_res_name = line[17:20]
            current_res_id = int(line[23:26])
            if current_res_id != previous_res_id:
                if previous_res_name in glycan_residue_names:
                    new_lines.append("TER\n") # add TER if the previous residue was a glycan residue
                if previous_res_name == "NME":
                    new_lines.append("TER\n") # add TER after the NME and before starting the next residue
                previous_res_id = current_res_id 
                previous_res_name = current_res_name
            if current_res_name == 'NME': # change C atom in NMEs to CH3
                atom = line[13:16]
                if atom == 'C  ':
                    line = line[:13] + 'CH3 ' + line[17:]
                if atom == 'H1 ':
                    line = line[:12] + 'HH31' + line[16:]
                if atom == 'H2 ':
                    line = line[:12] + 'HH32' + line[16:]
                if atom == 'H3 ':
                    line = line[:12] + 'HH33' + line[16:]
            if is_ace2:
                if current_res_name == 'CYS' and current_res_id not in [261, 498]: # change CYS to CYX
                    line = line[:17] + 'CYX' + line[20:]
            else:
                if current_res_name == 'CYS': # change CYS to CYX
                    line = line[:17] + 'CYX' + line[20:]
                
        new_lines.append(line)

    with open(output_pdb, 'w') as f:
        f.writelines(new_lines)

def edit_tleap_in_inputs(tleap_in_template, tleap_prefix, debug_dir=None):
    """
    Edit the input and output files in the tleap.in file 

    Parameters
    ----------
    tleap_in_template : str
        Template tleap.in file to edit
    tleap_prefix : str
        Prefix for output tleap.in and output tleap files
    debug_dir : str, default None
        If specified, dir to prepend to path of input files
    """
    
    with open(tleap_in_template, "r") as f:
        lines_in = f.readlines()

    new_lines = []
    for line in lines_in:
        if "mol1 = loadpdb" in line:
            if debug_dir:
                linesplit = line.split(" ")
                line = ' '.join(linesplit[:-1]) + f" {os.path.join(debug_dir, linesplit[-1])}"
        if "mol2 = loadpdb" in line:
            if debug_dir:
                linesplit = line.split(" ")
                line = ' '.join(linesplit[:-1]) + f" {os.path.join(debug_dir, linesplit[-1])}"
        if "mol3 = loadpdb" in line:
            if debug_dir:
                linesplit = line.split(" ")
                line = ' '.join(linesplit[:-1]) + f" {os.path.join(debug_dir, linesplit[-1])}"
        if "savepdb" in line:
            linesplit = line.split(" ")
            line = ' '.join(linesplit[:-1]) + f" {tleap_prefix}.pdb\n"
        if "saveamberparm" in line:
            linesplit = line.split(" ")
            line = ' '.join(linesplit[:-2]) + f" {tleap_prefix}.prmtop {tleap_prefix}.inpcrd\n"
        new_lines.append(line)

    with open(f"{tleap_prefix}.in", 'w') as f:
        f.writelines(new_lines)
        
def edit_tleap_in_ions(tleap_prefix):
    """
    Edit the number of ions in the tleap.in file 

    Parameters
    ----------
    tleap_prefix : str
        Prefix for tleap.in file to edit
    """
    
    # Run tleap to determine how many waters will be present in solvent
    with tempfile.TemporaryDirectory() as temp_dir:
        tleap_in_temp = os.path.join(temp_dir, "temp")
        tleap_out_temp = os.path.join(temp_dir, "temp.out")
        edit_tleap_in_inputs(f"{tleap_prefix}.in", tleap_in_temp)
        os.system(f"tleap -s -f {tleap_in_temp}.in > {tleap_out_temp}")
    
        # Retrieve charge and num of waters
        with open(tleap_out_temp, "r") as f:
            lines_out = f.readlines()

        for line in lines_out:
            if "Total unperturbed charge" in line:
                charge = float(line.split(":")[1].strip('\n'))
            if "residues" in line:
                result = re.findall(r"\d*", line)
                result_filtered = [r for r in result if r]
                num_waters = int(result_filtered[0])

    # Compute number of ions (copied from OpenMM)
    numWaters = num_waters
    numPositive = 0
    numNegative = 0 
    totalCharge = charge
    ionicStrength = 0.15

    if totalCharge > 0:
        numNegative += totalCharge
    else:
        numPositive -= totalCharge

    numIons = (numWaters - numPositive - numNegative) * ionicStrength / (55.4)  # Pure water is about 55.4 molar (depending on temperature)
    numPairs = int(floor(numIons + 0.5))
    numPositive += numPairs
    numNegative += numPairs
    print(f"num positive: {numPositive}")
    print(f"num negative: {numNegative}")

    # Edit tleap file
    with open(f"{tleap_prefix}.in", "r") as f:
        lines_in = f.readlines()

    new_lines = []
    for line in lines_in:
        if "addionsrand complex" in line:
            line = f"addionsrand complex Na+ {int(numPositive)} Cl- {int(numNegative)}\n"
        new_lines.append(line)

    with open(f"{tleap_prefix}.in", 'w') as f:
        f.writelines(new_lines)
        
def generate_tleap_system(tleap_prefix, 
                        temperature=300 * unit.kelvin, 
                        nonbonded_method=app.PME, 
                        constraints=app.HBonds, 
                        remove_cm_motion=False, 
                        hydrogen_mass=4.0 * unit.amu):

    """
    Generate a tleap system by 1) running tleap and 2) loading the tleap output prmtop and inpcrd files into openmm

    Parameters
    ----------
    tleap_prefix : str
        Prefix for tleap input and output files
    temperature : unit.kelvin, default 300 * unit.kelvin
        Temperature
    nonbonded_method : simtk.openmm.app.Forcefield subclass object default app.PME
        Nonbonded method
    constraints : simtk.openmm.app.Forcefield subclass object, default app.HBonds
        Bonds that should have constraints
    remove_cm_motion : boolean, default False
        Indicates whether to remove center of mass motion
    hydrogen_mass : unit.amu, default 4.0 * unit.amu
        Hydrogen mass
    Returns
    -------
    prmtop.topology : simtk.openmm.app.Topology object
        Topology loaded from the prmtop file
    inpcrd.positions : np.array
        Positions loaded from the inpcrd file
    system : simtk.openmm.System object
        Tleap generated system as an OpenMM object
    """
    
    # Run tleap
    os.system(f"tleap -s -f {tleap_prefix}.in > {tleap_prefix}.out")

    # Check if tleap was successful
    if not os.path.exists(f"{tleap_prefix}.prmtop"):
        raise Exception(f"tleap parametrization did not complete successfully, check {tleap_prefix}.out for errors")

    # Load prmtop and inpcrd files
    prmtop = AmberPrmtopFile(f"{tleap_prefix}.prmtop")
    inpcrd = AmberInpcrdFile(f"{tleap_prefix}.inpcrd")

    # Generate system
    system = prmtop.createSystem(
        nonbondedMethod=nonbonded_method,
        constraints=constraints,
        temperature=temperature,
        removeCMMotion=remove_cm_motion,
        hydrogenMass=hydrogen_mass
    )

    return prmtop.topology, inpcrd.positions, system



In [7]:
class PointMutationExecutorRBD(PointMutationExecutor):
    def __init__(self,
                 protein_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 clean=False,
                 conduct_endstate_validation=True,
                 ligand_input=None,
                 ligand_index=0,
                 water_model='tip3p',
                 ionic_strength=0.15 * unit.molar,
                 forcefield_files=['amber/protein.ff14SB.xml', 'amber/tip3p_standard.xml'],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                 forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                 periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
                 nonperiodic_forcefield_kwargs=None,
                 small_molecule_forcefields='gaff-2.11',
                 complex_box_dimensions=None,
                 apo_box_dimensions=None,
                 flatten_torsions=False,
                 flatten_exceptions=False,
                 vanilla=True,
                 repartitioned=True,
                 debug_dir=None,
                 **kwargs):
        """
        arguments
            protein_filename : str
                path to protein (to mutate); .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            clean : bool, default False
                whether to clean the PDB for tleap
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory,
                endstate validation cannot and will not be conducted.
            ligand_file : str, default None
                path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb
            ligand_index : int, default 0
                which ligand to use
            water_model : str, default 'tip3p'
                solvent model to use for solvation
            ionic_strength : float * unit.molar, default 0.15 * unit.molar
                the total concentration of ions (both positive and negative) to add using Modeller.
                This does not include ions that are added to neutralize the system.
                Note that only monovalent ions are currently supported.
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
                periodic forcefield kwargs for system parametrization
            nonperiodic_forcefield_kwargs : dict, default None
                non-periodic forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            complex_box_dimensions : Vec3, default None
                define box dimensions of complex phase;
                if None, padding is 1nm
            apo_box_dimensions :  Vec3, default None
                define box dimensions of apo phase phase;
                if None, padding is 1nm
            flatten_torsions : bool, default False
                in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1
            flatten_exceptions : bool, default False
                in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
            vanilla : bool, default True
                whether to generate a vanilla HybridTopologyFactory
            repartitioned : bool, default True
                whether to generate a RepartitionedHybridTopologyFactory
            debug_dir : str, default None
                if specified, debug output files will be saved here
        TODO : allow argument for spectator ligands besides the 'ligand_file'
        """
        
        # Make debug directory
        is_temp = False
        if debug_dir:
            if not os.path.exists(debug_dir):
                os.system(f"mkdir {debug_dir}")
        else:
            debug_dir = tempfile.mkdtemp()
            is_temp = True
        
        ## Generate the old topology, positions, and system
        # Prep PDBs for tleap
        _logger.info("Editing PDBs for tleap")
        protein_name = os.path.basename(protein_filename)
        ligand_name = os.path.basename(ligand_input)
        protein_tleap = os.path.join(debug_dir, f"{protein_name[:-4]}_tleap.pdb")
        ligand_tleap = os.path.join(debug_dir, f"{ligand_name[:-4]}_tleap.pdb")
        if clean:
            edit_pdb_for_tleap(protein_filename, protein_tleap)
            edit_pdb_for_tleap(ligand_input, ligand_tleap)
        else:
            os.system(f"cp {protein_filename} {protein_tleap}")
            os.system(f"cp {ligand_input} {ligand_tleap}")
        
        # Edit tleap files
        _logger.info("Editing tleap.in input files")
        apo_tleap_prefix = os.path.join(debug_dir, "1_rbd_tleap")
        complex_tleap_prefix = os.path.join(debug_dir, "1_rbd_ace2_tleap")
        apo_template = resource_filename('perses', 'data/rbd-ace2/1_rbd_template_tleap.in')
        complex_template = resource_filename('perses', 'data/rbd-ace2/1_rbd_ace2_template_tleap.in')
        edit_tleap_in_inputs(apo_template, apo_tleap_prefix, debug_dir)
        edit_tleap_in_inputs(complex_template, complex_tleap_prefix, debug_dir)
        
        _logger.info("Editing tleap.in number of ions")
        edit_tleap_in_ions(apo_tleap_prefix)
        edit_tleap_in_ions(complex_tleap_prefix)
        
        # Generating old systems
        _logger.info("Generating solvated old systems")
        apo_topology, apo_positions, apo_system = generate_tleap_system(apo_tleap_prefix)
        complex_topology, complex_positions, complex_system = generate_tleap_system(complex_tleap_prefix)
        
        # Correct the topologies
        _logger.info("Correcting tleap topologies")
        apo_topology_corrected = self._correct_topology(apo_topology)
        complex_topology_corrected = self._correct_topology(complex_topology, is_apo=False)
                
        # Format inputs for pipeline
        inputs = [[apo_topology_corrected, apo_positions, apo_system, apo_tleap_prefix, False], [complex_topology_corrected, complex_positions, complex_system, complex_tleap_prefix, True]]
        
        # Make system generator -- note this is only for system_generator.forcefield call in PointMutationEngine init
        molecules = []
        self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                                barostat=barostat,
                                                forcefield_kwargs=forcefield_kwargs,
                                                periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                                small_molecule_forcefield=small_molecule_forcefields,
                                                molecules=molecules,
                                                cache=None)
        
        # Run pipeline...
        htfs = []
        for (top, pos, sys, tleap_prefix, is_complex) in inputs:
            name = 'rbd_ace2' if is_complex else 'rbd'
            if name == 'rbd_ace2':
                continue
            _logger.info(f"Generating topology proposal for {name}")
            point_mutation_engine = PointMutationEngineRBD(wildtype_topology=top,
                                                         system_generator=self.system_generator,
                                                         chain_id=mutation_chain_id, # Denote the chain id allowed to mutate (it's always a string variable)
                                                         max_point_mutants=1,
                                                         residues_allowed_to_mutate=[mutation_residue_id], # The residue ids allowed to mutate
                                                         allowed_mutations=[(mutation_residue_id, proposed_residue)], # The residue ids allowed to mutate with the three-letter code allowed to change
                                                         aggregate=True) # Always allow aggregation
            
            topology_proposal, new_positions = point_mutation_engine.propose(sys, top, pos, tleap_prefix, is_complex, debug_dir)
            
            self.new_positions = new_positions
            self.old_positions = pos
#             print("after propose: ", pos[3235])
#             self.old_topology = top
#             self.new_topology = topology_proposal.new_topology
            
#             new_topology = point_mutation_engine.propose(sys, top, pos, tleap_prefix, is_complex, debug_dir)
            
#             self.new_topology = new_topology
            
            factories = []
            if vanilla:
                repartitioned_endstate = None
                self.generate_htf(HybridTopologyFactory, topology_proposal, pos, new_positions, flatten_exceptions, flatten_torsions, repartitioned_endstate, is_complex)
            if repartitioned:
                for repartitioned_endstate in [0, 1]:
                    self.generate_htf(RepartitionedHybridTopologyFactory, topology_proposal, pos, new_positions, flatten_exceptions, flatten_torsions, repartitioned_endstate, is_complex)
          
        if is_temp:
            shutil.rmtree(debug_dir)
    
    def generate_htf(self, factory, topology_proposal, old_positions, new_positions, flatten_exceptions, flatten_torsions, repartitioned_endstate, is_complex):
        htf = factory(topology_proposal=topology_proposal,
                                      current_positions=old_positions,
                                      new_positions=new_positions,
                                      use_dispersion_correction=False,
                                      functions=None,
                                      softcore_alpha=None,
                                      bond_softening_constant=1.0,
                                      angle_softening_constant=1.0,
                                      soften_only_new=False,
                                      neglected_new_angle_terms=[],
                                      neglected_old_angle_terms=[],
                                      softcore_LJ_v2=True,
                                      softcore_electrostatics=True,
                                      softcore_LJ_v2_alpha=0.85,
                                      softcore_electrostatics_alpha=0.3,
                                      softcore_sigma_Q=1.0,
                                      interpolate_old_and_new_14s=flatten_exceptions,
                                      omitted_terms=None,
                                      endstate=repartitioned_endstate,
                                      flatten_torsions=flatten_torsions)
        if is_complex:
            if factory == HybridTopologyFactory:
                self.complex_htf = htf
            elif factory == RepartitionedHybridTopologyFactory:
                if repartitioned_endstate == 0:
                    self.complex_rhtf_0 = htf
                elif repartitioned_endstate == 1:
                    self.complex_rhtf_1 = htf
        else:
            if factory == HybridTopologyFactory:
                self.apo_htf = htf
            elif factory == RepartitionedHybridTopologyFactory:
                if repartitioned_endstate == 0:
                    self.apo_rhtf_0 = htf
                elif repartitioned_endstate == 1:
                    self.apo_rhtf_1 = htf

    def get_complex_rhtf_0(self):
        return self.complex_rhtf_0

    def get_apo_rhtf_0(self):
        return self.apo_rhtf_0
    
    def get_complex_rhtf_1(self):
        return self.complex_rhtf_1

    def get_apo_rhtf_1(self):
        return self.apo_rhtf_1
    
    def _correct_topology(self, original_topology, is_apo=True):
    
        """
        Correct topology to use the right RBD:ACE2 chain and residue ids.
        
        Parameters
        ----------
        original_topology : simtk.openmm.app.Topology object
            The original topology
        is_apo : bool, default True
            Indicates whether the topology is for apo or complex.
        Returns
        -------
        corrected_topology : simtk.openmm.app.Topology object
            The corrected topology
        """
    
        # Create new topology and positions
        corrected_topology = app.Topology()
        corrected_topology.setPeriodicBoxVectors(original_topology.getPeriodicBoxVectors())

        # Create new chains
        corrected_chain_R = corrected_topology.addChain(id="R")
        corrected_chain_X = corrected_topology.addChain(id="X")
        if not is_apo:
            corrected_chain_C = corrected_topology.addChain(id="C")
            corrected_chain_D = corrected_topology.addChain(id="D")
            corrected_chain_E = corrected_topology.addChain(id="E")
        corrected_chain_Y = corrected_topology.addChain(id="Y")

        # Specify the starting residue ids for each chain
        d_current_start = {"C": 18, "E": 1, "R": 332, "X": 528, "D": 727}

        # Copy residues and atoms to corrected topology and create split into multiple chains. 
        # Also rename residues based on d_current_start 
        d_original_to_corrected = {} # Key: atom in original topology, Value: atom in corrected topology 
        for res in original_topology.residues():  
            residue_id = int(res.id)
            if res.name not in ['HOH', 'Na+', 'Cl-']:
                if residue_id <= 196:
                    corrected_res = corrected_topology.addResidue(res.name, corrected_chain_R, id=str(d_current_start["R"]), insertionCode=res.insertionCode)
                    for atom in res.atoms():
                        corrected_atom = corrected_topology.addAtom(atom.name, atom.element, corrected_res)
                        d_original_to_corrected[atom] = corrected_atom
                    d_current_start["R"] += 1
                elif residue_id >= 197 and residue_id <= 206:
                    corrected_res = corrected_topology.addResidue(res.name, corrected_chain_X, id=str(d_current_start["X"]), insertionCode=res.insertionCode)
                    for atom in res.atoms():
                        corrected_atom = corrected_topology.addAtom(atom.name, atom.element, corrected_res)
                        d_original_to_corrected[atom] = corrected_atom
                    d_current_start["X"] += 1
                elif residue_id >= 207 and residue_id <= 915:
                    corrected_res = corrected_topology.addResidue(res.name, corrected_chain_C, id=str(d_current_start["C"]), insertionCode=res.insertionCode)
                    for atom in res.atoms():
                        corrected_atom = corrected_topology.addAtom(atom.name, atom.element, corrected_res)
                        d_original_to_corrected[atom] = corrected_atom
                    d_current_start["C"] += 1
                elif residue_id >= 916 and residue_id <= 973:
                    corrected_res = corrected_topology.addResidue(res.name, corrected_chain_D, id=str(d_current_start["D"]), insertionCode=res.insertionCode)
                    for atom in res.atoms():
                        corrected_atom = corrected_topology.addAtom(atom.name, atom.element, corrected_res)
                        d_original_to_corrected[atom] = corrected_atom
                    d_current_start["D"] += 1
                elif residue_id >= 974 and residue_id <= 975:
                    corrected_res = corrected_topology.addResidue(res.name, corrected_chain_E, id=str(d_current_start["E"]), insertionCode=res.insertionCode)
                    for atom in res.atoms():
                        corrected_atom = corrected_topology.addAtom(atom.name, atom.element, corrected_res)
                        d_original_to_corrected[atom] = corrected_atom
                    d_current_start["E"] += 1
            else:
                corrected_res = corrected_topology.addResidue(res.name, corrected_chain_Y, id=res.id, insertionCode=res.insertionCode)
                for atom in res.atoms():
                    corrected_atom = corrected_topology.addAtom(atom.name, atom.element, corrected_res)
                    d_original_to_corrected[atom] = corrected_atom

        # Copy bonds to corrected topology
        for bond in original_topology.bonds():
            atom_1 = bond[0]
            atom_2 = bond[1]
            atom_1_corrected = d_original_to_corrected[atom_1]
            atom_2_corrected = d_original_to_corrected[atom_2]
            corrected_topology.addBond(atom_1_corrected, atom_2_corrected)
        
        return corrected_topology


In [8]:
# solvent_delivery = PointMutationExecutor2("0_rbd.pdb",
#                         'R', # First chain is the barstar one
#                         '439',
#                         'ALA',
#                         ligand_input="0_ace2.pdb",
#                         ionic_strength=0.05*unit.molar,
#                         flatten_torsions=True,
#                         flatten_exceptions=True
#                        )

rbd_file = resource_filename('perses', 'data/rbd-ace2/0_rbd.pdb')
ace2_file = resource_filename('perses', 'data/rbd-ace2/0_ace2.pdb')


solvent_delivery = PointMutationExecutorRBD(rbd_file,
                        'R', # First chain is the barstar one
                        '439',
                        'LYS',
                        ligand_input=ace2_file,
                        ionic_strength=0.05*unit.molar,
                        flatten_torsions=True,
                        flatten_exceptions=True, 
                        debug_dir='/data/chodera/zhangi/perses_benchmark/neq/14/1/debug/'
                       )


INFO:root:Editing PDBs for tleap
INFO:root:Editing tleap.in input files
INFO:root:Editing tleap.in number of ions


num positive: 50
num negative: 52.0


INFO:root:Generating solvated old systems


num positive: 187.0
num negative: 167


INFO:root:Correcting tleap topologies
INFO:root:Generating topology proposal for rbd
INFO:root:	Conducting polymer point mutation proposal...
INFO:root:Adding new atoms
INFO:proposal_generator:Using matching_criterion to chose best atom map
INFO:proposal_generator:Scaffold has symmetry of 0
INFO:proposal_generator:len [{4: 4, 12: 10, 11: 11}]
INFO:proposal_generator:{4: 4, 12: 10, 11: 11}
INFO:proposal_generator:Only one map so returning that one
INFO:proposal_generator:{4: 4, 12: 10, 11: 11}
INFO:root:Prepping for mutation
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  residx = np.zeros_like(criteria[0], dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  mask = np.zeros(len(group), dtype=np.bool)
  np.array(sorted(unique_bonds)), 4)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1

0it [00:00, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for atom_pymol in tqdm_notebook(mutated_pdb.topology.atoms()):


0it [00:00, ?it/s]

INFO:root:Generating new system
INFO:root:Finishing up topology proposal
INFO:relative:*** Generating vanilla HybridTopologyFactory ***
INFO:relative:Beginning nonbonded method, total particle, barostat, and exceptions retrieval...
INFO:relative:Flattening torsions of unique new/old at lambda = 0/1
INFO:relative:Flattening exceptions of unique new/old at lambda = 0/1
INFO:relative:Old system forces: dict_keys(['HarmonicBondForce', 'HarmonicAngleForce', 'PeriodicTorsionForce', 'NonbondedForce'])
INFO:relative:New system forces: dict_keys(['HarmonicBondForce', 'HarmonicAngleForce', 'PeriodicTorsionForce', 'NonbondedForce'])
INFO:relative:No unknown forces.
INFO:relative:Nonbonded method to be used (i.e. from old system): 4
INFO:relative:Adding and mapping old atoms to hybrid system...
INFO:relative:Adding and mapping new atoms to hybrid system...
INFO:relative:No MonteCarloBarostat added.
INFO:relative:getDefaultPeriodicBoxVectors added to hybrid: [Quantity(value=Vec3(x=7.7354839, y=0.0,

In [17]:
app.PDBFile.writeFile(solvent_delivery.get_apo_htf()._topology_proposal.old_topology, solvent_delivery.old_positions, open("test_old.pdb", "w"), keepIds=True)
app.PDBFile.writeFile(solvent_delivery.get_apo_htf()._topology_proposal.new_topology, solvent_delivery.new_positions, open("test_new.pdb", "w"), keepIds=True)

In [16]:
for atom in solvent_delivery.old_topology.atoms():
    if atom.residue.chain.id == 'Y':
        print(atom)

<Atom 3235 (Na+) of chain 2 residue 206 (Na+)>
<Atom 3236 (Cl-) of chain 2 residue 207 (Cl-)>
<Atom 3237 (Na+) of chain 2 residue 208 (Na+)>
<Atom 3238 (Cl-) of chain 2 residue 209 (Cl-)>
<Atom 3239 (Na+) of chain 2 residue 210 (Na+)>
<Atom 3240 (Cl-) of chain 2 residue 211 (Cl-)>
<Atom 3241 (Na+) of chain 2 residue 212 (Na+)>
<Atom 3242 (Cl-) of chain 2 residue 213 (Cl-)>
<Atom 3243 (Na+) of chain 2 residue 214 (Na+)>
<Atom 3244 (Cl-) of chain 2 residue 215 (Cl-)>
<Atom 3245 (Na+) of chain 2 residue 216 (Na+)>
<Atom 3246 (Cl-) of chain 2 residue 217 (Cl-)>
<Atom 3247 (Na+) of chain 2 residue 218 (Na+)>
<Atom 3248 (Cl-) of chain 2 residue 219 (Cl-)>
<Atom 3249 (Na+) of chain 2 residue 220 (Na+)>
<Atom 3250 (Cl-) of chain 2 residue 221 (Cl-)>
<Atom 3251 (Na+) of chain 2 residue 222 (Na+)>
<Atom 3252 (Cl-) of chain 2 residue 223 (Cl-)>
<Atom 3253 (Na+) of chain 2 residue 224 (Na+)>
<Atom 3254 (Cl-) of chain 2 residue 225 (Cl-)>
<Atom 3255 (Na+) of chain 2 residue 226 (Na+)>
<Atom 3256 (C

<Atom 5418 (H2) of chain 2 residue 1001 (HOH)>
<Atom 5419 (O) of chain 2 residue 1002 (HOH)>
<Atom 5420 (H1) of chain 2 residue 1002 (HOH)>
<Atom 5421 (H2) of chain 2 residue 1002 (HOH)>
<Atom 5422 (O) of chain 2 residue 1003 (HOH)>
<Atom 5423 (H1) of chain 2 residue 1003 (HOH)>
<Atom 5424 (H2) of chain 2 residue 1003 (HOH)>
<Atom 5425 (O) of chain 2 residue 1004 (HOH)>
<Atom 5426 (H1) of chain 2 residue 1004 (HOH)>
<Atom 5427 (H2) of chain 2 residue 1004 (HOH)>
<Atom 5428 (O) of chain 2 residue 1005 (HOH)>
<Atom 5429 (H1) of chain 2 residue 1005 (HOH)>
<Atom 5430 (H2) of chain 2 residue 1005 (HOH)>
<Atom 5431 (O) of chain 2 residue 1006 (HOH)>
<Atom 5432 (H1) of chain 2 residue 1006 (HOH)>
<Atom 5433 (H2) of chain 2 residue 1006 (HOH)>
<Atom 5434 (O) of chain 2 residue 1007 (HOH)>
<Atom 5435 (H1) of chain 2 residue 1007 (HOH)>
<Atom 5436 (H2) of chain 2 residue 1007 (HOH)>
<Atom 5437 (O) of chain 2 residue 1008 (HOH)>
<Atom 5438 (H1) of chain 2 residue 1008 (HOH)>
<Atom 5439 (H2) of c

<Atom 7917 (H2) of chain 2 residue 1834 (HOH)>
<Atom 7918 (O) of chain 2 residue 1835 (HOH)>
<Atom 7919 (H1) of chain 2 residue 1835 (HOH)>
<Atom 7920 (H2) of chain 2 residue 1835 (HOH)>
<Atom 7921 (O) of chain 2 residue 1836 (HOH)>
<Atom 7922 (H1) of chain 2 residue 1836 (HOH)>
<Atom 7923 (H2) of chain 2 residue 1836 (HOH)>
<Atom 7924 (O) of chain 2 residue 1837 (HOH)>
<Atom 7925 (H1) of chain 2 residue 1837 (HOH)>
<Atom 7926 (H2) of chain 2 residue 1837 (HOH)>
<Atom 7927 (O) of chain 2 residue 1838 (HOH)>
<Atom 7928 (H1) of chain 2 residue 1838 (HOH)>
<Atom 7929 (H2) of chain 2 residue 1838 (HOH)>
<Atom 7930 (O) of chain 2 residue 1839 (HOH)>
<Atom 7931 (H1) of chain 2 residue 1839 (HOH)>
<Atom 7932 (H2) of chain 2 residue 1839 (HOH)>
<Atom 7933 (O) of chain 2 residue 1840 (HOH)>
<Atom 7934 (H1) of chain 2 residue 1840 (HOH)>
<Atom 7935 (H2) of chain 2 residue 1840 (HOH)>
<Atom 7936 (O) of chain 2 residue 1841 (HOH)>
<Atom 7937 (H1) of chain 2 residue 1841 (HOH)>
<Atom 7938 (H2) of c

<Atom 10417 (O) of chain 2 residue 2668 (HOH)>
<Atom 10418 (H1) of chain 2 residue 2668 (HOH)>
<Atom 10419 (H2) of chain 2 residue 2668 (HOH)>
<Atom 10420 (O) of chain 2 residue 2669 (HOH)>
<Atom 10421 (H1) of chain 2 residue 2669 (HOH)>
<Atom 10422 (H2) of chain 2 residue 2669 (HOH)>
<Atom 10423 (O) of chain 2 residue 2670 (HOH)>
<Atom 10424 (H1) of chain 2 residue 2670 (HOH)>
<Atom 10425 (H2) of chain 2 residue 2670 (HOH)>
<Atom 10426 (O) of chain 2 residue 2671 (HOH)>
<Atom 10427 (H1) of chain 2 residue 2671 (HOH)>
<Atom 10428 (H2) of chain 2 residue 2671 (HOH)>
<Atom 10429 (O) of chain 2 residue 2672 (HOH)>
<Atom 10430 (H1) of chain 2 residue 2672 (HOH)>
<Atom 10431 (H2) of chain 2 residue 2672 (HOH)>
<Atom 10432 (O) of chain 2 residue 2673 (HOH)>
<Atom 10433 (H1) of chain 2 residue 2673 (HOH)>
<Atom 10434 (H2) of chain 2 residue 2673 (HOH)>
<Atom 10435 (O) of chain 2 residue 2674 (HOH)>
<Atom 10436 (H1) of chain 2 residue 2674 (HOH)>
<Atom 10437 (H2) of chain 2 residue 2674 (HOH)>

<Atom 12916 (O) of chain 2 residue 3501 (HOH)>
<Atom 12917 (H1) of chain 2 residue 3501 (HOH)>
<Atom 12918 (H2) of chain 2 residue 3501 (HOH)>
<Atom 12919 (O) of chain 2 residue 3502 (HOH)>
<Atom 12920 (H1) of chain 2 residue 3502 (HOH)>
<Atom 12921 (H2) of chain 2 residue 3502 (HOH)>
<Atom 12922 (O) of chain 2 residue 3503 (HOH)>
<Atom 12923 (H1) of chain 2 residue 3503 (HOH)>
<Atom 12924 (H2) of chain 2 residue 3503 (HOH)>
<Atom 12925 (O) of chain 2 residue 3504 (HOH)>
<Atom 12926 (H1) of chain 2 residue 3504 (HOH)>
<Atom 12927 (H2) of chain 2 residue 3504 (HOH)>
<Atom 12928 (O) of chain 2 residue 3505 (HOH)>
<Atom 12929 (H1) of chain 2 residue 3505 (HOH)>
<Atom 12930 (H2) of chain 2 residue 3505 (HOH)>
<Atom 12931 (O) of chain 2 residue 3506 (HOH)>
<Atom 12932 (H1) of chain 2 residue 3506 (HOH)>
<Atom 12933 (H2) of chain 2 residue 3506 (HOH)>
<Atom 12934 (O) of chain 2 residue 3507 (HOH)>
<Atom 12935 (H1) of chain 2 residue 3507 (HOH)>
<Atom 12936 (H2) of chain 2 residue 3507 (HOH)>

<Atom 15416 (H1) of chain 2 residue 4334 (HOH)>
<Atom 15417 (H2) of chain 2 residue 4334 (HOH)>
<Atom 15418 (O) of chain 2 residue 4335 (HOH)>
<Atom 15419 (H1) of chain 2 residue 4335 (HOH)>
<Atom 15420 (H2) of chain 2 residue 4335 (HOH)>
<Atom 15421 (O) of chain 2 residue 4336 (HOH)>
<Atom 15422 (H1) of chain 2 residue 4336 (HOH)>
<Atom 15423 (H2) of chain 2 residue 4336 (HOH)>
<Atom 15424 (O) of chain 2 residue 4337 (HOH)>
<Atom 15425 (H1) of chain 2 residue 4337 (HOH)>
<Atom 15426 (H2) of chain 2 residue 4337 (HOH)>
<Atom 15427 (O) of chain 2 residue 4338 (HOH)>
<Atom 15428 (H1) of chain 2 residue 4338 (HOH)>
<Atom 15429 (H2) of chain 2 residue 4338 (HOH)>
<Atom 15430 (O) of chain 2 residue 4339 (HOH)>
<Atom 15431 (H1) of chain 2 residue 4339 (HOH)>
<Atom 15432 (H2) of chain 2 residue 4339 (HOH)>
<Atom 15433 (O) of chain 2 residue 4340 (HOH)>
<Atom 15434 (H1) of chain 2 residue 4340 (HOH)>
<Atom 15435 (H2) of chain 2 residue 4340 (HOH)>
<Atom 15436 (O) of chain 2 residue 4341 (HOH)>

<Atom 17915 (H1) of chain 2 residue 5167 (HOH)>
<Atom 17916 (H2) of chain 2 residue 5167 (HOH)>
<Atom 17917 (O) of chain 2 residue 5168 (HOH)>
<Atom 17918 (H1) of chain 2 residue 5168 (HOH)>
<Atom 17919 (H2) of chain 2 residue 5168 (HOH)>
<Atom 17920 (O) of chain 2 residue 5169 (HOH)>
<Atom 17921 (H1) of chain 2 residue 5169 (HOH)>
<Atom 17922 (H2) of chain 2 residue 5169 (HOH)>
<Atom 17923 (O) of chain 2 residue 5170 (HOH)>
<Atom 17924 (H1) of chain 2 residue 5170 (HOH)>
<Atom 17925 (H2) of chain 2 residue 5170 (HOH)>
<Atom 17926 (O) of chain 2 residue 5171 (HOH)>
<Atom 17927 (H1) of chain 2 residue 5171 (HOH)>
<Atom 17928 (H2) of chain 2 residue 5171 (HOH)>
<Atom 17929 (O) of chain 2 residue 5172 (HOH)>
<Atom 17930 (H1) of chain 2 residue 5172 (HOH)>
<Atom 17931 (H2) of chain 2 residue 5172 (HOH)>
<Atom 17932 (O) of chain 2 residue 5173 (HOH)>
<Atom 17933 (H1) of chain 2 residue 5173 (HOH)>
<Atom 17934 (H2) of chain 2 residue 5173 (HOH)>
<Atom 17935 (O) of chain 2 residue 5174 (HOH)>

<Atom 20415 (H2) of chain 2 residue 6000 (HOH)>
<Atom 20416 (O) of chain 2 residue 6001 (HOH)>
<Atom 20417 (H1) of chain 2 residue 6001 (HOH)>
<Atom 20418 (H2) of chain 2 residue 6001 (HOH)>
<Atom 20419 (O) of chain 2 residue 6002 (HOH)>
<Atom 20420 (H1) of chain 2 residue 6002 (HOH)>
<Atom 20421 (H2) of chain 2 residue 6002 (HOH)>
<Atom 20422 (O) of chain 2 residue 6003 (HOH)>
<Atom 20423 (H1) of chain 2 residue 6003 (HOH)>
<Atom 20424 (H2) of chain 2 residue 6003 (HOH)>
<Atom 20425 (O) of chain 2 residue 6004 (HOH)>
<Atom 20426 (H1) of chain 2 residue 6004 (HOH)>
<Atom 20427 (H2) of chain 2 residue 6004 (HOH)>
<Atom 20428 (O) of chain 2 residue 6005 (HOH)>
<Atom 20429 (H1) of chain 2 residue 6005 (HOH)>
<Atom 20430 (H2) of chain 2 residue 6005 (HOH)>
<Atom 20431 (O) of chain 2 residue 6006 (HOH)>
<Atom 20432 (H1) of chain 2 residue 6006 (HOH)>
<Atom 20433 (H2) of chain 2 residue 6006 (HOH)>
<Atom 20434 (O) of chain 2 residue 6007 (HOH)>
<Atom 20435 (H1) of chain 2 residue 6007 (HOH)>

<Atom 22914 (H2) of chain 2 residue 6833 (HOH)>
<Atom 22915 (O) of chain 2 residue 6834 (HOH)>
<Atom 22916 (H1) of chain 2 residue 6834 (HOH)>
<Atom 22917 (H2) of chain 2 residue 6834 (HOH)>
<Atom 22918 (O) of chain 2 residue 6835 (HOH)>
<Atom 22919 (H1) of chain 2 residue 6835 (HOH)>
<Atom 22920 (H2) of chain 2 residue 6835 (HOH)>
<Atom 22921 (O) of chain 2 residue 6836 (HOH)>
<Atom 22922 (H1) of chain 2 residue 6836 (HOH)>
<Atom 22923 (H2) of chain 2 residue 6836 (HOH)>
<Atom 22924 (O) of chain 2 residue 6837 (HOH)>
<Atom 22925 (H1) of chain 2 residue 6837 (HOH)>
<Atom 22926 (H2) of chain 2 residue 6837 (HOH)>
<Atom 22927 (O) of chain 2 residue 6838 (HOH)>
<Atom 22928 (H1) of chain 2 residue 6838 (HOH)>
<Atom 22929 (H2) of chain 2 residue 6838 (HOH)>
<Atom 22930 (O) of chain 2 residue 6839 (HOH)>
<Atom 22931 (H1) of chain 2 residue 6839 (HOH)>
<Atom 22932 (H2) of chain 2 residue 6839 (HOH)>
<Atom 22933 (O) of chain 2 residue 6840 (HOH)>
<Atom 22934 (H1) of chain 2 residue 6840 (HOH)>

<Atom 25414 (O) of chain 2 residue 7667 (HOH)>
<Atom 25415 (H1) of chain 2 residue 7667 (HOH)>
<Atom 25416 (H2) of chain 2 residue 7667 (HOH)>
<Atom 25417 (O) of chain 2 residue 7668 (HOH)>
<Atom 25418 (H1) of chain 2 residue 7668 (HOH)>
<Atom 25419 (H2) of chain 2 residue 7668 (HOH)>
<Atom 25420 (O) of chain 2 residue 7669 (HOH)>
<Atom 25421 (H1) of chain 2 residue 7669 (HOH)>
<Atom 25422 (H2) of chain 2 residue 7669 (HOH)>
<Atom 25423 (O) of chain 2 residue 7670 (HOH)>
<Atom 25424 (H1) of chain 2 residue 7670 (HOH)>
<Atom 25425 (H2) of chain 2 residue 7670 (HOH)>
<Atom 25426 (O) of chain 2 residue 7671 (HOH)>
<Atom 25427 (H1) of chain 2 residue 7671 (HOH)>
<Atom 25428 (H2) of chain 2 residue 7671 (HOH)>
<Atom 25429 (O) of chain 2 residue 7672 (HOH)>
<Atom 25430 (H1) of chain 2 residue 7672 (HOH)>
<Atom 25431 (H2) of chain 2 residue 7672 (HOH)>
<Atom 25432 (O) of chain 2 residue 7673 (HOH)>
<Atom 25433 (H1) of chain 2 residue 7673 (HOH)>
<Atom 25434 (H2) of chain 2 residue 7673 (HOH)>

<Atom 27913 (O) of chain 2 residue 8500 (HOH)>
<Atom 27914 (H1) of chain 2 residue 8500 (HOH)>
<Atom 27915 (H2) of chain 2 residue 8500 (HOH)>
<Atom 27916 (O) of chain 2 residue 8501 (HOH)>
<Atom 27917 (H1) of chain 2 residue 8501 (HOH)>
<Atom 27918 (H2) of chain 2 residue 8501 (HOH)>
<Atom 27919 (O) of chain 2 residue 8502 (HOH)>
<Atom 27920 (H1) of chain 2 residue 8502 (HOH)>
<Atom 27921 (H2) of chain 2 residue 8502 (HOH)>
<Atom 27922 (O) of chain 2 residue 8503 (HOH)>
<Atom 27923 (H1) of chain 2 residue 8503 (HOH)>
<Atom 27924 (H2) of chain 2 residue 8503 (HOH)>
<Atom 27925 (O) of chain 2 residue 8504 (HOH)>
<Atom 27926 (H1) of chain 2 residue 8504 (HOH)>
<Atom 27927 (H2) of chain 2 residue 8504 (HOH)>
<Atom 27928 (O) of chain 2 residue 8505 (HOH)>
<Atom 27929 (H1) of chain 2 residue 8505 (HOH)>
<Atom 27930 (H2) of chain 2 residue 8505 (HOH)>
<Atom 27931 (O) of chain 2 residue 8506 (HOH)>
<Atom 27932 (H1) of chain 2 residue 8506 (HOH)>
<Atom 27933 (H2) of chain 2 residue 8506 (HOH)>

<Atom 30413 (H1) of chain 2 residue 9333 (HOH)>
<Atom 30414 (H2) of chain 2 residue 9333 (HOH)>
<Atom 30415 (O) of chain 2 residue 9334 (HOH)>
<Atom 30416 (H1) of chain 2 residue 9334 (HOH)>
<Atom 30417 (H2) of chain 2 residue 9334 (HOH)>
<Atom 30418 (O) of chain 2 residue 9335 (HOH)>
<Atom 30419 (H1) of chain 2 residue 9335 (HOH)>
<Atom 30420 (H2) of chain 2 residue 9335 (HOH)>
<Atom 30421 (O) of chain 2 residue 9336 (HOH)>
<Atom 30422 (H1) of chain 2 residue 9336 (HOH)>
<Atom 30423 (H2) of chain 2 residue 9336 (HOH)>
<Atom 30424 (O) of chain 2 residue 9337 (HOH)>
<Atom 30425 (H1) of chain 2 residue 9337 (HOH)>
<Atom 30426 (H2) of chain 2 residue 9337 (HOH)>
<Atom 30427 (O) of chain 2 residue 9338 (HOH)>
<Atom 30428 (H1) of chain 2 residue 9338 (HOH)>
<Atom 30429 (H2) of chain 2 residue 9338 (HOH)>
<Atom 30430 (O) of chain 2 residue 9339 (HOH)>
<Atom 30431 (H1) of chain 2 residue 9339 (HOH)>
<Atom 30432 (H2) of chain 2 residue 9339 (HOH)>
<Atom 30433 (O) of chain 2 residue 9340 (HOH)>

<Atom 32912 (H1) of chain 2 residue 10166 (HOH)>
<Atom 32913 (H2) of chain 2 residue 10166 (HOH)>
<Atom 32914 (O) of chain 2 residue 10167 (HOH)>
<Atom 32915 (H1) of chain 2 residue 10167 (HOH)>
<Atom 32916 (H2) of chain 2 residue 10167 (HOH)>
<Atom 32917 (O) of chain 2 residue 10168 (HOH)>
<Atom 32918 (H1) of chain 2 residue 10168 (HOH)>
<Atom 32919 (H2) of chain 2 residue 10168 (HOH)>
<Atom 32920 (O) of chain 2 residue 10169 (HOH)>
<Atom 32921 (H1) of chain 2 residue 10169 (HOH)>
<Atom 32922 (H2) of chain 2 residue 10169 (HOH)>
<Atom 32923 (O) of chain 2 residue 10170 (HOH)>
<Atom 32924 (H1) of chain 2 residue 10170 (HOH)>
<Atom 32925 (H2) of chain 2 residue 10170 (HOH)>
<Atom 32926 (O) of chain 2 residue 10171 (HOH)>
<Atom 32927 (H1) of chain 2 residue 10171 (HOH)>
<Atom 32928 (H2) of chain 2 residue 10171 (HOH)>
<Atom 32929 (O) of chain 2 residue 10172 (HOH)>
<Atom 32930 (H1) of chain 2 residue 10172 (HOH)>
<Atom 32931 (H2) of chain 2 residue 10172 (HOH)>
<Atom 32932 (O) of chain 2

<Atom 35412 (H2) of chain 2 residue 10999 (HOH)>
<Atom 35413 (O) of chain 2 residue 11000 (HOH)>
<Atom 35414 (H1) of chain 2 residue 11000 (HOH)>
<Atom 35415 (H2) of chain 2 residue 11000 (HOH)>
<Atom 35416 (O) of chain 2 residue 11001 (HOH)>
<Atom 35417 (H1) of chain 2 residue 11001 (HOH)>
<Atom 35418 (H2) of chain 2 residue 11001 (HOH)>
<Atom 35419 (O) of chain 2 residue 11002 (HOH)>
<Atom 35420 (H1) of chain 2 residue 11002 (HOH)>
<Atom 35421 (H2) of chain 2 residue 11002 (HOH)>
<Atom 35422 (O) of chain 2 residue 11003 (HOH)>
<Atom 35423 (H1) of chain 2 residue 11003 (HOH)>
<Atom 35424 (H2) of chain 2 residue 11003 (HOH)>
<Atom 35425 (O) of chain 2 residue 11004 (HOH)>
<Atom 35426 (H1) of chain 2 residue 11004 (HOH)>
<Atom 35427 (H2) of chain 2 residue 11004 (HOH)>
<Atom 35428 (O) of chain 2 residue 11005 (HOH)>
<Atom 35429 (H1) of chain 2 residue 11005 (HOH)>
<Atom 35430 (H2) of chain 2 residue 11005 (HOH)>
<Atom 35431 (O) of chain 2 residue 11006 (HOH)>
<Atom 35432 (H1) of chain 2

<Atom 37911 (H2) of chain 2 residue 11832 (HOH)>
<Atom 37912 (O) of chain 2 residue 11833 (HOH)>
<Atom 37913 (H1) of chain 2 residue 11833 (HOH)>
<Atom 37914 (H2) of chain 2 residue 11833 (HOH)>
<Atom 37915 (O) of chain 2 residue 11834 (HOH)>
<Atom 37916 (H1) of chain 2 residue 11834 (HOH)>
<Atom 37917 (H2) of chain 2 residue 11834 (HOH)>
<Atom 37918 (O) of chain 2 residue 11835 (HOH)>
<Atom 37919 (H1) of chain 2 residue 11835 (HOH)>
<Atom 37920 (H2) of chain 2 residue 11835 (HOH)>
<Atom 37921 (O) of chain 2 residue 11836 (HOH)>
<Atom 37922 (H1) of chain 2 residue 11836 (HOH)>
<Atom 37923 (H2) of chain 2 residue 11836 (HOH)>
<Atom 37924 (O) of chain 2 residue 11837 (HOH)>
<Atom 37925 (H1) of chain 2 residue 11837 (HOH)>
<Atom 37926 (H2) of chain 2 residue 11837 (HOH)>
<Atom 37927 (O) of chain 2 residue 11838 (HOH)>
<Atom 37928 (H1) of chain 2 residue 11838 (HOH)>
<Atom 37929 (H2) of chain 2 residue 11838 (HOH)>
<Atom 37930 (O) of chain 2 residue 11839 (HOH)>
<Atom 37931 (H1) of chain 2

<Atom 40411 (O) of chain 2 residue 12666 (HOH)>
<Atom 40412 (H1) of chain 2 residue 12666 (HOH)>
<Atom 40413 (H2) of chain 2 residue 12666 (HOH)>
<Atom 40414 (O) of chain 2 residue 12667 (HOH)>
<Atom 40415 (H1) of chain 2 residue 12667 (HOH)>
<Atom 40416 (H2) of chain 2 residue 12667 (HOH)>
<Atom 40417 (O) of chain 2 residue 12668 (HOH)>
<Atom 40418 (H1) of chain 2 residue 12668 (HOH)>
<Atom 40419 (H2) of chain 2 residue 12668 (HOH)>
<Atom 40420 (O) of chain 2 residue 12669 (HOH)>
<Atom 40421 (H1) of chain 2 residue 12669 (HOH)>
<Atom 40422 (H2) of chain 2 residue 12669 (HOH)>
<Atom 40423 (O) of chain 2 residue 12670 (HOH)>
<Atom 40424 (H1) of chain 2 residue 12670 (HOH)>
<Atom 40425 (H2) of chain 2 residue 12670 (HOH)>
<Atom 40426 (O) of chain 2 residue 12671 (HOH)>
<Atom 40427 (H1) of chain 2 residue 12671 (HOH)>
<Atom 40428 (H2) of chain 2 residue 12671 (HOH)>
<Atom 40429 (O) of chain 2 residue 12672 (HOH)>
<Atom 40430 (H1) of chain 2 residue 12672 (HOH)>
<Atom 40431 (H2) of chain 2

<Atom 42910 (O) of chain 2 residue 13499 (HOH)>
<Atom 42911 (H1) of chain 2 residue 13499 (HOH)>
<Atom 42912 (H2) of chain 2 residue 13499 (HOH)>
<Atom 42913 (O) of chain 2 residue 13500 (HOH)>
<Atom 42914 (H1) of chain 2 residue 13500 (HOH)>
<Atom 42915 (H2) of chain 2 residue 13500 (HOH)>
<Atom 42916 (O) of chain 2 residue 13501 (HOH)>
<Atom 42917 (H1) of chain 2 residue 13501 (HOH)>
<Atom 42918 (H2) of chain 2 residue 13501 (HOH)>
<Atom 42919 (O) of chain 2 residue 13502 (HOH)>
<Atom 42920 (H1) of chain 2 residue 13502 (HOH)>
<Atom 42921 (H2) of chain 2 residue 13502 (HOH)>
<Atom 42922 (O) of chain 2 residue 13503 (HOH)>
<Atom 42923 (H1) of chain 2 residue 13503 (HOH)>
<Atom 42924 (H2) of chain 2 residue 13503 (HOH)>
<Atom 42925 (O) of chain 2 residue 13504 (HOH)>
<Atom 42926 (H1) of chain 2 residue 13504 (HOH)>
<Atom 42927 (H2) of chain 2 residue 13504 (HOH)>
<Atom 42928 (O) of chain 2 residue 13505 (HOH)>
<Atom 42929 (H1) of chain 2 residue 13505 (HOH)>
<Atom 42930 (H2) of chain 2

<Atom 45410 (H1) of chain 2 residue 14332 (HOH)>
<Atom 45411 (H2) of chain 2 residue 14332 (HOH)>
<Atom 45412 (O) of chain 2 residue 14333 (HOH)>
<Atom 45413 (H1) of chain 2 residue 14333 (HOH)>
<Atom 45414 (H2) of chain 2 residue 14333 (HOH)>
<Atom 45415 (O) of chain 2 residue 14334 (HOH)>
<Atom 45416 (H1) of chain 2 residue 14334 (HOH)>
<Atom 45417 (H2) of chain 2 residue 14334 (HOH)>
<Atom 45418 (O) of chain 2 residue 14335 (HOH)>
<Atom 45419 (H1) of chain 2 residue 14335 (HOH)>
<Atom 45420 (H2) of chain 2 residue 14335 (HOH)>
<Atom 45421 (O) of chain 2 residue 14336 (HOH)>
<Atom 45422 (H1) of chain 2 residue 14336 (HOH)>
<Atom 45423 (H2) of chain 2 residue 14336 (HOH)>
<Atom 45424 (O) of chain 2 residue 14337 (HOH)>
<Atom 45425 (H1) of chain 2 residue 14337 (HOH)>
<Atom 45426 (H2) of chain 2 residue 14337 (HOH)>
<Atom 45427 (O) of chain 2 residue 14338 (HOH)>
<Atom 45428 (H1) of chain 2 residue 14338 (HOH)>
<Atom 45429 (H2) of chain 2 residue 14338 (HOH)>
<Atom 45430 (O) of chain 2

<Atom 47909 (H1) of chain 2 residue 15165 (HOH)>
<Atom 47910 (H2) of chain 2 residue 15165 (HOH)>
<Atom 47911 (O) of chain 2 residue 15166 (HOH)>
<Atom 47912 (H1) of chain 2 residue 15166 (HOH)>
<Atom 47913 (H2) of chain 2 residue 15166 (HOH)>
<Atom 47914 (O) of chain 2 residue 15167 (HOH)>
<Atom 47915 (H1) of chain 2 residue 15167 (HOH)>
<Atom 47916 (H2) of chain 2 residue 15167 (HOH)>
<Atom 47917 (O) of chain 2 residue 15168 (HOH)>
<Atom 47918 (H1) of chain 2 residue 15168 (HOH)>
<Atom 47919 (H2) of chain 2 residue 15168 (HOH)>
<Atom 47920 (O) of chain 2 residue 15169 (HOH)>
<Atom 47921 (H1) of chain 2 residue 15169 (HOH)>
<Atom 47922 (H2) of chain 2 residue 15169 (HOH)>
<Atom 47923 (O) of chain 2 residue 15170 (HOH)>
<Atom 47924 (H1) of chain 2 residue 15170 (HOH)>
<Atom 47925 (H2) of chain 2 residue 15170 (HOH)>
<Atom 47926 (O) of chain 2 residue 15171 (HOH)>
<Atom 47927 (H1) of chain 2 residue 15171 (HOH)>
<Atom 47928 (H2) of chain 2 residue 15171 (HOH)>
<Atom 47929 (O) of chain 2

<Atom 50409 (H2) of chain 2 residue 15998 (HOH)>
<Atom 50410 (O) of chain 2 residue 15999 (HOH)>
<Atom 50411 (H1) of chain 2 residue 15999 (HOH)>
<Atom 50412 (H2) of chain 2 residue 15999 (HOH)>
<Atom 50413 (O) of chain 2 residue 16000 (HOH)>
<Atom 50414 (H1) of chain 2 residue 16000 (HOH)>
<Atom 50415 (H2) of chain 2 residue 16000 (HOH)>
<Atom 50416 (O) of chain 2 residue 16001 (HOH)>
<Atom 50417 (H1) of chain 2 residue 16001 (HOH)>
<Atom 50418 (H2) of chain 2 residue 16001 (HOH)>
<Atom 50419 (O) of chain 2 residue 16002 (HOH)>
<Atom 50420 (H1) of chain 2 residue 16002 (HOH)>
<Atom 50421 (H2) of chain 2 residue 16002 (HOH)>
<Atom 50422 (O) of chain 2 residue 16003 (HOH)>
<Atom 50423 (H1) of chain 2 residue 16003 (HOH)>
<Atom 50424 (H2) of chain 2 residue 16003 (HOH)>
<Atom 50425 (O) of chain 2 residue 16004 (HOH)>
<Atom 50426 (H1) of chain 2 residue 16004 (HOH)>
<Atom 50427 (H2) of chain 2 residue 16004 (HOH)>
<Atom 50428 (O) of chain 2 residue 16005 (HOH)>
<Atom 50429 (H1) of chain 2

<Atom 52908 (H2) of chain 2 residue 16831 (HOH)>
<Atom 52909 (O) of chain 2 residue 16832 (HOH)>
<Atom 52910 (H1) of chain 2 residue 16832 (HOH)>
<Atom 52911 (H2) of chain 2 residue 16832 (HOH)>
<Atom 52912 (O) of chain 2 residue 16833 (HOH)>
<Atom 52913 (H1) of chain 2 residue 16833 (HOH)>
<Atom 52914 (H2) of chain 2 residue 16833 (HOH)>
<Atom 52915 (O) of chain 2 residue 16834 (HOH)>
<Atom 52916 (H1) of chain 2 residue 16834 (HOH)>
<Atom 52917 (H2) of chain 2 residue 16834 (HOH)>
<Atom 52918 (O) of chain 2 residue 16835 (HOH)>
<Atom 52919 (H1) of chain 2 residue 16835 (HOH)>
<Atom 52920 (H2) of chain 2 residue 16835 (HOH)>
<Atom 52921 (O) of chain 2 residue 16836 (HOH)>
<Atom 52922 (H1) of chain 2 residue 16836 (HOH)>
<Atom 52923 (H2) of chain 2 residue 16836 (HOH)>
<Atom 52924 (O) of chain 2 residue 16837 (HOH)>
<Atom 52925 (H1) of chain 2 residue 16837 (HOH)>
<Atom 52926 (H2) of chain 2 residue 16837 (HOH)>
<Atom 52927 (O) of chain 2 residue 16838 (HOH)>
<Atom 52928 (H1) of chain 2

<Atom 55408 (O) of chain 2 residue 17665 (HOH)>
<Atom 55409 (H1) of chain 2 residue 17665 (HOH)>
<Atom 55410 (H2) of chain 2 residue 17665 (HOH)>
<Atom 55411 (O) of chain 2 residue 17666 (HOH)>
<Atom 55412 (H1) of chain 2 residue 17666 (HOH)>
<Atom 55413 (H2) of chain 2 residue 17666 (HOH)>
<Atom 55414 (O) of chain 2 residue 17667 (HOH)>
<Atom 55415 (H1) of chain 2 residue 17667 (HOH)>
<Atom 55416 (H2) of chain 2 residue 17667 (HOH)>
<Atom 55417 (O) of chain 2 residue 17668 (HOH)>
<Atom 55418 (H1) of chain 2 residue 17668 (HOH)>
<Atom 55419 (H2) of chain 2 residue 17668 (HOH)>
<Atom 55420 (O) of chain 2 residue 17669 (HOH)>
<Atom 55421 (H1) of chain 2 residue 17669 (HOH)>
<Atom 55422 (H2) of chain 2 residue 17669 (HOH)>
<Atom 55423 (O) of chain 2 residue 17670 (HOH)>
<Atom 55424 (H1) of chain 2 residue 17670 (HOH)>
<Atom 55425 (H2) of chain 2 residue 17670 (HOH)>
<Atom 55426 (O) of chain 2 residue 17671 (HOH)>
<Atom 55427 (H1) of chain 2 residue 17671 (HOH)>
<Atom 55428 (H2) of chain 2

<Atom 57907 (O) of chain 2 residue 18498 (HOH)>
<Atom 57908 (H1) of chain 2 residue 18498 (HOH)>
<Atom 57909 (H2) of chain 2 residue 18498 (HOH)>
<Atom 57910 (O) of chain 2 residue 18499 (HOH)>
<Atom 57911 (H1) of chain 2 residue 18499 (HOH)>
<Atom 57912 (H2) of chain 2 residue 18499 (HOH)>
<Atom 57913 (O) of chain 2 residue 18500 (HOH)>
<Atom 57914 (H1) of chain 2 residue 18500 (HOH)>
<Atom 57915 (H2) of chain 2 residue 18500 (HOH)>
<Atom 57916 (O) of chain 2 residue 18501 (HOH)>
<Atom 57917 (H1) of chain 2 residue 18501 (HOH)>
<Atom 57918 (H2) of chain 2 residue 18501 (HOH)>
<Atom 57919 (O) of chain 2 residue 18502 (HOH)>
<Atom 57920 (H1) of chain 2 residue 18502 (HOH)>
<Atom 57921 (H2) of chain 2 residue 18502 (HOH)>
<Atom 57922 (O) of chain 2 residue 18503 (HOH)>
<Atom 57923 (H1) of chain 2 residue 18503 (HOH)>
<Atom 57924 (H2) of chain 2 residue 18503 (HOH)>
<Atom 57925 (O) of chain 2 residue 18504 (HOH)>
<Atom 57926 (H1) of chain 2 residue 18504 (HOH)>
<Atom 57927 (H2) of chain 2

In [9]:
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/1/"

In [10]:
apo_htf = solvent_delivery.get_apo_htf()

In [11]:
with open(os.path.join(outdir, "1_apo.pickle"), "wb") as f:
    pickle.dump(apo_htf, f)

In [17]:
complex_htf = solvent_delivery.get_complex_htf()

AttributeError: 'PointMutationExecutorRBD' object has no attribute 'complex_htf'

In [None]:
with open(os.path.join(outdir, "1_complex.pickle"), "wb") as f:
    pickle.dump(complex_htf, f)

In [12]:
apo_rhtf_0 = solvent_delivery.get_apo_rhtf_0()

In [13]:
with open(os.path.join(outdir, "1_apo_0.pickle"), "wb") as f:
    pickle.dump(apo_rhtf_0, f)

In [None]:
complex_rhtf_0 = solvent_delivery.get_complex_rhtf_0()

In [112]:
with open(os.path.join(outdir, "1_complex_0.pickle"), "wb") as f:
    pickle.dump(complex_rhtf_0, f)

In [14]:
apo_rhtf_1 = solvent_delivery.get_apo_rhtf_1()

In [15]:
with open(os.path.join(outdir, "1_apo_1.pickle"), "wb") as f:
    pickle.dump(apo_rhtf_1, f)

In [115]:
complex_rhtf_1 = solvent_delivery.get_complex_rhtf_1()

In [116]:
with open(os.path.join(outdir, "1_complex_1.pickle"), "wb") as f:
    pickle.dump(complex_rhtf_1, f)

In [5]:
with open(os.path.join(outdir, "1_complex_1.pickle"), "rb") as f:
    htf = pickle.load(f)

In [7]:
app.PDBFile.writeFile(htf._topology_proposal.new_topology, htf.new_positions(htf.hybrid_positions), open("test.pdb", "w"), keepIds=True)

In [8]:
for bond in htf._topology_proposal.new_topology.bonds():
    if bond[0].name == 'SG' and bond[1].name == 'SG':
        print(bond)

Bond(<Atom 2312 (SG) of chain 0 residue 148 (CYS)>, <Atom 2415 (SG) of chain 0 residue 156 (CYS)>)
Bond(<Atom 911 (SG) of chain 0 residue 59 (CYS)>, <Atom 2993 (SG) of chain 0 residue 193 (CYS)>)
Bond(<Atom 732 (SG) of chain 0 residue 47 (CYS)>, <Atom 1536 (SG) of chain 0 residue 100 (CYS)>)
Bond(<Atom 60 (SG) of chain 0 residue 4 (CYS)>, <Atom 463 (SG) of chain 0 residue 29 (CYS)>)
Bond(<Atom 5054 (SG) of chain 2 residue 321 (CYS)>, <Atom 5164 (SG) of chain 2 residue 329 (CYS)>)
Bond(<Atom 8407 (SG) of chain 2 residue 532 (CYS)>, <Atom 8674 (SG) of chain 2 residue 549 (CYS)>)
Bond(<Atom 11411 (SG) of chain 2 residue 718 (CYS)>, <Atom 11591 (SG) of chain 2 residue 730 (CYS)>)


In [36]:
new_topology = solvent_delivery.new_topology
old_topology = solvent_delivery.old_topology
current_positions = solvent_delivery.old_positions

In [37]:
# Load mutated (protonated) PDB
mutated_pdb = app.PDBFile("/data/chodera/zhangi/perses_benchmark/neq/14/1/debug/3_rbd_mutant.pdb")
mutated_n_atoms = mutated_pdb.topology.getNumAtoms()

# Map atom indices from pymol PDB to atom indices in new_topology
d_omm = {} # key: (atom name, residue id, chain id), value: atom index
for atom_omm in tqdm_notebook(new_topology.atoms()):
    d_omm[(atom_omm.name, atom_omm.residue.id, atom_omm.residue.chain.id)] = atom_omm.index

d_map = {} # key: atom index from pymol mutated PDB, value: atom index in new_topology
for atom_pymol in tqdm_notebook(mutated_pdb.topology.atoms()):
    match_index = d_omm[(atom_pymol.name, atom_pymol.residue.id, atom_pymol.residue.chain.id)]
    d_map[atom_pymol.index] = match_index

# Rearrange positions based on new_topology and add units to positions
dim_1, dim_2 = np.array(mutated_pdb.positions).shape
mutated_positions = unit.Quantity(np.zeros(shape=(dim_1, dim_2)), unit=unit.nanometers)
positions = unit.quantity.Quantity(value = np.array([list(atom_pos) for atom_pos in mutated_pdb.positions.value_in_unit_system(unit.md_unit_system)]), unit = unit.nanometers)
for k, v in d_map.items():
    mutated_positions[v] = positions[k]



Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for atom_omm in tqdm_notebook(new_topology.atoms()):


0it [00:00, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for atom_pymol in tqdm_notebook(mutated_pdb.topology.atoms()):


0it [00:00, ?it/s]

In [38]:
# Copy solvent positions from old positions
solvent_atoms = [atom for atom in old_topology.atoms() if atom.residue.chain.id == 'Y']
first_solvent_atom = solvent_atoms[0].index
new_positions = unit.Quantity(np.zeros([mutated_n_atoms + len(solvent_atoms), 3]), unit=unit.nanometers)
new_positions[:mutated_n_atoms, :] = mutated_positions
new_positions[mutated_n_atoms:, :] = current_positions[first_solvent_atom:]

In [39]:
solvent_atoms

[<Atom 3235 (Na+) of chain 2 residue 206 (Na+)>,
 <Atom 3236 (Cl-) of chain 2 residue 207 (Cl-)>,
 <Atom 3237 (Na+) of chain 2 residue 208 (Na+)>,
 <Atom 3238 (Cl-) of chain 2 residue 209 (Cl-)>,
 <Atom 3239 (Na+) of chain 2 residue 210 (Na+)>,
 <Atom 3240 (Cl-) of chain 2 residue 211 (Cl-)>,
 <Atom 3241 (Na+) of chain 2 residue 212 (Na+)>,
 <Atom 3242 (Cl-) of chain 2 residue 213 (Cl-)>,
 <Atom 3243 (Na+) of chain 2 residue 214 (Na+)>,
 <Atom 3244 (Cl-) of chain 2 residue 215 (Cl-)>,
 <Atom 3245 (Na+) of chain 2 residue 216 (Na+)>,
 <Atom 3246 (Cl-) of chain 2 residue 217 (Cl-)>,
 <Atom 3247 (Na+) of chain 2 residue 218 (Na+)>,
 <Atom 3248 (Cl-) of chain 2 residue 219 (Cl-)>,
 <Atom 3249 (Na+) of chain 2 residue 220 (Na+)>,
 <Atom 3250 (Cl-) of chain 2 residue 221 (Cl-)>,
 <Atom 3251 (Na+) of chain 2 residue 222 (Na+)>,
 <Atom 3252 (Cl-) of chain 2 residue 223 (Cl-)>,
 <Atom 3253 (Na+) of chain 2 residue 224 (Na+)>,
 <Atom 3254 (Cl-) of chain 2 residue 225 (Cl-)>,
 <Atom 3255 (Na+) of

In [40]:
first_solvent_atom

3235

In [41]:
current_positions[3235]

Quantity(value=Vec3(x=13.27288, y=81.255762, z=36.190162), unit=angstrom)

In [42]:
new_positions[mutated_n_atoms]

Quantity(value=array([1.327288 , 8.1255762, 3.6190162]), unit=nanometer)

In [43]:
app.PDBFile.writeFile(new_topology, new_positions, open("test_outside.pdb", 'w'), keepIds=True)