In [11]:
from perses.app.relative_point_mutation_setup import PointMutationExecutor
import pickle

In [13]:
from __future__ import absolute_import

from perses.utils.openeye import createOEMolFromSDF, extractPositionsFromOEMol
from perses.annihilation.relative import HybridTopologyFactory, RepartitionedHybridTopologyFactory
from perses.rjmc.topology_proposal import PointMutationEngine
from perses.rjmc.geometry import FFAllAngleGeometryEngine

import simtk.openmm as openmm
import simtk.openmm.app as app
import simtk.unit as unit
import numpy as np
from openmoltools import forcefield_generators
import mdtraj as md
from openmmtools.constants import kB
from perses.tests.utils import validate_endstate_energies
from openff.toolkit.topology import Molecule
from openmmforcefields.generators import SystemGenerator

# Thermodynamic settings: kT and its inverse (beta) at the chosen temperature
ENERGY_THRESHOLD = 1e-2
temperature = 300 * unit.kelvin
kT = kB * temperature
beta = 1.0 / kT

# Residue names of the amino acids listed here as ring-containing
ring_amino_acids = [
    'TYR',
    'PHE',
    'TRP',
    'PRO',
    'HIS',
]

# Logger named "setup" with its threshold set to INFO
import logging
_logger = logging.getLogger("setup")
_logger.setLevel(logging.INFO)

class PointMutationExecutor2(PointMutationExecutor):
    """
    Subclass of PointMutationExecutor whose constructor only loads the protein and the
    (optional) ligand and assembles the unsolvated complex topology and positions.

    The constructor does not call ``PointMutationExecutor.__init__``: no solvation, mutation
    proposal or hybrid topology factory generation is performed here.

    Attributes (only set when a recognised ``ligand_input`` is supplied)
        complex_topology : simtk.openmm.app.Topology
            protein topology joined with the ligand topology
        complex_positions : unit.Quantity wrapping a list of openmm.Vec3, in nanometers
            protein positions followed by ligand positions
    """

    def __init__(self,
                 protein_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 conduct_endstate_validation=True,
                 ligand_input=None,
                 ligand_index=0,
                 allow_undefined_stereo_sdf=False,
                 water_model='tip3p',
                 ionic_strength=0.15 * unit.molar,
                 forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                 forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                 periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
                 nonperiodic_forcefield_kwargs=None,
                 small_molecule_forcefields='gaff-2.11',
                 complex_box_dimensions=None,
                 apo_box_dimensions=None,
                 flatten_torsions=False,
                 flatten_exceptions=False,
                 generate_unmodified_hybrid_topology_factory=True,
                 generate_rest_capable_hybrid_topology_factory=False,
                 **kwargs):
        """
        Load the protein (and ligand, if given) and store the joined, unsolvated complex on
        ``self.complex_topology`` / ``self.complex_positions``.

        If ``ligand_input`` is falsy, the protein is loaded but no attributes are set.
        If ``ligand_input`` is neither a path ending in '.sdf' / 'pdb' nor an oechem.OEMol,
        a warning is logged and the constructor returns without setting any attributes.

        arguments read by this constructor
            protein_filename : str
                path to protein (to mutate); .pdb
            ligand_input : str or oechem.OEMol, default None
                path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb,
                or an already-loaded OEMol
            ligand_index : int, default 0
                which ligand to use (forwarded to createOEMolFromSDF for .sdf input)
            allow_undefined_stereo_sdf : bool, default False
                whether to allow an SDF file to contain undefined stereocenters

        arguments accepted but NOT read by this constructor
        (presumably kept to mirror the parent class signature -- TODO confirm; the
        descriptions below are carried over from the original docstring)
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory,
                endstate validation cannot and will not be conducted.
            water_model : str, default 'tip3p'
                solvent model to use for solvation
            ionic_strength : float * unit.molar, default 0.15 * unit.molar
                the total concentration of ions (both positive and negative) to add using Modeller.
                This does not include ions that are added to neutralize the system.
                Note that only monovalent ions are currently supported.
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
                barostat to use
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
                periodic forcefield kwargs for system parametrization
            nonperiodic_forcefield_kwargs : dict, default None
                non-periodic forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            complex_box_dimensions : Vec3, default None
                define box dimensions of complex phase;
                if None, padding is 1nm
            apo_box_dimensions :  Vec3, default None
                define box dimensions of apo phase;
                if None, padding is 1nm
            flatten_torsions : bool, default False
                in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
            flatten_exceptions : bool, default False
                in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
            generate_unmodified_hybrid_topology_factory : bool, default True
                whether to generate a vanilla HybridTopologyFactory
            generate_rest_capable_hybrid_topology_factory : bool, default False
                whether to generate a RepartitionedHybridTopologyFactory
        TODO : allow argument for spectator ligands besides the 'ligand_file'
        """
        from openeye import oechem

        unrecognised_ligand_msg = 'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file'

        # First thing to do is load the apo protein to mutate...
        # (context manager closes the file even if PDB parsing raises)
        with open(protein_filename, 'r') as protein_pdbfile:
            protein_pdb = app.PDBFile(protein_pdbfile)
        protein_positions = protein_pdb.positions
        protein_md_topology = md.Topology.from_openmm(protein_pdb.topology)
        protein_n_atoms = protein_md_topology.n_atoms

        # Without a ligand there is no complex to assemble
        if not ligand_input:
            return

        # Load the ligand
        # `molecules` is filled but not consumed by this constructor; the from_openeye calls
        # are kept because they still run with allow_undefined_stereo=False
        molecules = []
        if isinstance(ligand_input, str):
            if ligand_input.endswith('.sdf'): # small molecule
                # Read from `ligand_input` (the original referenced an undefined `ligand_file`)
                ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index, allow_undefined_stereo=allow_undefined_stereo_sdf)
                molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                ligand_positions = extractPositionsFromOEMol(ligand_mol)
                ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)

            elif ligand_input.endswith('pdb'): # protein
                with open(ligand_input, 'r') as ligand_pdbfile:
                    ligand_pdb = app.PDBFile(ligand_pdbfile)
                ligand_positions = ligand_pdb.positions
                ligand_md_topology = md.Topology.from_openmm(ligand_pdb.topology)

            else:
                # A path with an unsupported extension previously fell through and crashed on
                # an unbound `ligand_md_topology`; report it like any other unrecognised input
                _logger.warning(unrecognised_ligand_msg)
                return

        elif isinstance(ligand_input, oechem.OEMol): # oemol object
            molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
            ligand_positions = extractPositionsFromOEMol(ligand_input)
            ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_input)
            ligand_md_topology = md.Topology.from_openmm(ligand_topology)

        else:
            _logger.warning(unrecognised_ligand_msg)
            return

        ligand_n_atoms = ligand_md_topology.n_atoms

        # Now create a complex: protein atoms first, ligand atoms appended after them
        complex_md_topology = protein_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
        complex_positions[:protein_n_atoms, :] = protein_positions
        complex_positions[protein_n_atoms:, :] = ligand_positions

        # Convert positions back to openmm vec3 objects
        complex_positions_vec3 = [
            openmm.Vec3(*position.value_in_unit_system(unit.md_unit_system))
            for position in complex_positions
        ]
        self.complex_topology = complex_topology
        self.complex_positions = unit.Quantity(value=complex_positions_vec3, unit=unit.nanometer)

In [14]:
# Build the executor from the two input PDB files (protein to mutate + ligand/partner).
# NOTE(review): the original inline comment described chain '1' as "the barstar one", which
# looks carried over from a different system given these input file names -- confirm the chain id.
solvent_delivery = PointMutationExecutor2(
    protein_filename="../../input/rbd_protonated.pdb",
    mutation_chain_id='1',
    mutation_residue_id='501',
    proposed_residue='TYR',
    ligand_input="../../input/ace2_protonated.pdb",
    forcefield_files=[
        'amber14/protein.ff14SB.xml',
        'amber14/tip3p.xml',
        '/home/zhangi/choderalab/openmmforcefields/amber/ffxml/GLYCAM_06j-1.xml',
    ],
    conduct_endstate_validation=False,
    flatten_torsions=True,
    flatten_exceptions=True,
    generate_unmodified_hybrid_topology_factory=True,
    generate_rest_capable_hybrid_topology_factory=True,
)

In [16]:
# Force field XML files (protein, GLYCAM, TIP3P) loaded into a single ForceField
ffxml = [
    '/home/zhangi/choderalab/openmmforcefields/amber/ffxml/protein.ff14SB.xml',
    "/home/zhangi/choderalab/openmmforcefields/amber/ffxml/GLYCAM_06j-1.xml",
    "/home/zhangi/choderalab/openmmforcefields/amber/ffxml/tip3p_standard.xml",
]
ff = app.ForceField(*ffxml)

In [17]:
# Wrap the unsolvated complex in a Modeller, then add solvent and ions using the loaded force field
modeller = app.Modeller(
    solvent_delivery.complex_topology,
    solvent_delivery.complex_positions,
)
modeller.addSolvent(
    ff,
    padding=0.9 * unit.nanometers,
    ionicStrength=0.15 * unit.molar,
)


In [18]:
# Write the solvated complex to disk. The context manager closes (and therefore flushes) the
# output file; the original passed an anonymous open() handle that was never closed.
with open("/data/chodera/zhangi/perses_benchmark/neq/14/113/complex_solvated_fixed.pdb", "w") as solvated_pdbfile:
    app.PDBFile.writeFile(modeller.topology, modeller.positions, solvated_pdbfile, keepIds=True)