## Run BLUES simulation with OpenMM modeller as input

In [2]:
from blues.moves import SideChainMove
from blues.moves import MoveEngine
from blues.simulation import *
import json
from blues.settings import *


In [3]:
# Parse the YAML configuration file into a plain dict. Later cells read its
# 'system', 'simulation', 'md_reporters' and 'ncmc_reporters' entries and
# overwrite cfg['Structure'].
cfg = Settings('barnase_barstar_complex_solvent.yaml').asDict()


/data/chodera/zhangi/perses_benchmark/blues/barnase_barstar/complex/barnase_barstar


In [4]:
# Create OpenMM system generator
from openmmforcefields.generators import SystemGenerator
import simtk.unit as unit
from simtk.openmm import app
# NOTE: `temperature` is defined here but is not passed to the SystemGenerator below.
temperature = 300 * unit.kelvin

# Force-field XML files handed to the generator (Amber14 protein + TIP3P water).
forcefield_files = [
    'amber14/protein.ff14SB.xml',
    'amber14/tip3p.xml',
]

# Options forwarded when the system is created: H-bond constraints combined
# with hydrogen masses of 4 amu, no CM-motion remover.
forcefield_kwargs = dict(
    removeCMMotion=False,
    ewaldErrorTolerance=0.00025,
    constraints=app.HBonds,
    hydrogenMass=4 * unit.amus,
)
periodic_forcefield_kwargs = dict(nonbondedMethod=app.PME)
nonperiodic_forcefield_kwargs = None

# No small molecules are present; the GAFF name is supplied with an empty molecule list.
small_molecule_forcefields = 'gaff-2.11'
molecules = []

system_generator = SystemGenerator(
    forcefields=forcefield_files,
    forcefield_kwargs=forcefield_kwargs,
    periodic_forcefield_kwargs=periodic_forcefield_kwargs,
    nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
    small_molecule_forcefield=small_molecule_forcefields,
    molecules=molecules,
    cache=None,
)

In [5]:
# Load the complex PDB and wrap its topology/positions in an OpenMM Modeller
pdb = app.PDBFile("../input/mmc2_complex.pdb")
modeller = app.Modeller(pdb.topology, pdb.positions)

# Solvate the Modeller in place using the generator's force field
modeller.addSolvent(
    system_generator.forcefield,
    model='tip3p',
    padding=0.9 * unit.nanometers,
    ionicStrength=0.15 * unit.molar,
)


In [6]:
# Overwrite the parmed structure object stored under cfg['Structure'] with the
# solvated OpenMM Modeller, so the later BLUES calls operate on the Modeller.
cfg['Structure'] = modeller

In [7]:
# Bind the object stored under cfg['Structure'] to `structure`; this is what is
# handed to the subclassed BLUES move and system factory in later cells.
structure = cfg['Structure']

In [8]:
## Subclass the BLUES SideChainMove object to avoid using parmed.Structure object 
## and handle an OpenMM modeller object instead (Changes involve how model positions are set)

class SideChainMoveOpenMM(SideChainMove):
    """SideChainMove whose ``structure`` is an OpenMM Modeller instead of a parmed.Structure.

    Only ``move`` is overridden; the difference is in how the model positions
    are read and written.
    """

    def move(self, context, verbose=False):
        """Rotate the target atoms around a selected bond by angle theta.

        The rotated coordinates are written both to the positions of
        ``self.structure`` and to the given context.

        Parameters
        ----------
        context: simtk.openmm.Context object
            Context containing the positions to be moved.
        verbose : bool, default=False
            Print detailed information about the rotation. Details are also
            printed when the instance attribute ``self.verbose`` is true.

        Returns
        -------
        context: simtk.openmm.Context object
            The same input context, but whose positions were changed by this function.
        """
        import copy
        import numpy

        # Honor the per-call flag in addition to the instance-level setting.
        verbose = verbose or self.verbose

        # Determine the axis, theta, residue, and bond + atoms to be rotated
        theta, target_atoms, res, bond = self.chooseBondandTheta()
        print('Rotating bond: %s in resnum: %s by %.2f radians' % (bond, res, theta))

        # Retrieve the current positions and work on a private copy of them
        initial_positions = context.getState(getPositions=True).getPositions(asNumpy=True)
        nc_positions = copy.deepcopy(initial_positions)

        # Shallow copy: `model.positions` is the same object as `self.structure.positions`
        model = copy.copy(self.structure)

        # Set the modeller model to the same coordinates as the context
        for idx, atom in enumerate(self.all_atoms):
            if verbose:
                print('Before:')
                print(atom, idx)
                print(nc_positions[atom], model.positions[atom])

            model.positions[atom] = nc_positions[atom]

            if verbose:
                print('After:')
                print(nc_positions[atom], model.positions[atom])

        # `positions` aliases `model.positions`; writing to one updates the other
        positions = model.positions

        # Find the rotation axis (unitless) using the updated positions
        axis1 = target_atoms[0]
        axis2 = target_atoms[1]
        rot_axis = (positions[axis1] - positions[axis2]) / positions.unit

        # Calculate the rotation matrix
        rot_matrix = self.rotation_matrix(rot_axis, theta)

        # Apply the rotation matrix to the target atoms
        for atom in target_atoms:
            my_position = positions[atom]

            if verbose:
                print('The current position for %i is: %s' % (atom, my_position))

            # Find the reduced position (subtract out the axis origin)
            red_position = (my_position - positions[axis2])._value
            # Rotate about the origin, then shift back to the axis origin
            new_position = numpy.dot(rot_matrix, red_position) * positions.unit + positions[axis2]

            if verbose: print("The new position should be:", new_position)

            # Update the model, and the copied context array, with the new position
            positions[atom] = new_position
            nc_positions[atom] = new_position

            if verbose:
                print('The updated position for this atom is:', model.positions[atom])

        # Update the actual ncmc context object with the new positions
        context.setPositions(nc_positions)

        # Update the class structure positions
        self.structure.positions = model.positions

        # NOTE: `write_move` is not acted on here; the PDB dump that used
        # `model.save(...)` was disabled for the Modeller-based structure.
        return context

In [9]:
# Instantiate the (modified) BLUES SideChain move for residue number 42.
# NOTE(review): write_move=True is accepted, but SideChainMoveOpenMM.move does
# not write any PDB files (its file-writing code is disabled).
sidechain = SideChainMoveOpenMM(structure, [42], write_move=True)


Note: The above cell specifies that we want to rotate the sidechain of residue 42, but there are two chains (i.e. two residues with index 42), so in the next few cells, I modify the appropriate sidechain attributes (rot_atoms, rot_bonds, and qry_atoms) to only contain the atoms/bonds for residue 42 on barstar.

In [10]:
# Check sidechain.rot_atoms and keep only the one for barstar T42.
# The rotatable bond to keep is identified by the index of its begin atom.
barstar_t42_bgn_idx = 2397

# Iterate over every bond that is present (not a fixed count) so this cell can
# be re-run after sidechain.rot_atoms has been trimmed to a single entry.
rot_atoms = list(sidechain.rot_atoms[42].keys())
desired_rot_atoms = None
for rot_bond in rot_atoms:
    beg_index = rot_bond.GetBgnIdx()
    end_index = rot_bond.GetEndIdx()
    print(beg_index)
    print(end_index)
    if beg_index == barstar_t42_bgn_idx:
        print(rot_bond)
        desired_rot_atoms = {42: {rot_bond: sidechain.rot_atoms[42][rot_bond]}}

# Fail here, with a clear message, rather than with a NameError in a later cell.
if desired_rot_atoms is None:
    raise ValueError('No rotatable bond beginning at atom index %d was found' % barstar_t42_bgn_idx)
        
        

635
638
638
639
2397
2400
2419 (2397C-2400C)


In [11]:
# Check sidechain.rot_bonds and keep only the one for barstar T42.
# The rotatable bond to keep is identified by the index of its begin atom.
barstar_t42_bgn_idx = 2397

# Iterate over every bond that is present (not a fixed count) so this cell can
# be re-run after sidechain.rot_bonds has been trimmed to a single entry.
rot_bonds = list(sidechain.rot_bonds.keys())
desired_rot_bonds = None
for rot_bond in rot_bonds:
    beg_index = rot_bond.GetBgnIdx()
    end_index = rot_bond.GetEndIdx()
    print(beg_index)
    print(end_index)
    if beg_index == barstar_t42_bgn_idx:
        print(rot_bond)
        desired_rot_bonds = {rot_bond: 42}

# Fail here, with a clear message, rather than with a NameError in a later cell.
if desired_rot_bonds is None:
    raise ValueError('No rotatable bond beginning at atom index %d was found' % barstar_t42_bgn_idx)
        
        

635
638
638
639
2397
2400
2419 (2397C-2400C)


In [12]:
# Check sidechain.qry_atoms and keep only the ones for barstar T42,
# i.e. entries whose atom index lies strictly between 2396 and 2410.
desired_qry_atoms = {
    qry_atom: atom_idx
    for qry_atom, atom_idx in sidechain.qry_atoms.items()
    if 2396 < atom_idx < 2410
}
        
        

In [13]:
# Keep only the desired rot_atoms, rot_bonds, qry_atoms
sidechain.rot_bonds = desired_rot_bonds
sidechain.rot_atoms = desired_rot_atoms
sidechain.qry_atoms = desired_qry_atoms
# Must come after the rot_atoms assignment above: atom_indices is bound to the
# trimmed rot_atoms object.
# NOTE(review): this makes atom_indices a nested dict ({resnum: {bond: [atom, ...]}}),
# and it is later passed on as the alchemical atom selection -- confirm that
# generateAlchSystem accepts this shape rather than a flat list of atom indices.
sidechain.atom_indices = sidechain.rot_atoms


Check that the rot_bonds, qry_atoms, and rot_atoms attributes are correct

In [14]:
sidechain.rot_bonds


{<openeye.oechem.OEBondBase; proxy of <Swig Object of type 'OEChem::OEBondBase *' at 0x2aec4b868de0> >: 42}

In [15]:
sidechain.qry_atoms


{<openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e94e0> >: 2400,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e92d0> >: 2401,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e9540> >: 2402,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e9240> >: 2403,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e9450> >: 2404,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e9270> >: 2405,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e9060> >: 2406,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec1c6e95a0> >: 2407,
 <openeye.oechem.OEAtomBase; proxy of <Swig Object of type 'OEChem::OEAtomBase *' at 0x2aec4b8684b0> >: 2408,
 <openeye.

In [16]:
sidechain.rot_atoms


{42: {<openeye.oechem.OEBondBase; proxy of <Swig Object of type 'OEChem::OEBondBase *' at 0x2aec4b868de0> >: [2397,
   2400,
   2402,
   2407,
   2408,
   2409,
   2405,
   2401,
   2406]}}

In [17]:
# Instantiate a BLUES MoveEngine around the trimmed side-chain move; the engine
# is what gets handed to the simulation factory in a later cell.
sidechain_mover = MoveEngine(sidechain)


In [18]:
## Subclass the BLUES SystemFactory object to take in an OpenMM SystemGenerator

class SystemFactoryOpenMM(SystemFactory):
    """SystemFactory that builds the reference system with a SystemGenerator.

    The reference (MD) system is created by calling
    ``system_generator.create_system`` on the structure's topology; the
    alchemical system is then derived from it with the parent's
    ``generateAlchSystem``.
    """

    def __init__(self, structure, atom_indices, system_generator, config=None):
        """Store the inputs and, when a config is given, build both systems.

        Parameters
        ----------
        structure : object with a ``topology`` attribute
            The molecular system to be simulated (an OpenMM Modeller in this notebook).
        atom_indices : object
            Atom selection forwarded unchanged to ``SystemFactory.generateAlchSystem``.
        system_generator : object providing ``create_system(topology)``
            Generator used to create the reference system.
        config : dict, optional
            System configuration. An optional ``'alchemical'`` entry is
            forwarded as keyword arguments to ``generateAlchSystem``. When
            ``config`` is None or empty, no systems are generated and the
            ``md`` / ``alch`` attributes are not set.
        """
        self.structure = structure
        self.atom_indices = atom_indices
        # Work on a shallow copy so that popping 'alchemical' below does not
        # remove it from the caller's dict (which would silently change the
        # result of constructing a second factory from the same config).
        self._config = dict(config) if config else config

        # If parameters for generating the openmm.System is given, make them.
        if self._config:
            # Use function defaults if no alchemical section is provided
            self.alch_config = self._config.pop('alchemical', {})
            self.md = self.generateSystem(self.structure, system_generator, **self._config)
            self.alch = SystemFactory.generateAlchSystem(self.md, self.atom_indices, **self.alch_config)

    def generateSystem(self, structure, system_generator, **kwargs):
        """Create the reference OpenMM System from the structure's topology.

        Parameters
        ----------
        structure : object with a ``topology`` attribute
            The molecular system to be simulated.
        system_generator : object providing ``create_system(topology)``
            Generator that creates the system; all force-field and system
            options are whatever this generator was configured with.
        **kwargs
            Accepted for signature compatibility only and ignored. In
            particular, the remaining entries of the system config that
            ``__init__`` forwards here have no effect on the created system.

        Returns
        -------
        openmm.System
            The value returned by ``system_generator.create_system(structure.topology)``.
        """
        return system_generator.create_system(structure.topology)

In [19]:
# Instantiate the (modified) BLUES SystemFactory: the reference system comes
# from `system_generator`, and cfg['system'] supplies the system configuration
# (including the optional 'alchemical' section).
systems = SystemFactoryOpenMM(structure, sidechain.atom_indices, system_generator, cfg['system'])


In [20]:
## Subclass BLUES SimulationFactory object to avoid using parmed.Structure object, and instead use an
## OpenMM Modeller object. (Changes involves how box vectors are checked/set)

class SimulationFactoryOpenMM(SimulationFactory):
    """SimulationFactory that reads topology, box vectors and positions from an OpenMM Modeller."""

    @classmethod
    def generateSimFromStruct(cls, structure, system, integrator, platform=None, properties={}, **kwargs):
        """Generate the OpenMM Simulation object for the given structure.

        Parameters
        ----------
        structure : object with ``topology`` and ``positions`` attributes
            The entire system to be simulated (an OpenMM Modeller in this notebook).
        system : openmm.System
            The OpenMM System object corresponding to the reference system.
        integrator : openmm.Integrator
            The OpenMM Integrator object for the simulation. It must provide
            ``getTemperature()``, which is used to draw the initial velocities.
        platform : str, default = None
            Name of the OpenMM platform to use.
            If None is specified, the platform is left for OpenMM to choose.
        properties : dict
            Platform properties; keys and values are converted to strings.
            Only used when ``platform`` is given.

        Returns
        -------
        simulation : openmm.Simulation
            The generated OpenMM Simulation from the structure, openmm.System,
            and the integrator.
        """
        topology = structure.topology

        if platform is not None:
            # Specifying platform properties here is used for local development.
            platform = openmm.Platform.getPlatformByName(platform)
            # Make sure key/values are strings
            properties = {str(key): str(value) for key, value in properties.items()}
            simulation = app.Simulation(topology, system, integrator, platform, properties)
        else:
            # Let OpenMM pick the platform
            simulation = app.Simulation(topology, system, integrator)

        # Copy the periodic box (when the topology defines one), then set the
        # initial positions and velocities
        box_vectors = topology.getPeriodicBoxVectors()
        if box_vectors:
            simulation.context.setPeriodicBoxVectors(*box_vectors)
        simulation.context.setPositions(structure.positions)
        simulation.context.setVelocitiesToTemperature(integrator.getTemperature())

        return simulation

In [21]:
# Instantiate the (modified) BLUES SimulationFactory from the generated systems,
# the move engine, and the simulation / reporter sections of cfg.
# In the recorded run this added a MonteCarloBarostat (NPT at 300 K) and used CUDA.
simulations = SimulationFactoryOpenMM(systems, sidechain_mover, cfg['simulation'], cfg['md_reporters'],
                                cfg['ncmc_reporters'])



INFO: Adding MonteCarloBarostat with 1.0 atm. MD simulation will be 300.0 K NPT.
INFO: OpenMM(7.4.2.dev-dc9d188) simulation generated for CUDA platform
system = Linux 
node = lt11 
release = 3.10.0-957.12.2.el7.x86_64 
version = #1 SMP Tue May 14 21:24:32 UTC 2019 
machine = x86_64 
processor = x86_64 
DeviceIndex = 0 
DeviceName = GeForce GTX 1080 Ti 
UseBlockingSync = true 
Precision = mixed 
UseCpuPme = false 
CudaCompiler = /usr/local/cuda/bin/nvcc 
TempDirectory = /scratch/lsftmp/16232491.tmpdir 
CudaHostCompiler =  
DisablePmeStream = false 
DeterministicForces = false 



In [22]:
# Report the potential energy of the MD context before minimizing
state = simulations.md.context.getState(getPositions=True, getEnergy=True)
pre_min_energy = state.getPotentialEnergy().in_units_of(unit.kilocalorie_per_mole)
print('Pre-Minimized energy = {}'.format(pre_min_energy))

# Run energy minimization; maxIterations=0 asks OpenMM to iterate until convergence
simulations.md.minimizeEnergy(maxIterations=0)

# Report the potential energy again after minimizing
state = simulations.md.context.getState(getPositions=True, getEnergy=True)
post_min_energy = state.getPotentialEnergy().in_units_of(unit.kilocalorie_per_mole)
print('Minimized energy = {}'.format(post_min_energy))


Pre-Minimized energy = -3060.1951775699654 kcal/mol
Minimized energy = -115107.38151933263 kcal/mol


In [23]:
# Run 500 steps of plain MD on the minimized system before starting BLUES
simulations.md.step(500)

#"Iter"	"Progress (%)"	"Step"	"Speed (ns/day)"	"Time Remaining"
md: 0	100.0%	500	0	--


In [24]:
# Run BLUES Simulation (NCMC side-chain moves alternated with MD), configured by cfg['simulation'].
# NOTE(review): in the recorded output this run aborts with "Energy is NaN" in the
# MD phase right after the first accepted NCMC move -- the workflow is not yet stable.
blues = BLUESSimulation(simulations, cfg['simulation'])
blues.run()


INFO: Total BLUES Simulation Time = 50.0 ps (10.0 ps/Iter)
Total Force Evaluations = 12500 
Total NCMC time = 40.0 ps (8.0 ps/iter)
Total MD time = 10.0 ps (2.0 ps/iter)
Trajectory Interval = 10.0 ps/frame (1.0 frames/iter)
INFO: Running 5 BLUES iterations...
INFO: BLUES Iteration: 0
INFO: Advancing 2000 NCMC switching steps...
Performing SideChainMoveOpenMM...
Rotating bond: 2419 (2397C-2400C) in resnum: 42 by 4.98 radians
#"Iter"	"Progress (%)"	"Step"	"alchemicalLambda"	"protocolWork"	"Speed (ns/day)"	"Time Remaining"
ncmc: 0	100.0%	2000	0.0	0.0	0	--
NCMC MOVE ACCEPTED: work_ncmc -0.008618425137932685 > randnum -0.34011606054243115
Advancing 500 MD steps...
ERROR: [simulation._stepMD] Energy is NaN
Traceback (most recent call last):
  File "/lila/home/zhangi/choderalab/blues/blues/simulation.py", line 1202, in _stepMD
    self._md_sim.step(1)
  File "/home/zhangi/miniconda3/envs/perses-sims/lib/python3.7/site-packages/simtk/openmm/app/simulation.py", line 132, in step
    self._simul

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
