In [1]:
import numpy as np
import pandas as pd
try:
    import openmm as mm
    import openmm.app as app
    import openmm.unit as unit
except ImportError:
    import simtk.openmm as mm
    import simtk.openmm.app as app
    import simtk.unit as unit
import mdtraj
import sys
import os

try:
    import nglview
except ImportError:
    print('Please install nglview to visualize molecules in the jupyter notebooks.')

sys.path.append('../..')
from openabc.forcefields.parsers import MpipiProteinParser, MpipiRNAParser
from openabc.forcefields.mpipi_model import MpipiModel
import openabc.utils.helper_functions as helper_functions
from openabc.utils.insert import insert_molecules

# Name of the OpenMM platform used to run the simulation. This value is passed
# as `platform_name` to `set_simulation` when the simulation is created.
# Keep exactly one of the assignments below uncommented.
#platform_name = 'CPU'
platform_name = 'CUDA'
#platform_name = 'OpenCL'




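Here we show how to use the Mpipi model to set up protein-RNA simulations. The same workflow applies to mixtures such as polyR, polyK, and polyU; in the cells below the RNA lines are commented out, and the example system consists of copies of a single protein chain (A1 LCD). First we build an individual CG chain from the given sequence, then we insert copies of the chain into a simulation box. Finally, we run the MD simulation.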

In [2]:
# Build a residue-level coarse-grained (CG) chain for the protein sequence and
# write it to a PDB file that the later cells read back.
# Exactly one `sequence` assignment should be active; the commented line is an
# alternative variant of the same protein.
sequence = 'GSMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNFSGRGGFGGSRGGGGYGGSGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNFGGRSSGGSGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF' # WT
#sequence = 'GSMASASSSQRGRSGSGNSGGGRGGGFGGNDNFGRGGNSSGRGGFGGSRGGGGYGGSGDGYNGFGNDGSNSGGGGSSNDFGNYNNQSSNFGPMKGGNFGGRSSGGSGGGGQYSAKPRNQGGYGGSSSSSSSGSGRRF' # ARO-
ca_pdb = 'init_A1LCD_WT_CA.pdb'
# r0 is presumably the spacing between consecutive CA atoms of the straight
# chain, in nm -- TODO confirm against helper_functions.build_straight_CA_chain.
ca_atoms = helper_functions.build_straight_CA_chain(sequence, r0=0.38)
helper_functions.write_pdb(ca_atoms, ca_pdb)

# Parse the single-chain PDB into a Mpipi protein object; this object is
# appended once per chain copy when the simulation system is assembled.
a1lcd = MpipiProteinParser(ca_pdb)
# RNA parsing is disabled in this example (no RNA chains are inserted below).
#polyU = MpipiRNAParser('polyU_CG.pdb')

Parse molecule with default settings.


In [3]:
# Place n_mol copies of the single-chain PDB into a cubic box and write the
# combined structure to start.pdb.
n_mol = 100
box_size = 100  # edge length of the cubic box (presumably nm -- confirm against insert_molecules)

# Always regenerate start.pdb: if a file from an earlier run is left in place,
# there will be an error for init_coord in the simulation setup step.
if os.path.exists('start.pdb'):
    os.remove('start.pdb')
insert_molecules(ca_pdb, 'start.pdb', n_mol, box=[box_size] * 3)

Successfully inserted 100 molecules.


In [4]:
# Visualize the initial configuration stored in start.pdb.
# NOTE: nglview is imported optionally in the first cell; if that import
# failed, the `nglview` name is undefined and this cell raises NameError.
start_pdb = mdtraj.load_pdb('start.pdb')
view = nglview.show_mdtraj(start_pdb)
view  # last expression of the cell, so the widget is rendered as its output

NGLWidget()

In [5]:
# Set up and run the Mpipi simulation for the system stored in start.pdb.

# Read start.pdb once; it provides both the topology and the initial coordinates.
start_structure = app.PDBFile('start.pdb')
top = start_structure.getTopology()
init_coord = start_structure.getPositions()

# Register one parsed molecule per chain copy that was inserted into start.pdb,
# so the model's molecule list matches the topology.
protein_rna = MpipiModel()
for i in range(n_mol):
    protein_rna.append_mol(a1lcd)
protein_rna.create_system(top, box_a=box_size, box_b=box_size, box_c=box_size)

# Each interaction gets its own force group; group 2 is reserved for RNA bonds,
# which are disabled because this example contains no RNA chains.
protein_rna.add_protein_bonds(force_group=1)
#protein_rna.add_rna_bonds(force_group=2)
protein_rna.add_contacts(force_group=3)
protein_rna.add_dh_elec(ldby=(1/1.26)*unit.nanometer, force_group=4)

# Integrator settings
temperature = 300*unit.kelvin
friction_coeff = 1/unit.picosecond
timestep = 10*unit.femtosecond
integrator = mm.LangevinMiddleIntegrator(temperature, friction_coeff, timestep)

protein_rna.set_simulation(integrator, platform_name=platform_name, init_coord=init_coord)
protein_rna.simulation.minimizeEnergy()

# NOTE(review): the recorded run of this cell ended with "Energy is NaN".
# Checking the potential energy right after minimization separates a bad
# starting structure (e.g. overlapping chains in start.pdb) from an instability
# that only develops during dynamics (e.g. a timestep that is too large).
minimized_state = protein_rna.simulation.context.getState(getEnergy=True)
minimized_energy = minimized_state.getPotentialEnergy().value_in_unit(unit.kilojoule_per_mole)
if not np.isfinite(minimized_energy):
    raise ValueError(
        'Potential energy is not finite after minimization '
        f'({minimized_energy} kJ/mol); check start.pdb for overlapping chains '
        'before running dynamics.'
    )

# Trajectory output and production run
output_interval = 1000  # steps between reported frames
output_dcd = 'output.dcd'
n_steps = 10000  # total number of MD steps
protein_rna.add_reporters(output_interval, output_dcd)
protein_rna.simulation.context.setVelocitiesToTemperature(temperature)
protein_rna.simulation.step(n_steps)

Add protein bonds.
Add nonbonded contacts.
Add Debye-Huckel electrostatic interactions.
Set Debye length as 0.7936507936507936 nm.
Set water dielectric as 80.0.
Use platform: CUDA
Use precision: mixed
#"Step","Time (ps)","Potential Energy (kJ/mole)","Kinetic Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)","Speed (ns/day)"


ValueError: Energy is NaN.  For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#nan

In [None]:
# Load the trajectory written by the simulation, using start.pdb for topology.
traj = mdtraj.load_dcd('output.dcd', top='start.pdb')
# Realign to the origin: subtract from every frame its mean position, taken
# over axis 1 of traj.xyz (the atom axis in mdtraj's coordinate layout).
frame_centroids = traj.xyz.mean(axis=1, keepdims=True)
traj.xyz = traj.xyz - frame_centroids
view = nglview.show_mdtraj(traj)
view

NGLWidget(max_frame=49)