In [1]:
import os
import numpy as np
import mdtraj as md
import pickle
from simtk.openmm import unit
from tqdm import tqdm_notebook
from simtk.openmm import app
import matplotlib.pyplot as plt
from openeye import oechem



In [2]:
def new_positions(hybrid_positions, hybrid_factory=None):
    """Extract the new-topology atom positions from one frame of hybrid positions.

    Parameters
    ----------
    hybrid_positions : array-like
        Per-atom positions of the hybrid system; must support
        ``hybrid_positions[index_list, :]`` indexing (e.g. an ndarray or a
        Quantity wrapping one).
    hybrid_factory : object, optional
        Object exposing ``_topology_proposal.n_atoms_new`` and
        ``_new_to_hybrid_map``. When omitted, the notebook-level ``htf`` is
        used, which keeps existing one-argument calls working.

    Returns
    -------
    Same type as ``hybrid_positions``
        Rows ordered by new-topology atom index.
    """
    # Prefer an explicitly passed factory so the result does not silently
    # depend on whichever `htf` happens to be loaded in the kernel.
    factory = htf if hybrid_factory is None else hybrid_factory
    n_atoms_new = factory._topology_proposal.n_atoms_new
    hybrid_indices = [factory._new_to_hybrid_map[idx] for idx in range(n_atoms_new)]
    return hybrid_positions[hybrid_indices, :]
    
def old_positions(hybrid_positions, hybrid_factory=None):
    """Extract the old-topology atom positions from one frame of hybrid positions.

    Parameters
    ----------
    hybrid_positions : array-like
        Per-atom positions of the hybrid system; must support
        ``hybrid_positions[index_list, :]`` indexing (e.g. an ndarray or a
        Quantity wrapping one).
    hybrid_factory : object, optional
        Object exposing ``_topology_proposal.n_atoms_old`` and
        ``_old_to_hybrid_map``. When omitted, the notebook-level ``htf`` is
        used, which keeps existing one-argument calls working.

    Returns
    -------
    Same type as ``hybrid_positions``
        Rows ordered by old-topology atom index.
    """
    # Prefer an explicitly passed factory so the result does not silently
    # depend on whichever `htf` happens to be loaded in the kernel.
    factory = htf if hybrid_factory is None else hybrid_factory
    n_atoms_old = factory._topology_proposal.n_atoms_old
    hybrid_indices = [factory._old_to_hybrid_map[idx] for idx in range(n_atoms_old)]
    return hybrid_positions[hybrid_indices, :]

In [6]:
def make_traj(outdir, htf, i, phase, name, endstate, is_old=True, solvent_only=False, sim_length="2ns"):
    """Build, image and save an endstate trajectory from cached hybrid-system snapshots.

    Loads ``{i}_{phase}_{name}_{sim_length}_snapshots.npy`` from ``outdir``,
    maps every hybrid frame onto the old or new topology, attaches the hybrid
    system's default periodic box vectors to every frame, re-images the
    molecules and writes the result back into ``outdir``.

    Parameters
    ----------
    outdir : str
        Directory holding the snapshot file; output files are written here too.
    htf : object
        Hybrid topology factory exposing ``_topology_proposal``,
        ``_old_to_hybrid_map`` / ``_new_to_hybrid_map`` and ``hybrid_system``.
    i : int or str
        Run index used as the file-name prefix.
    phase : str
        Phase label used in file names (the notebook uses 'complex' and 'apo').
    name : str
        Label used in the snapshot file name (e.g. 'asn', 'tyr').
    endstate : int or str
        Endstate label used in the output file names.
    is_old : bool, default True
        Map onto the old topology when True, the new topology otherwise.
    solvent_only : bool, default False
        Keep only atoms matched by the water/ion selection before imaging.
    sim_length : str, default "2ns"
        Length label in the snapshot file name.

    Returns
    -------
    mdtraj.Trajectory
        The imaged trajectory that was written to disk.
    """
    snapshot_path = os.path.join(outdir, f"{i}_{phase}_{name}_{sim_length}_snapshots.npy")
    with open(snapshot_path, "rb") as f:
        cache = np.load(f)

    # Pick the topology and the atom map from the factory that was passed in,
    # rather than from whatever `htf` is currently bound at notebook level.
    proposal = htf._topology_proposal
    if is_old:
        topology = proposal.old_topology
        n_atoms = proposal.n_atoms_old
        to_hybrid = htf._old_to_hybrid_map
    else:
        topology = proposal.new_topology
        n_atoms = proposal.n_atoms_new
        to_hybrid = htf._new_to_hybrid_map
    hybrid_indices = [to_hybrid[idx] for idx in range(n_atoms)]

    # Slice all frames at once; the frame count comes from the data instead of
    # a hard-coded 200, so shorter or longer caches are handled correctly.
    # The cached values are treated as nanometers (as before), which is the
    # unit mdtraj works in, so no unit conversion is needed.
    positions = cache[:, hybrid_indices, :]
    traj = md.Trajectory(positions, md.Topology.from_openmm(topology))

    if solvent_only:
        # Raw string: same selection text as before, without invalid escape sequences.
        traj.atom_slice(traj.topology.select(r"water or resname 'na\+' or resn 'cl\-'"), inplace=True)

    # Set unit cell vectors in traj: one float32 (3, 3) matrix repeated for every frame.
    box_vectors = np.array(
        [val.value_in_unit_system(unit.md_unit_system) for val in htf.hybrid_system.getDefaultPeriodicBoxVectors()],
        dtype=np.float32,
    )
    traj.unitcell_vectors = np.repeat(box_vectors[np.newaxis, :, :], traj.n_frames, axis=0)

    traj = traj.image_molecules()

    output_stem = os.path.join(outdir, f"{i}_{phase}_{endstate}")
    if solvent_only:
        traj.save(f"{output_stem}_solvent.dcd")
        traj[0].save(f"{output_stem}_solvent.pdb")
    else:
        traj.save(f"{output_stem}.dcd")
        # The first frame is written as PDBx/mmCIF (not PDB); the context
        # manager makes sure the file is flushed and closed.
        with open(f"{output_stem}.cif", "w") as cif_file:
            app.PDBxFile.writeFile(traj.topology.to_openmm(), traj.openmm_positions(0), file=cif_file, keepIds=True)
    return traj
       

## 14/113 N501Y

In [10]:
# Run 113 (N501Y), complex phase, endstate 0: export the old-topology
# trajectory built from the 'asn' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/113/"
i = 113
phase = 'complex'
endstate = 0
name = 'asn'  # label in the snapshot file name
is_old = True  # map hybrid positions onto the old topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185377 atoms, 57660 residues, and unitcells at 0x2b278789adf0>

In [11]:
# Run 113 (N501Y), apo phase, endstate 0: export the old-topology
# trajectory built from the 'asn' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/113/"
i = 113
phase = 'apo'
endstate = 0
name = 'asn'  # label in the snapshot file name
is_old = True  # map hybrid positions onto the old topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 49086 atoms, 15547 residues, and unitcells at 0x2b27682bcca0>

In [12]:
# Run 113 (N501Y), complex phase, endstate 1: export the new-topology
# trajectory built from the 'tyr' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/113/"
i = 113
phase = 'complex'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185384 atoms, 57660 residues, and unitcells at 0x2b276987e580>

In [13]:
# Run 113 (N501Y), apo phase, endstate 1: export the new-topology
# trajectory built from the 'tyr' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/113/"
i = 113
phase = 'apo'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 49093 atoms, 15547 residues, and unitcells at 0x2b27b9d2c3d0>

## 14/79 N501Y

In [5]:
# Run 79 (N501Y), complex phase, endstate 1: export the new-topology
# trajectory built from the 'tyr' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/79/"
i = 79
phase = 'complex'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 183585 atoms, 57059 residues, and unitcells at 0x2aae4fc00d90>

## 14/119 Y501N

In [4]:
# Run 119 (Y501N), complex phase, endstate 0: export the old-topology
# trajectory built from the 'tyr' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/119/"
i = 119
phase = 'complex'
endstate = 0
name = 'tyr'  # label in the snapshot file name
is_old = True  # map hybrid positions onto the old topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 145340 atoms, 44809 residues, and unitcells at 0x2ae22aa032e0>

## 14/120 K417N

In [4]:
# Run 120 (K417N), complex phase, endstate 0: export the old-topology
# trajectory built from the 'lys' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/120/"
i = 120
phase = 'complex'
endstate = 0
name = 'lys'  # label in the snapshot file name
is_old = True  # map hybrid positions onto the old topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185785 atoms, 57660 residues, and unitcells at 0x2ad33e2238e0>

In [5]:
# Run 120 (K417N), complex phase, endstate 1: export the new-topology
# trajectory built from the 'asn' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/120/"
i = 120
phase = 'complex'
endstate = 1
name = 'asn'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185777 atoms, 57660 residues, and unitcells at 0x2ad2cfa5ad00>

## 14/147 N501Y with rest region as whole interface

In [4]:
# Run 147 (N501Y, rest region = whole interface), complex phase, endstate 0:
# export the old-topology trajectory built from the 'asn' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/147/"
i = 147
phase = 'complex'
endstate = 0
name = 'asn'  # label in the snapshot file name
is_old = True  # map hybrid positions onto the old topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185377 atoms, 57660 residues, and unitcells at 0x2abba5a49df0>

In [5]:
# Run 147 (N501Y, rest region = whole interface), complex phase, endstate 1:
# export the new-topology trajectory built from the 'tyr' snapshots.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so the
# run was not completed in this notebook session.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/147/"
i = 147
phase = 'complex'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

KeyboardInterrupt: 

## 24/1 N501Y (scaled water rest)

In [5]:
# Run 24/1 (N501Y, scaled water rest), complex phase, endstate 1: export the
# new-topology trajectory built from the 'tyr' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/24/1/"
i = 1
phase = 'complex'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185384 atoms, 57660 residues, and unitcells at 0x2b478ec2ea00>

## 24/2 N501Y (scaled water rest)

In [4]:
# Run 24/2 (N501Y, scaled water rest), complex phase, endstate 1: export the
# new-topology trajectory built from the 'tyr' snapshots.
# The globals set here (outdir, i, phase, name, htf) are reused by the
# unit-cell check cells that follow.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/24/2/"
i = 2
phase = 'complex'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

INFO:rdkit:Enabling RDKit 2021.03.5 jupyter extensions
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 100 frames, 185384 atoms, 57660 residues, and unitcells at 0x2b62a4cec520>

In [5]:
# Display the currently loaded factory object to confirm its class.
htf

<perses.annihilation.relative.RepartitionedHybridTopologyFactory at 0x2ae770e20640>

### Check the unitcell vectors used for imaging this traj

In [17]:
# Rebuild the new-topology trajectory from the 1 ns snapshots of the run loaded
# above (uses the notebook globals outdir, i, phase, name and htf) so that the
# unit cell vectors attached to it can be inspected.
with open(os.path.join(outdir, f"{i}_{phase}_{name}_1ns_snapshots.npy"), "rb") as f:
    cache = np.load(f)

# Take the frame count from the data instead of hard-coding 100, so a cache of
# any length neither overflows the array nor leaves zero-filled frames.
n_frames = len(cache)
new_pos = np.zeros(shape=(n_frames, htf._topology_proposal.new_topology.getNumAtoms(), 3))
# `total` lets the progress bar show real progress; enumerate() has no length.
for j, pos in tqdm_notebook(enumerate(cache), total=n_frames):
    new_pos[j] = new_positions(unit.Quantity(pos, unit=unit.nanometers)).value_in_unit_system(unit.md_unit_system)

traj = md.Trajectory(new_pos, md.Topology.from_openmm(htf._topology_proposal.new_topology))

box_vectors = np.array([val.value_in_unit_system(unit.md_unit_system) for val in htf.hybrid_system.getDefaultPeriodicBoxVectors()])
vectors = np.array([box_vectors]) 

    
# Set unit cell vectors in traj: the same (3, 3) matrix for every frame.
traj.unitcell_vectors = np.repeat(vectors, n_frames, axis=0)
# Imaging is left disabled in this check cell.
# traj = traj.image_molecules()
    

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

In [19]:
# Display the trajectory summary (frames, atoms, residues, unit cell presence).
traj

<mdtraj.Trajectory with 100 frames, 185384 atoms, 57660 residues, and unitcells at 0x2ae84773f070>

In [36]:
# First box vector of frame 0, for comparison with the hybrid system's
# default periodic box vectors shown in the next cell.
traj.unitcell_vectors[0][0]

array([13.5565,  0.    ,  0.    ])

In [23]:
# Reference values: the default periodic box vectors stored on the hybrid system.
htf.hybrid_system.getDefaultPeriodicBoxVectors()

[Quantity(value=Vec3(x=13.556500000000002, y=0.0, z=0.0), unit=nanometer),
 Quantity(value=Vec3(x=4.518833333333333, y=12.781190772207244, z=0.0), unit=nanometer),
 Quantity(value=Vec3(x=-4.518833333333333, y=6.390595386103622, z=11.068835899346718), unit=nanometer)]

In [34]:
# Per-frame unit cell vectors attached above (one 3x3 matrix per frame).
# NOTE(review): this prints every frame; a slice such as [:3] would be enough
# to confirm that the frames are identical.
traj.unitcell_vectors

array([[[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        [-4.51883333,  6.39059539, 11.0688359 ]],

       [[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        [-4.51883333,  6.39059539, 11.0688359 ]],

       [[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        [-4.51883333,  6.39059539, 11.0688359 ]],

       [[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        [-4.51883333,  6.39059539, 11.0688359 ]],

       [[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        [-4.51883333,  6.39059539, 11.0688359 ]],

       [[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        [-4.51883333,  6.39059539, 11.0688359 ]],

       [[13.5565    ,  0.        ,  0.        ],
        [ 4.51883333, 12.78119077,  0.        ],
        

## 14/157 N501Y (normal rest)

In [8]:
# Run 157 (N501Y, normal rest), complex phase, endstate 1: export the
# new-topology trajectory built from the 'tyr' snapshots.
outdir = "/data/chodera/zhangi/perses_benchmark/neq/14/157/scatter/157/"
i = 157
phase = 'complex'
endstate = 1
name = 'tyr'  # label in the snapshot file name
is_old = False  # map hybrid positions onto the new topology

# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(outdir, f"{i}_{phase}_{endstate}.pickle"), "rb") as f:
    htf = pickle.load(f)
    
make_traj(outdir, htf, i, phase, name, endstate, is_old)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for j, pos in tqdm_notebook(enumerate(cache)):


0it [00:00, ?it/s]

<mdtraj.Trajectory with 200 frames, 185384 atoms, 57660 residues, and unitcells at 0x2b60e1c2edc0>

# Check the box vectors used for solvation

In [25]:
import simtk.openmm as openmm
import simtk.openmm.app as app
import simtk.unit as unit
import numpy as np
from perses.app.relative_point_mutation_setup import PointMutationExecutor

# Set up logger
# Logger named "setup"; the _solvate override below reports its solvation
# settings through it, so the level is set to INFO to make those visible.
import logging
_logger = logging.getLogger("setup")
_logger.setLevel(logging.INFO)

class PointMutationExecutor2(PointMutationExecutor):
    """PointMutationExecutor whose ``_solvate`` builds explicit box vectors and prints them."""

    def _solvate(self,
               topology,
               positions,
               water_model,
               phase,
               ionic_strength,
               box_dimensions=None):
        """
        Solvate a topology in an explicitly constructed triclinic box and parameterize it.

        The box vectors are derived from the largest extent of ``positions``
        along x, y or z plus a fixed 0.9 nm padding, and are printed so that
        they can be inspected from the notebook.

        Parameters
        ----------
        topology : app.Topology
            Topology of the system to solvate
        positions : [n, 3] ndarray of Quantity nm
            the positions of the unsolvated system
        water_model : str
            solvent model passed to Modeller.addSolvent
        phase : str
            if phase == 'vacuum', no solvent is added; otherwise the system is
            solvated with ``water_model``
        ionic_strength : float * unit.molar
            ionic strength passed to Modeller.addSolvent
        box_dimensions : optional
            accepted as part of the signature but not used by this override

        Returns
        -------
        solvated_topology : app.Topology
            Topology after the (optional) solvation step
        solvated_positions : ndarray of Quantity nm
            Positions after the (optional) solvation step, as a plain array in nm
        solvated_system : openmm.System
            System created from the solvated topology by ``self.system_generator``
        """
        modeller = app.Modeller(topology, positions)

        # Largest span of the solute along any Cartesian axis.
        padding = 0.9 * unit.nanometers
        axis_extents = []
        for axis in range(3):
            coords = [pos[axis] for pos in positions]
            axis_extents.append(max(coords) - min(coords))
        largest_extent = max(axis_extents)

        # Reduced (unit-length) triclinic cell vectors, scaled below by the padded extent.
        # NOTE(review): these look like the vectors OpenMM's Modeller uses for
        # boxShape='octahedron' — confirm.
        reduced_vectors = (
            openmm.Vec3(1,0,0),
            openmm.Vec3(1/3,2*np.sqrt(2)/3,0),
            openmm.Vec3(-1/3,np.sqrt(2)/3,np.sqrt(6)/3),
        )
        boxVectors = [(largest_extent+padding)*v for v in reduced_vectors]
        print(boxVectors)

        # Solvent is only added outside the vacuum phase.
        if phase != 'vacuum':
            _logger.info(f"solvating at {ionic_strength} using {water_model}")
            modeller.addSolvent(self.system_generator.forcefield, model=water_model, boxVectors=boxVectors, ionicStrength=ionic_strength)

        solvated_topology = modeller.getTopology()
        modeller_positions = modeller.getPositions()

        # Canonicalize the solvated positions: turn tuples into np.array
        coordinates_nm = np.array([list(atom_pos) for atom_pos in modeller_positions.value_in_unit_system(unit.md_unit_system)])
        solvated_positions = unit.quantity.Quantity(value=coordinates_nm, unit=unit.nanometers)
        solvated_system = self.system_generator.create_system(solvated_topology)

        return solvated_topology, solvated_positions, solvated_system

In [27]:
# Build the executor with the _solvate override so that the box vectors used
# for solvation get printed. The saved output shows two solvation calls
# (two printed box-vector lists) before the cell was interrupted.
rbd_file = "/home/zhangi/choderalab/perses_benchmark/perses_protein_mutations/input/rbd_protonated.pdb"
ace2_file = "/home/zhangi/choderalab/perses_benchmark/perses_protein_mutations/input/ace2_protonated.pdb"

# NOTE(review): the positional arguments '1', '501', 'TYR' are presumably the
# chain id, residue id and target residue of the mutation — confirm against
# the PointMutationExecutor signature.
solvent_delivery = PointMutationExecutor2(rbd_file,
                        '1',
                        '501',
                        'TYR',
                        ligand_input=ace2_file,
                        forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml', '/home/zhangi/choderalab/openmmforcefields/amber/ffxml/GLYCAM_06j-1.xml'],
                        ionic_strength=0.15*unit.molar,
                        flatten_torsions=True,
                        flatten_exceptions=True,
                        generate_unmodified_hybrid_topology_factory=True,
                        generate_rest_capable_hybrid_topology_factory=True,
                        conduct_endstate_validation=False
                       )

DEBUG:openmmforcefields.system_generators:Trying GAFFTemplateGenerator to load gaff-2.11
INFO:setup:solvating at 0.15 M using tip3p


[Quantity(value=Vec3(x=8.625399999999999, y=0.0, z=0.0), unit=nanometer), Quantity(value=Vec3(x=2.875133333333333, y=8.13210510726193, z=0.0), unit=nanometer), Quantity(value=Vec3(x=-2.875133333333333, y=4.066052553630965, z=7.042609609134006), unit=nanometer)]


INFO:setup:solvating at 0.15 M using tip3p


[Quantity(value=Vec3(x=13.556500000000002, y=0.0, z=0.0), unit=nanometer), Quantity(value=Vec3(x=4.518833333333333, y=12.781190772207244, z=0.0), unit=nanometer), Quantity(value=Vec3(x=-4.518833333333333, y=6.390595386103622, z=11.068835899346718), unit=nanometer)]


KeyboardInterrupt: 