In [1]:
from simtk import unit, openmm
from simtk.openmm import app
import sys
import openmmtools
import pandas as pd
import numpy as np
import pickle

class MMGBSA:
	'''Single-structure MM/GBSA-style energy estimate for a complex, built on OpenMM.

	The complex is read from a pickled object that exposes `_topology_proposal`,
	`hybrid_positions`, `old_positions()` and `new_positions()` (presumably a perses
	hybrid topology factory -- confirm against the code that wrote the pickle).
	Solvent chains are removed, the complex is minimized with an implicit-solvent
	force field ('implicit/obc1.xml'), and the protein and ligand chains are then
	re-evaluated separately at the minimized coordinates.

	Typical usage: `computeEnergy()`, then `extract()` once for the protein chains
	and once for the ligand chains, then `computeDeltaG()`.
	'''

	def __init__(self, input_file, is_old=True, solvent_chain_ids=('6', '7')):
		'''Load the complex and strip its solvent chains.

		Parameters
		----------
		input_file : str
			Path to the pickle file holding the hybrid topology object
		is_old : boolean
			If true, use the old topology/positions. Otherwise, use the new ones.
		solvent_chain_ids : iterable of strings
			Chain id(s) deleted from the complex before any energy evaluation
		'''
		print("Instantiating MMGBSA...")
		# SECURITY NOTE: pickle.load can execute arbitrary code -- only open trusted files.
		with open(input_file, "rb") as f: # note: cannot read in complex_old.pdb bc its missing the bonds for NLNs
			htf = pickle.load(f)
		if is_old:
			print("using old positions")
			modeller = app.Modeller(htf._topology_proposal.old_topology, htf.old_positions(htf.hybrid_positions))
		else:
			print("using new positions")
			modeller = app.Modeller(htf._topology_proposal.new_topology, htf.new_positions(htf.hybrid_positions))
		solvent_chains = [chain for chain in modeller.topology.chains() if chain.id in solvent_chain_ids]
		modeller.delete(solvent_chains)
		self.complexTopology = modeller.topology
		self.complexPositions = modeller.positions
		self.complexEnergy = 0
		self.proteinEnergy = 0
		self.ligandEnergy = 0
		# Placeholder State without positions; replaced by computeEnergy().
		self.minimizedState = openmm.State()
		self._pH = 7.0
		# NOTE(review): the GLYCAM ffxml is referenced by an absolute, user-specific path.
		self.forcefield = app.ForceField('amber14/protein.ff14SB.xml', '/home/zhangi/choderalab/openmmforcefields/amber/ffxml/GLYCAM_06j-1.xml', 'implicit/obc1.xml', 'amber14/tip3p.xml')
		self.platform = openmmtools.utils.get_fastest_platform()

	def setpH(self, pH):
		'''Set the pH at which to protonate the System, if necessary

		Parameters
		----------
		pH : float or int
			New pH value; stored as a float

		Raises
		------
		TypeError
			If pH is not a real number (booleans are rejected)
		'''
		print("Setting pH...")
		# bool is a subclass of int, so it has to be excluded explicitly.
		if isinstance(pH, (int, float)) and not isinstance(pH, bool):
			self._pH = float(pH)
		else:
			raise TypeError("pH must be a float")

	def getpH(self):
		'''Get the pH of the System'''
		print("Getting pH...")
		return self._pH

	def addHydrogens(self):
		'''Protonate the complex and update the positions and topology'''
		print("Adding hydrogens...")
		modeller = app.Modeller(self.complexTopology, self.complexPositions)
		modeller.addHydrogens(self.forcefield, pH=self._pH)
		self.complexTopology = modeller.topology
		self.complexPositions = modeller.positions

	def _createSimulation(self, topology, positions, verbose=False):
		'''Build a Simulation for `topology` whose context holds `positions`.'''
		if verbose:
			print('Creating OpenMM System...')
		system = self.forcefield.createSystem(topology, nonbondedMethod=app.NoCutoff,
			constraints=app.HBonds, hydrogenMass=4*unit.amu)

		# Simulation requires an integrator; this class never steps it
		# (only minimization and single-point energies are evaluated).
		integrator = openmmtools.integrators.LangevinIntegrator(300*unit.kelvin, 1/unit.picosecond, 4*unit.femtosecond)

		if verbose:
			print("Setting up the simulation...")
		simulation = app.Simulation(topology, system, integrator, self.platform)
		simulation.context.setPositions(positions)
		return simulation

	@staticmethod
	def _potentialEnergy(simulation):
		'''Return the current potential energy of `simulation` in kcal/mol.'''
		state = simulation.context.getState(getEnergy=True)
		return state.getPotentialEnergy() / unit.kilocalories_per_mole # before conversion: kJ/mol

	def computeEnergy(self):
		'''Run energy minimization and update the final energy and final state.

		Returns
		-------
		float
			Final energy in kcal/mol
		'''
		simulation = self._createSimulation(self.complexTopology, self.complexPositions, verbose=True)

		# Minimize the energy
		print("Minimizing the energy...")
		initial_energy = self._potentialEnergy(simulation)
		print('  initial : %8.3f kcal/mol' % (initial_energy))
		simulation.minimizeEnergy()
		final_state = simulation.context.getState(getEnergy=True, getPositions=True)
		final_energy = final_state.getPotentialEnergy() / unit.kilocalories_per_mole
		print('  final   : %8.3f kcal/mol' % (final_energy))

		self.complexEnergy = final_energy
		self.minimizedState = final_state
		return final_energy

	def extract(self, chains_to_keep, extract_protein):
		'''Extract the protein or ligand from the complex and return its energy.

		The energy is a single-point evaluation at the minimized complex
		coordinates; the extracted part is not minimized again. The result is
		also stored in `proteinEnergy` or `ligandEnergy`.

		Parameters
		----------
		chains_to_keep : list of strings
			Chain id(s) to keep
		extract_protein : boolean
			If true, extract protein. Otherwise, extract ligand.

		Returns
		-------
		float
			Final energy in kcal/mol

		Raises
		------
		TypeError
			If extract_protein is not a boolean
		RuntimeError
			If the complex has not been minimized yet
		ValueError
			If no atom of the complex belongs to chains_to_keep
		'''
		if not isinstance(extract_protein, bool):
			raise TypeError("extract_protein should be boolean indicating whether to extract the protein (True) or ligand (False)")

		# Fail before doing any work instead of printing and crashing further down.
		try:
			minimized_positions = self.minimizedState.getPositions()
		except Exception as exc:
			raise RuntimeError("The System has not been minimized -- you must minimize before extracting.") from exc

		## Extract topology
		print("Extracting topology from PL...")

		# Create new topology
		new_topology = app.Topology()

		# Copy residues and atoms to new topology for chains_to_keep
		d_old_to_new = {} # Key: atom in old topology, Value: atom in new topology
		for chain in self.complexTopology.chains():
			if chain.id not in chains_to_keep:
				continue
			new_chain = new_topology.addChain(id=chain.id)
			for res in chain.residues():
				new_res = new_topology.addResidue(res.name, new_chain, id=res.id)
				for atom in res.atoms():
					d_old_to_new[atom] = new_topology.addAtom(atom.name, atom.element, new_res)

		if not d_old_to_new:
			raise ValueError("No atoms found in chains %r" % (chains_to_keep,))

		# Copy bonds whose two atoms were both kept. Every atom is either in
		# d_old_to_new or in a dropped chain, so a dict lookup replaces the
		# original linear scan over a list of atoms to delete.
		for bond in self.complexTopology.bonds():
			atom_1 = bond[0]
			atom_2 = bond[1]
			if atom_1 in d_old_to_new and atom_2 in d_old_to_new:
				new_topology.addBond(d_old_to_new[atom_1], d_old_to_new[atom_2])

		## Extract the positions
		print("Extracting positions from PL...")
		kept_indices = {atom.index for atom in d_old_to_new}
		positions = [position for i, position in enumerate(minimized_positions) if i in kept_indices]

		## Extract the energy
		print("Extracting energy from PL...")
		simulation = self._createSimulation(new_topology, positions)
		initial_energy = self._potentialEnergy(simulation)
		print('  initial : %8.3f kcal/mol' % (initial_energy))

		if extract_protein:
			self.proteinEnergy = initial_energy
		else:
			self.ligandEnergy = initial_energy
		return initial_energy

	def computeDeltaG(self):
		'''Compute MM/GBSA free energy estimate

		Returns
		-------
		float
			complexEnergy - proteinEnergy - ligandEnergy, in the units in which
			those attributes were stored (kcal/mol when set by this class)
		'''
		print("Computing MM/GBSA free energy estimate..")
		return self.complexEnergy - self.proteinEnergy - self.ligandEnergy

	def mutate(self):
		'''Mutate the complex by building a homology model

		Not implemented: the method currently has no body and returns None.
		'''




In [66]:
# Reference estimate: run 120 evaluated with the old topology/positions.
# `data` is (re)initialised here and extended by the loop cell that follows.
data = []

protein_chains = ["1", "2"]
ligand_chains = ["3", "4", "5"]
input_file_PL = "/data/chodera/zhangi/perses_benchmark/neq/14/120/120_complex.pickle"

system = MMGBSA(input_file_PL, is_old=True)
system.computeEnergy()
system.extract(chains_to_keep=protein_chains, extract_protein=True)
system.extract(chains_to_keep=ligand_chains, extract_protein=False)

deltaG = system.computeDeltaG()
data.append(deltaG)

Instantiating MMGBSA...
Creating OpenMM System...
Setting up the simulation...
Minimizing the energy...
  initial : -22780.137 kcal/mol
  final   : -26945.847 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -5705.087 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -21143.667 kcal/mol
Computing MM/GBSA free energy estimate..


In [71]:
protein_chains = ["1", "2"]
ligand_chains = ["3", "4", "5"]

# Runs 120-131, 134-135 and 137-144 (132, 133 and 136 are not in the list),
# each evaluated with the new topology/positions. Results are appended to the
# existing `data` list, so re-running this cell appends them a second time.
for i in [*range(120, 132), *range(134, 136), *range(137, 145)]:
    input_file_PL = f"/data/chodera/zhangi/perses_benchmark/neq/14/{i}/{i}_complex.pickle"
    system = MMGBSA(input_file_PL, is_old=False)

    system.computeEnergy()
    system.extract(chains_to_keep=protein_chains, extract_protein=True)
    system.extract(chains_to_keep=ligand_chains, extract_protein=False)

    deltaG = system.computeDeltaG()
    data.append(deltaG)

Instantiating MMGBSA...
using new positions
Creating OpenMM System...
Setting up the simulation...
Minimizing the energy...
  initial : -22784.656 kcal/mol
  final   : -26963.858 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -5750.099 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -21128.398 kcal/mol
Computing MM/GBSA free energy estimate..
Instantiating MMGBSA...
using new positions
Creating OpenMM System...
Setting up the simulation...
Minimizing the energy...
  initial : -22718.578 kcal/mol
  final   : -26905.488 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -5663.955 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -21145.400 kcal/mol
Computing MM/GBSA free energy estimate..
Instantiating MMGBSA...
using new positions
Crea

Minimizing the energy...
  initial : 238953896.750 kcal/mol
  final   : -27011.685 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -5784.690 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -21140.126 kcal/mol
Computing MM/GBSA free energy estimate..
Instantiating MMGBSA...
using new positions
Creating OpenMM System...
Setting up the simulation...
Minimizing the energy...
  initial : 159247571.702 kcal/mol
  final   : -26842.133 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -5618.255 kcal/mol
Extracting topology from PL...
Extracting positions from PL...
Extracting energy from PL...
  initial : -21131.754 kcal/mol
Computing MM/GBSA free energy estimate..
Instantiating MMGBSA...
using new positions
Creating OpenMM System...
Setting up the simulation...
Minimizing the energy...
  initial : -2228

In [73]:
# Display every collected estimate (the class converts energies to kcal/mol).
data

[-97.09327945745463,
 -85.36052521510646,
 -96.13398885635615,
 -98.38773825884346,
 -101.34543424354706,
 -100.54485838910114,
 -105.93088999760948,
 -100.35066622849263,
 -100.31332158221994,
 -95.85670485779337,
 -92.96482881214251,
 -92.77110345960682,
 -88.67579618785385,
 -94.98610779158844,
 -92.78090642925599,
 -90.70080963192959,
 -101.75295769598597,
 -96.00888429135011,
 -86.86831530831842,
 -92.12457427103072,
 -77.09522137906242,
 -86.52941264340188,
 -100.02483418977135]

In [76]:
# Sanity check on the number of estimates: 1 reference run + 22 runs from the loop.
len(data)

23

In [75]:
# Persist the estimates as a NumPy .npy file in the current working directory.
with open("mmgbsa_data.npy", "wb") as f:
    np.save(f, np.asarray(data))

In [4]:
# Read the estimates back from mmgbsa_data.npy. After this cell `data` holds
# the value returned by np.load, not the Python list built earlier.
with open("mmgbsa_data.npy", "rb") as f:
    data = np.load(f)

In [6]:
# Offset of each later estimate from the first entry (data[0]).
for deltaG in data[1:]:
    shift = deltaG - data[0]
    print(shift)

11.732754242348165
0.9592906010984734
-1.2944588013888279
-4.252154786092433
-3.451578931646509
-8.837610540154856
-3.257386771038
-3.220042124765314
1.2365745996612532
4.128450645312114
4.322175997847808
8.41748326960078
2.10717166586619
4.312373028198635
6.3924698255250405
-4.659678238531342
1.084395166104514
10.224964149136213
4.968705186423904
19.998058078392205
10.563866814052744
-2.9315547323167266
