In [1]:
from perses.analysis import utils
from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer
from simtk import unit
import os
import itertools
from tqdm import tqdm_notebook
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import logging
from perses.analysis import utils

from openmmtools.constants import kB
# Factor used below to convert free energies from kT to kcal/mol
# (kB * T divided by the kcal/mol unit).
# NOTE(review): the temperature is hard-coded to 298 K — confirm it matches
# the temperature the simulations were run at.
KT_KCALMOL = kB * 298 * unit.kelvin / unit.kilocalories_per_mole

import warnings
# Silence every warning raised from this point on.
warnings.filterwarnings('ignore')

In [2]:
# Given a path, get dg and ddg
def analyze(path):
    """Return the free energy difference between the first and last states.

    Parameters
    ----------
    path : str
        Path handed to ``MultiStateReporter``.

    Returns
    -------
    f : float
        Entry ``[0, -1]`` of the free energy matrix returned by
        ``analyzer.get_free_energy()`` (units = kT).
    df : float
        Entry ``[0, -1]`` of the matching uncertainty matrix.
    """
    analyzer = MultiStateSamplerAnalyzer(MultiStateReporter(path))
    free_energies, uncertainties = analyzer.get_free_energy()

    # Values are left in kT. To get kcal/mol instead, multiply by
    # analyzer.kT and call .in_units_of(unit.kilocalories_per_mole).
    f = free_energies[0, -1]
    df = uncertainties[0, -1]
    return f, df

def analyze_dom(path):
    """Compute the free energy difference between the first and last states
    using all iterations recorded in a multistate storage file.

    The number of iterations is read from the file's ``last_iteration``
    variable and printed, then passed to the analyzer as ``max_n_iterations``.

    Parameters
    ----------
    path : str
        Path to the storage (.nc) file.

    Returns
    -------
    dg : np.ndarray
        One-element array holding entry ``[0, -1]`` of the free energy matrix
        from ``analyzer.get_free_energy()``.
    ddg : np.ndarray
        One-element array holding entry ``[0, -1]`` of the matching
        uncertainty matrix.
    """
    reporter = MultiStateReporter(path)

    # The file is opened directly only to look up the iteration count, so
    # release the handle as soon as the value has been read instead of leaking
    # it for the lifetime of the kernel.
    # NOTE(review): assumes utils.open_netcdf returns an object with .close()
    # (presumably a netCDF4 Dataset) — confirm.
    ncfile = utils.open_netcdf(path)
    try:
        n_iterations = ncfile.variables['last_iteration'][0]
    finally:
        ncfile.close()
    print(f"n_iterations: {n_iterations}")

    # Only the final estimate is computed; the results are wrapped in
    # one-element arrays so callers can keep indexing with [-1].
    analyzer = MultiStateSamplerAnalyzer(reporter, max_n_iterations=n_iterations)
    f_ij, df_ij = analyzer.get_free_energy()
    dg = [f_ij[0, -1]]
    ddg = [df_ij[0, -1]]
    return np.array(dg), np.array(ddg)

### 10 ns N501Y (repex/31/3/0)

In [6]:
# Lower the root logger's threshold to INFO.
_logger = logging.getLogger()
_logger.setLevel(level=logging.INFO)

In [3]:
# Selects the experiment directory .../repex/<main_dir>/<sub_dir>/ used by the cells below.
main_dir, sub_dir = 31, 3
# Mutation label for this section.
titles = ['N501Y']

In [4]:
# Analyze the complex and apo phases of every replicate and cache the
# results next to the simulation output.
for r in tqdm_notebook([0]):
    print(f"replicate {r}")
    output_dir = f'/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/'
    complex_path = os.path.join(output_dir, f"{r}_complex.nc")
    apo_path = os.path.join(output_dir, f"{r}_apo.nc")

    print("analyzing complex")
    complex_dg, complex_ddg = analyze_dom(complex_path)
    print("analyzing apo")
    apo_dg, apo_ddg = analyze_dom(apo_path)

    # Row order: complex dg, complex ddg, apo dg, apo ddg.
    results = np.array([complex_dg, complex_ddg, apo_dg, apo_ddg])
    # Derive the output path from output_dir rather than spelling the
    # directory out a second time, so the two cannot drift apart.
    with open(os.path.join(output_dir, 'results.npy'), 'wb') as f:
        np.save(f, results)

  0%|          | 0/1 [00:00<?, ?it/s]



replicate 0
analyzing complex
n_iterations: 9826




analyzing apo
n_iterations: 10000


In [5]:
# Reload the cached results array for each replicate and report free energies (kcal/mol)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row and convert it from kT to kcal/mol.
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] * KT_KCALMOL for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kcal/mol")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kcal/mol")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kcal/mol")
    print()



0 complex: 72.38742211518112 (0.6997592397603635) kcal/mol
0 apo: 65.70679011803303 (0.14887786508109993) kcal/mol
0 DDG: 6.680631997148097 (0.7154212831200286) kcal/mol



### 10 ns Y501N (repex/31/4/0)

In [6]:
# Lower the root logger's threshold to INFO.
_logger = logging.getLogger()
_logger.setLevel(level=logging.INFO)

In [7]:
# Selects the experiment directory .../repex/<main_dir>/<sub_dir>/ used by the cells below.
main_dir, sub_dir = 31, 4
# Mutation label for this section.
titles = ['Y501N']

In [8]:
# Analyze the complex and apo phases of every replicate and cache the
# results next to the simulation output.
for r in tqdm_notebook([0]):
    print(f"replicate {r}")
    output_dir = f'/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/'
    complex_path = os.path.join(output_dir, f"{r}_complex.nc")
    apo_path = os.path.join(output_dir, f"{r}_apo.nc")

    print("analyzing complex")
    complex_dg, complex_ddg = analyze_dom(complex_path)
    print("analyzing apo")
    apo_dg, apo_ddg = analyze_dom(apo_path)

    # Row order: complex dg, complex ddg, apo dg, apo ddg.
    results = np.array([complex_dg, complex_ddg, apo_dg, apo_ddg])
    # Derive the output path from output_dir rather than spelling the
    # directory out a second time, so the two cannot drift apart.
    with open(os.path.join(output_dir, 'results.npy'), 'wb') as f:
        np.save(f, results)

  0%|          | 0/1 [00:00<?, ?it/s]



replicate 0
analyzing complex




n_iterations: 10000




analyzing apo
n_iterations: 10000


In [9]:
# Reload the cached results array for each replicate and report free energies (kcal/mol)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row and convert it from kT to kcal/mol.
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] * KT_KCALMOL for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kcal/mol")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kcal/mol")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kcal/mol")
    print()



0 complex: -69.69210585387926 (0.6186782843571833) kcal/mol
0 apo: -66.20697127924979 (0.14221286224640278) kcal/mol
0 DDG: -3.4851345746294697 (0.634812821013771) kcal/mol



### 10 ns K417N (repex/31/5/0)

In [3]:
# Lower the root logger's threshold to INFO.
_logger = logging.getLogger()
_logger.setLevel(level=logging.INFO)

In [4]:
# Selects the experiment directory .../repex/<main_dir>/<sub_dir>/ used by the cells below.
main_dir, sub_dir = 31, 5
# Mutation label for this section.
titles = ['K417N']

In [5]:
# Analyze the complex and apo phases of every replicate and cache the
# results next to the simulation output.
for r in tqdm_notebook([0]):
    print(f"replicate {r}")
    output_dir = f'/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/'
    complex_path = os.path.join(output_dir, f"{r}_complex.nc")
    apo_path = os.path.join(output_dir, f"{r}_apo.nc")

    print("analyzing complex")
    complex_dg, complex_ddg = analyze_dom(complex_path)
    print("analyzing apo")
    apo_dg, apo_ddg = analyze_dom(apo_path)

    # Row order: complex dg, complex ddg, apo dg, apo ddg.
    results = np.array([complex_dg, complex_ddg, apo_dg, apo_ddg])
    # Derive the output path from output_dir rather than spelling the
    # directory out a second time, so the two cannot drift apart.
    with open(os.path.join(output_dir, 'results.npy'), 'wb') as f:
        np.save(f, results)

  0%|          | 0/1 [00:00<?, ?it/s]



replicate 0
analyzing complex
n_iterations: 10000




analyzing apo
n_iterations: 10000


In [6]:
# Reload the cached results array for each replicate and report free energies (kT)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row, left in kT (no unit conversion here).
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kT")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kT")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kT")
    print()


0 complex: -249.2366555543377 (0.6677592289758018) kT
0 apo: -251.4332586580079 (0.30072643055274284) kT
0 DDG: 2.1966031036702134 (0.7323515371154422) kT



In [7]:
# Reload the cached results array for each replicate and report free energies (kcal/mol)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row and convert it from kT to kcal/mol.
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] * KT_KCALMOL for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kcal/mol")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kcal/mol")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kcal/mol")
    print()



0 complex: -147.59467471143935 (0.3954382471030944) kcal/mol
0 apo: -148.89547422600216 (0.17808624335711945) kcal/mol
0 DDG: 1.3007995145628115 (0.43368896382663363) kcal/mol



### 10 ns N417K (repex/31/6/0)

In [9]:
# Lower the root logger's threshold to INFO.
_logger = logging.getLogger()
_logger.setLevel(level=logging.INFO)

In [10]:
# Selects the experiment directory .../repex/<main_dir>/<sub_dir>/ used by the cells below.
main_dir, sub_dir = 31, 6
# Mutation label for this section.
titles = ['N417K']

In [11]:
# Analyze the complex and apo phases of every replicate and cache the
# results next to the simulation output.
for r in tqdm_notebook([0]):
    print(f"replicate {r}")
    output_dir = f'/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/'
    complex_path = os.path.join(output_dir, f"{r}_complex.nc")
    apo_path = os.path.join(output_dir, f"{r}_apo.nc")

    print("analyzing complex")
    complex_dg, complex_ddg = analyze_dom(complex_path)
    print("analyzing apo")
    apo_dg, apo_ddg = analyze_dom(apo_path)

    # Row order: complex dg, complex ddg, apo dg, apo ddg.
    results = np.array([complex_dg, complex_ddg, apo_dg, apo_ddg])
    # Derive the output path from output_dir rather than spelling the
    # directory out a second time, so the two cannot drift apart.
    with open(os.path.join(output_dir, 'results.npy'), 'wb') as f:
        np.save(f, results)

  0%|          | 0/1 [00:00<?, ?it/s]



replicate 0
analyzing complex
n_iterations: 10000




analyzing apo
n_iterations: 10000


In [12]:
# Reload the cached results array for each replicate and report free energies (kcal/mol)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row and convert it from kT to kcal/mol.
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] * KT_KCALMOL for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kcal/mol")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kcal/mol")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kcal/mol")
    print()



0 complex: -17.651776214266906 (0.5098533851734851) kcal/mol
0 apo: -15.872766396876752 (0.1678665110475393) kcal/mol
0 DDG: -1.779009817390154 (0.5367770858597969) kcal/mol



### 10 ns E484K (repex/31/7/0)

In [13]:
# Lower the root logger's threshold to INFO.
_logger = logging.getLogger()
_logger.setLevel(level=logging.INFO)

In [14]:
# Selects the experiment directory .../repex/<main_dir>/<sub_dir>/ used by the cells below.
main_dir, sub_dir = 31, 7

In [15]:
# Analyze the complex and apo phases of every replicate and cache the
# results next to the simulation output.
for r in tqdm_notebook([0]):
    print(f"replicate {r}")
    output_dir = f'/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/'
    complex_path = os.path.join(output_dir, f"{r}_complex.nc")
    apo_path = os.path.join(output_dir, f"{r}_apo.nc")

    print("analyzing complex")
    complex_dg, complex_ddg = analyze_dom(complex_path)
    print("analyzing apo")
    apo_dg, apo_ddg = analyze_dom(apo_path)

    # Row order: complex dg, complex ddg, apo dg, apo ddg.
    results = np.array([complex_dg, complex_ddg, apo_dg, apo_ddg])
    # Derive the output path from output_dir rather than spelling the
    # directory out a second time, so the two cannot drift apart.
    with open(os.path.join(output_dir, 'results.npy'), 'wb') as f:
        np.save(f, results)

  0%|          | 0/1 [00:00<?, ?it/s]



replicate 0
analyzing complex
n_iterations: 10000




analyzing apo
n_iterations: 10000


In [16]:
# Reload the cached results array for each replicate and report free energies (kcal/mol)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row and convert it from kT to kcal/mol.
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] * KT_KCALMOL for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kcal/mol")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kcal/mol")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kcal/mol")
    print()



0 complex: -102.03657311627684 (0.7209974065943968) kcal/mol
0 apo: -100.85390303459356 (0.27340268394871076) kcal/mol
0 DDG: -1.182670081683284 (0.7710942146756158) kcal/mol



### 10 ns K484E (repex/31/8/0)

In [14]:
# Lower the root logger's threshold to INFO.
_logger = logging.getLogger()
_logger.setLevel(level=logging.INFO)

In [17]:
# Selects the experiment directory .../repex/<main_dir>/<sub_dir>/ used by the cells below.
main_dir, sub_dir = 31, 8

In [18]:
# Analyze the complex and apo phases of every replicate and cache the
# results next to the simulation output.
for r in tqdm_notebook([0]):
    print(f"replicate {r}")
    output_dir = f'/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/'
    complex_path = os.path.join(output_dir, f"{r}_complex.nc")
    apo_path = os.path.join(output_dir, f"{r}_apo.nc")

    print("analyzing complex")
    complex_dg, complex_ddg = analyze_dom(complex_path)
    print("analyzing apo")
    apo_dg, apo_ddg = analyze_dom(apo_path)

    # Row order: complex dg, complex ddg, apo dg, apo ddg.
    results = np.array([complex_dg, complex_ddg, apo_dg, apo_ddg])
    # Derive the output path from output_dir rather than spelling the
    # directory out a second time, so the two cannot drift apart.
    with open(os.path.join(output_dir, 'results.npy'), 'wb') as f:
        np.save(f, results)

  0%|          | 0/1 [00:00<?, ?it/s]



replicate 0
analyzing complex
n_iterations: 10000




analyzing apo
n_iterations: 10000


In [19]:
# Reload the cached results array for each replicate and report free energies (kcal/mol)
d_results = {}
for r in [0]:
    with open(f"/data/chodera/zhangi/perses_benchmark/repex/{main_dir}/{sub_dir}/{r}/results.npy", "rb") as f:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects; the saved array looks purely numeric, so the flag is
        # probably unnecessary — confirm before removing.
        results = np.load(f, allow_pickle=True)
    d_results[r] = results

    # Take the last entry of each row and convert it from kT to kcal/mol.
    complex_dg, complex_ddg, apo_dg, apo_ddg = (row[-1] * KT_KCALMOL for row in results)
    # Difference of the two phases; uncertainties are combined in quadrature.
    binding_dg = complex_dg - apo_dg
    binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5

    print(f"{r} complex: {complex_dg} ({complex_ddg}) kcal/mol")
    print(f"{r} apo: {apo_dg} ({apo_ddg}) kcal/mol")
    print(f"{r} DDG: {binding_dg} ({binding_ddg}) kcal/mol")
    print()



0 complex: -227.12082165605312 (0.705428772647102) kcal/mol
0 apo: -225.75281689512838 (0.43596270355033) kcal/mol
0 DDG: -1.368004760924748 (0.8292727127822969) kcal/mol

