In [1]:
from perses.analysis.analysis import Analysis
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pymbar
%matplotlib inline
import os
import itertools
from tqdm import tqdm_notebook
import pandas as pd

In [2]:
def analyze(forward_work, reverse_work, forward_accumulated, reverse_accumulated, dir_num, phase, output_dir, title): 
    # Substract offset
    print("subtracting offset")
    forward_work_offset = []
    for cycle in forward_work:
        forward_work_offset.append(np.array([val - cycle[0] for val in cycle[1:]]))
    forward_work_offset = np.array(forward_work_offset)

    reverse_work_offset = []
    for cycle in reverse_work:
        reverse_work_offset.append(np.array([val - cycle[0] for val in cycle[1:]]))
    reverse_work_offset = np.array(reverse_work_offset)
    
    # Compute dg, ddg
    print("computing dg, ddg")
    dg, ddg = pymbar.bar.BAR(forward_accumulated, reverse_accumulated)
    
    # Plot work trajectories
    print("plotting work trajs")
    for cycle in forward_work_offset:
        x = [(i+1)*4e-4 for i in range(len(list(cycle)))]
        y = cycle
        plt.plot(x, y, color=sns.color_palette()[0])
    for cycle in reverse_work_offset:
        x = [(i+1)*4e-4 for i in range(len(list(cycle)))]
        y = -cycle
        plt.plot(x, y, color=sns.color_palette()[1])
    plt.xlabel("$t_{neq}$ (ns)")
    plt.ylabel("work (kT)")
    plt.title(f"{title} {phase}")
    plt.savefig(os.path.join(output_dir, f"{dir_num}_{phase}_work_traj.png"), dpi=500)
    print(f"saved to: {os.path.join(output_dir, f'{dir_num}_{phase}_work_traj.png')}")
    plt.clf()
    
    # Plot work distributions
    print("plotting work distrib")
    accumulated_forward = [cycle[-1] for cycle in forward_work_offset]
    accumulated_reverse = [-cycle[-1] for cycle in reverse_work_offset]
    sns.distplot(accumulated_forward)
    sns.distplot(accumulated_reverse)
    plt.axvline(dg)
    plt.axvline(dg + ddg, linestyle='dashed')
    plt.axvline(dg - ddg, linestyle='dashed')
    plt.xlabel("work (kT)")
    plt.ylabel("p(w)")
    plt.title(f"{title} {phase}")
    plt.savefig(os.path.join(output_dir, f"{dir_num}_{phase}_work_dist.png"), dpi=500)
    print(f"saved to: {os.path.join(output_dir, f'{dir_num}_{phase}_work_dist.png')}")
    plt.clf()
    
    # Compute free energy 
    return dg, ddg
    

In [3]:
# Prep work arrays (from distributed jobs) and call analyze()
# titles = ["T42A", "A42T", "Y29A", "A29Y", "W38F", "F38W", "W44F", "F44W", "Y29F", "F29Y"]
titles = ["ALA->THR", "THR->ALA"]
d_results = {}
d_phases = {}
first = 11
for i in tqdm_notebook([11, 12]):
    print(f"dir: {i}")
    # Load and combine arrays
    forward_complex_arrays = []
    reverse_complex_arrays = []
    forward_apo_arrays = []
    reverse_apo_arrays = []
    for j in range(100):
        print(f"job: {j}")
        forward_complex_path = f'/data/chodera/zhangi/perses_benchmark/neq/10/{i}/{i}_vacuum_{j}_forward.npy'
        reverse_complex_path = f'/data/chodera/zhangi/perses_benchmark/neq/10/{i}/{i}_vacuum_{j}_reverse.npy'
        forward_apo_path = f'/data/chodera/zhangi/perses_benchmark/neq/10/{i}/{i}_solvent_{j}_forward.npy'
        reverse_apo_path = f'/data/chodera/zhangi/perses_benchmark/neq/10/{i}/{i}_solvent_{j}_reverse.npy'
        if os.path.exists(forward_complex_path):
            with open(forward_complex_path, 'rb') as f:
                array = np.load(f)
                forward_complex_arrays.append(array)
                print(array.shape)
        if os.path.exists(reverse_complex_path):
            with open(reverse_complex_path, 'rb') as f:
                array = np.load(f)
                reverse_complex_arrays.append(array)
                print(array.shape)
        if os.path.exists(forward_apo_path):
            with open(forward_apo_path, 'rb') as f:
#                 if j < 63 or j > 69:
                array = np.load(f)
                forward_apo_arrays.append(array)
                print(array.shape)
        if os.path.exists(reverse_apo_path):
            with open(reverse_apo_path, 'rb') as f:
#                 if j < 63 or j > 69:
                array = np.load(f)
                print(array.shape)
                reverse_apo_arrays.append(array)
    if forward_complex_arrays and reverse_complex_arrays and forward_apo_arrays and reverse_apo_arrays:
#     if forward_complex_arrays and reverse_complex_arrays:
#     if forward_apo_arrays and reverse_apo_arrays:
        
        forward_complex_combined = np.concatenate(forward_complex_arrays)
        forward_complex_accumulated = np.array([cycle[-1] - cycle[0] for cycle in forward_complex_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
        forward_complex_combined = np.array([cycle[0::100] for cycle in forward_complex_combined])
        print(forward_complex_combined.shape)
        
        reverse_complex_combined = np.concatenate(reverse_complex_arrays)
        reverse_complex_accumulated = np.array([cycle[-1] - cycle[0] for cycle in reverse_complex_combined])
        reverse_complex_combined = np.array([cycle[0::100] for cycle in reverse_complex_combined])
        
        forward_apo_combined = np.concatenate(forward_apo_arrays)
        forward_apo_accumulated = np.array([cycle[-1] - cycle[0] for cycle in forward_apo_combined])
        forward_apo_combined = np.array([cycle[0::100] for cycle in forward_apo_combined])
        print(forward_apo_combined.shape)
        
        reverse_apo_combined = np.concatenate(reverse_apo_arrays)
        reverse_apo_accumulated = np.array([cycle[-1] - cycle[0] for cycle in reverse_apo_combined])
        reverse_apo_combined = np.array([cycle[0::100] for cycle in reverse_apo_combined])
        
        # Analyze
        complex_dg, complex_ddg = analyze(forward_complex_combined, reverse_complex_combined, forward_complex_accumulated, reverse_complex_accumulated, i, 'vacuum', os.path.dirname(forward_complex_path), titles[i-first])
        apo_dg, apo_ddg = analyze(forward_apo_combined, reverse_apo_combined, forward_apo_accumulated, reverse_apo_accumulated, i, 'solvent', os.path.dirname(forward_apo_path), titles[i-first])
        binding_dg = complex_dg - apo_dg
        binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5
        d_results[titles[i-first]] = [binding_dg, binding_ddg]
#         print(f"complex_dg: {complex_dg}")
#         print(f"apo dg: {apo_dg}, ddg: {apo_ddg}")
        print(f"apo dg: {apo_dg}, complex_dg: {complex_dg}")
        d_phases[titles[i-first]] = [apo_dg, apo_ddg, complex_dg, complex_ddg]
#         d_phases[titles[i-first]] = [apo_dg, apo_ddg]
    else:
        print(f"dir {i} has at least one phase without data" )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  import sys


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))

dir: 11
job: 0
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 1
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 2
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 3
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 4
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 5
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 6
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 7
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 8
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 9
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 10
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 11
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 12
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 13
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 14
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 15
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 16
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 17
(1, 250001)
(1, 250001)
(1, 250001)
(1, 25



saved to: /data/chodera/zhangi/perses_benchmark/neq/10/11/11_vacuum_work_dist.png
subtracting offset
computing dg, ddg
plotting work trajs
saved to: /data/chodera/zhangi/perses_benchmark/neq/10/11/11_solvent_work_traj.png
plotting work distrib




saved to: /data/chodera/zhangi/perses_benchmark/neq/10/11/11_solvent_work_dist.png
apo dg: -42.21183777304214, complex_dg: -39.678938768267265
dir: 12
job: 0
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 1
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 2
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 3
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 4
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 5
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 6
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 7
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 8
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 9
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 10
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 11
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 12
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 13
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 14
(1, 250001)
(1, 250001)
(1, 250001)
(1, 250001)
job: 15
(1, 250001)



saved to: /data/chodera/zhangi/perses_benchmark/neq/10/12/12_vacuum_work_dist.png
subtracting offset
computing dg, ddg
plotting work trajs
saved to: /data/chodera/zhangi/perses_benchmark/neq/10/12/12_solvent_work_traj.png
plotting work distrib




saved to: /data/chodera/zhangi/perses_benchmark/neq/10/12/12_solvent_work_dist.png
apo dg: 42.10127589409609, complex_dg: 38.92891532619634



<Figure size 432x288 with 0 Axes>

In [4]:
d_results

{'ALA->THR': [2.532899004774876, 0.11970644844379158],
 'THR->ALA': [-3.1723605678997515, 0.09296296348409354]}

Convert to kcal/mol

In [12]:
from simtk.openmm import unit
from openmmtools.constants import kB
KT_KCALMOL = kB * 300 * unit.kelvin / unit.kilocalories_per_mole

In [13]:
d_results_kcalmol = {}
for k, v in d_results.items():
    d_results_kcalmol[k] = [v[0]*KT_KCALMOL, v[1]*KT_KCALMOL]

In [14]:
d_results_kcalmol

{'ALA->THR': [1.5100180961459213, 0.0713644338107156],
 'THR->ALA': [-1.8912407703575769, 0.055420984756085964]}

In [5]:
d_phases

{'ALA->THR': [-42.21183777304214,
  0.04499930521020692,
  -39.678938768267265,
  0.11092653573255039],
 'THR->ALA': [42.10127589409609,
  0.042794070738717674,
  38.92891532619634,
  0.0825274505201423]}

In [6]:
import pickle
with open("/data/chodera/zhangi/perses_benchmark/neq/10/d_results.pickle", "wb") as f:
    pickle.dump(d_results, f)

import pickle
with open("/data/chodera/zhangi/perses_benchmark/neq/10/d_phases.pickle", "wb") as f:
    pickle.dump(d_phases, f)


In [4]:
# import pickle
# with open("/data/chodera/zhangi/perses_benchmark/neq/8/d_results.pickle", "rb") as f:
#     d_results = pickle.load(f)

# import pickle
# with open("/data/chodera/zhangi/perses_benchmark/neq/8/d_phases.pickle", "rb") as f:
#     d_phases = pickle.load(f)

In [None]:
# # Format to match Dominic's table
# d_combined = {}
# rows = [('ALA', 'SER'), ('ALA', 'CYS'), ('ALA', 'THR'), ('SER', 'CYS'), ('SER', 'THR'), ('CYS', 'THR')]
# for pair, (dg, ddg) in sorted(d_results.items()):
#     if pair in rows:
#         d_combined[pair] = [dg, ddg]

# for pair, (dg, ddg) in sorted(d_results.items()):
#     if pair not in d_combined:
#         reversed_pair = pair[::-1]
#         forward_solvation_dg = d_combined[reversed_pair][0]
#         forward_solvation_ddg = d_combined[reversed_pair][1]
#         d_combined[reversed_pair] += [dg, ddg, abs(forward_solvation_dg) - abs(dg), abs(forward_solvation_ddg) - abs(ddg)]



In [None]:
# # Create dataframe
# df = pd.DataFrame.from_dict(d_combined, orient='index', columns=['forward solvation dG', 
#                                                             'forward solvation ddG', 
#                                                             'reverse solvation dG',
#                                                             'reverse solvation ddG',
#                                                             'discrepancy dG',
#                                                             'discrepancy ddG'
#                                                            ]
                      
#                       )

In [None]:
# df.reindex(index=rows)

# Compute phase discrepancies

In [7]:
from simtk.openmm import unit
from openmmtools.constants import kB
KT_KCALMOL = kB * 300 * unit.kelvin / unit.kilocalories_per_mole

In [9]:
mutations = ["ALA->THR"]
d_discrepancy = {}
for mutation in mutations:
    reverse = "THR->ALA"
#     d_discrepancy[mutation] = [d_phases[mutation][0]*KT_KCALMOL, d_phases[reverse][0]*KT_KCALMOL, (d_phases[mutation][0] + d_phases[reverse][0])*KT_KCALMOL,
#                                d_phases[mutation][2]*KT_KCALMOL, d_phases[reverse][2]*KT_KCALMOL, (d_phases[mutation][2] + d_phases[reverse][2])*KT_KCALMOL]
    forward_apo_dg = d_phases[mutation][0]*KT_KCALMOL
    forward_apo_ddg = d_phases[mutation][1]*KT_KCALMOL
    reverse_apo_dg = d_phases[reverse][0]*KT_KCALMOL
    reverse_apo_ddg =  d_phases[reverse][1]*KT_KCALMOL
    apo_discrepancy = (d_phases[mutation][0] + d_phases[reverse][0])*KT_KCALMOL
    apo_uncertainty = np.sqrt((d_phases[mutation][1]*KT_KCALMOL)**2 + (d_phases[reverse][1]*KT_KCALMOL)**2)
    
    forward_complex_dg = d_phases[mutation][2]*KT_KCALMOL
    forward_complex_ddg = d_phases[mutation][3]*KT_KCALMOL
    reverse_complex_dg = d_phases[reverse][2]*KT_KCALMOL
    reverse_complex_ddg =  d_phases[reverse][3]*KT_KCALMOL
    complex_discrepancy = (d_phases[mutation][2] + d_phases[reverse][2])*KT_KCALMOL
    complex_uncertainty = np.sqrt((d_phases[mutation][3]*KT_KCALMOL)**2 + (d_phases[reverse][3]*KT_KCALMOL)**2)
    
    d_discrepancy[mutation] = [forward_apo_dg, forward_apo_ddg, reverse_apo_dg, reverse_apo_ddg, apo_discrepancy, apo_uncertainty, 
                              forward_complex_dg, forward_complex_ddg, reverse_complex_dg, reverse_complex_ddg, complex_discrepancy, complex_uncertainty]
    
    

In [10]:
d_discrepancy

{'ALA->THR': [-25.16509295819114,
  0.02682687507607326,
  25.0991801690757,
  0.025512198118201615,
  -0.06591278911543885,
  0.03702098700966265,
  -23.65507486204522,
  0.06613018362878458,
  23.207939398718125,
  0.04919972864266573,
  -0.4471354633270944,
  0.08242459878755073]}

In [11]:
import pandas as pd
pd.DataFrame.from_dict(d_discrepancy, orient='index').to_csv("/data/chodera/zhangi/perses_benchmark/neq/10/discrepancies.csv")