# Analyze

In [1]:
from perses.analysis.analysis import Analysis
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pymbar
%matplotlib inline
import os
import itertools
from tqdm import tqdm_notebook
import pandas as pd

In [2]:
def analyze(forward_work, reverse_work, forward_accumulated, reverse_accumulated, dir_num, title, phase, output_dir): 
    # Substract offset
    forward_work_offset = []
    for cycle in forward_work:
        forward_work_offset.append(np.array([val - cycle[0] for val in cycle[1:]]))
    forward_work_offset = np.array(forward_work_offset)

    reverse_work_offset = []
    for cycle in reverse_work:
        reverse_work_offset.append(np.array([val - cycle[0] for val in cycle[1:]]))
    reverse_work_offset = np.array(reverse_work_offset)
    
    # Compute dg, ddg
    dg, ddg = pymbar.bar.BAR(forward_accumulated, reverse_accumulated)
    
    # Plot work trajectories
    for cycle in forward_work_offset:
        x = [(i+1)*4e-3 for i in range(len(list(cycle)))]
        y = cycle
        plt.plot(x, y, color=sns.color_palette()[0])
    for cycle in reverse_work_offset:
        x = [(i+1)*4e-3 for i in range(len(list(cycle)))]
        y = -cycle
        plt.plot(x, y, color=sns.color_palette()[1])
    plt.xlabel("$t_{neq}$ (ps)")
    plt.ylabel("work (kT)")
    plt.title(title)
    plt.savefig(os.path.join(output_dir, f"{dir_num}_{phase}_work_traj.png"), dpi=500)
    plt.clf()
    
    # Plot work distributions
    accumulated_forward = [cycle[-1] for cycle in forward_work_offset]
    accumulated_reverse = [-cycle[-1] for cycle in reverse_work_offset]
    sns.distplot(accumulated_forward)
    sns.distplot(accumulated_reverse)
    plt.axvline(dg)
    plt.axvline(dg - ddg, linestyle='dotted')
    plt.axvline(dg + ddg, linestyle='dotted')
    plt.xlabel("work (kT)")
    plt.ylabel("p(w)")
    plt.title(title)
    plt.savefig(os.path.join(output_dir, f"{dir_num}_{phase}_work_dist.png"), dpi=500)
    plt.clf()
    
    # Compute free energy 
    return dg, ddg
    

In [4]:
# Prep work arrays (from distributed jobs) and call analyze()
d_results = {}
for i in tqdm_notebook([17]):
    print(f"dir: {i}")
    # Load and combine arrays
    forward_complex_arrays = []
    reverse_complex_arrays = []
    forward_apo_arrays = []
    reverse_apo_arrays = []
    for j in range(200):
        print(f"job: {j}")
        forward_complex_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_complex_{j}_forward.npy'
        reverse_complex_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_complex_{j}_reverse.npy'
        forward_apo_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_apo_{j}_forward.npy'
        reverse_apo_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_apo_{j}_reverse.npy'
        if os.path.exists(forward_complex_path):
            with open(forward_complex_path, 'rb') as f:
                forward_complex_arrays.append(np.load(f))
        if os.path.exists(reverse_complex_path):
            with open(reverse_complex_path, 'rb') as f:
                reverse_complex_arrays.append(np.load(f))
        if os.path.exists(forward_apo_path):
            with open(forward_apo_path, 'rb') as f:
                forward_apo_arrays.append(np.load(f))
        if os.path.exists(reverse_apo_path):
            with open(reverse_apo_path, 'rb') as f:
                reverse_apo_arrays.append(np.load(f))
#     if forward_complex_arrays and reverse_complex_arrays and forward_apo_arrays and reverse_apo_arrays:
#     if forward_apo_arrays and reverse_apo_arrays:
    if forward_complex_arrays and reverse_complex_arrays:
        forward_complex_combined = np.concatenate(forward_complex_arrays)
        forward_complex_accumulated = np.array([cycle[-1] - cycle[0] for cycle in forward_complex_combined])
        forward_complex_combined = np.array([cycle[0::10] for cycle in forward_complex_combined])
        reverse_complex_combined = np.concatenate(reverse_complex_arrays)
        reverse_complex_accumulated = np.array([cycle[-1] - cycle[0] for cycle in reverse_complex_combined])
        reverse_complex_combined = np.array([cycle[0::10] for cycle in reverse_complex_combined])
        
        
#         forward_apo_combined = np.concatenate(forward_apo_arrays)
#         forward_apo_accumulated = np.array([cycle[-1] - cycle[0] for cycle in forward_apo_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
#         forward_apo_combined = np.array([cycle[0::10] for cycle in forward_apo_combined])

#         reverse_apo_combined = np.concatenate(reverse_apo_arrays)
#         reverse_apo_accumulated = np.array([cycle[-1] - cycle[0] for cycle in reverse_apo_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
#         reverse_apo_combined = np.array([cycle[0::10] for cycle in reverse_apo_combined])
        
        # Analyze
        complex_dg, complex_ddg = analyze(forward_complex_combined, reverse_complex_combined, forward_complex_accumulated, reverse_complex_accumulated, i, 'T42A', 'complex', os.path.dirname(forward_complex_path))
#         apo_dg, apo_ddg = analyze(forward_apo_combined, reverse_apo_combined, forward_apo_accumulated, reverse_apo_accumulated, i, "T42A", 'apo', os.path.dirname(forward_apo_path))
#         binding_dg = apo_dg - complex_dg
#         binding_ddg = (apo_ddg**2 + complex_ddg**2)**0.5
#         d_results[i] = [binding_dg, binding_ddg]
#         print(f"apo_dg: {apo_dg}, ddg: {apo_ddg}")
        print(f"complex_dg: {complex_dg}, ddg:{complex_ddg}")
    else:
        print(f"dir {i} has at least one phase without data" )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

dir: 15
job: 0
job: 1
job: 2
job: 3
job: 4
job: 5
job: 6
job: 7
job: 8
job: 9
job: 10
job: 11
job: 12
job: 13
job: 14
job: 15
job: 16
job: 17
job: 18
job: 19
job: 20
job: 21
job: 22
job: 23
job: 24
job: 25
job: 26
job: 27
job: 28
job: 29
job: 30
job: 31
job: 32
job: 33
job: 34
job: 35
job: 36
job: 37
job: 38
job: 39
job: 40
job: 41
job: 42
job: 43
job: 44
job: 45
job: 46
job: 47
job: 48
job: 49
job: 50
job: 51
job: 52
job: 53
job: 54
job: 55
job: 56
job: 57
job: 58
job: 59
job: 60
job: 61
job: 62
job: 63
job: 64
job: 65
job: 66
job: 67
job: 68
job: 69
job: 70
job: 71
job: 72
job: 73
job: 74
job: 75
job: 76
job: 77
job: 78
job: 79
job: 80
job: 81
job: 82
job: 83
job: 84
job: 85
job: 86
job: 87
job: 88
job: 89
job: 90
job: 91
job: 92
job: 93
job: 94
job: 95
job: 96
job: 97
job: 98
job: 99
complex_dg: -46.05818532967324, ddg:0.25201052912846045



<Figure size 432x288 with 0 Axes>

In [5]:
print(f"apo dg: {apo_dg}, ddg: {apo_ddg}")
print(f"complex_dg: {complex_dg}, ddg:{complex_ddg}")

apo dg: -40.24489459381627, ddg: 0.344738090471024
complex_dg: -37.39300450353149, ddg:0.6928829409013308


In [6]:
d_results

{5: [-2.851890090284776, 0.7739064031352144]}