# Analyze

In [None]:
from perses.analysis.analysis import Analysis
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pymbar
%matplotlib inline
import os
import itertools
from tqdm import tqdm_notebook
import pandas as pd

In [4]:
def analyze(forward_work, reverse_work, forward_accumulated, reverse_accumulated, dir_num, title, phase, output_dir): 
    # Substract offset
    forward_work_offset = []
    for cycle in forward_work:
        forward_work_offset.append(np.array([val - cycle[0] for val in cycle[1:]]))
    forward_work_offset = np.array(forward_work_offset)

    reverse_work_offset = []
    for cycle in reverse_work:
        reverse_work_offset.append(np.array([val - cycle[0] for val in cycle[1:]]))
    reverse_work_offset = np.array(reverse_work_offset)
    
    # Compute dg, ddg
    dg, ddg = pymbar.bar.BAR(forward_accumulated, reverse_accumulated)
    
    # Plot work trajectories
    for cycle in forward_work_offset:
        x = [(i+1)*4e-3 for i in range(len(list(cycle)))]
        y = cycle
        plt.plot(x, y, color=sns.color_palette()[0])
    for cycle in reverse_work_offset:
        x = [(i+1)*4e-3 for i in range(len(list(cycle)))]
        y = -cycle
        plt.plot(x, y, color=sns.color_palette()[1])
    plt.xlabel("$t_{neq}$ (ps)")
    plt.ylabel("work (kT)")
    plt.title(title)
    plt.savefig(os.path.join(output_dir, f"{dir_num}_{phase}_work_traj.png"), dpi=500)
    plt.clf()
    
    # Plot work distributions
    accumulated_forward = [cycle[-1] for cycle in forward_work_offset]
    accumulated_reverse = [-cycle[-1] for cycle in reverse_work_offset]
    sns.distplot(accumulated_forward)
    sns.distplot(accumulated_reverse)
    plt.axvline(dg)
    plt.axvline(dg - ddg, linestyle='dotted')
    plt.axvline(dg + ddg, linestyle='dotted')
    plt.xlabel("work (kT)")
    plt.ylabel("p(w)")
    plt.title(title)
    plt.savefig(os.path.join(output_dir, f"{dir_num}_{phase}_work_dist.png"), dpi=500)
    plt.clf()
    
    # Compute free energy 
    return dg, ddg
    

In [3]:
# Global variables
amino_acids = ['ALA', 'CYS', 'SER', 'THR']

# Create list of tuples for every pair of amino acids
pairs = list(itertools.permutations(amino_acids, r=2))

In [5]:
# Prep work arrays (from distributed jobs) and call analyze()
d_results = {}
for i in tqdm_notebook([18]):
    print(f"dir: {i}")
    # Load and combine arrays
    forward_solvent_arrays = []
    reverse_solvent_arrays = []
    forward_vacuum_arrays = []
    reverse_vacuum_arrays = []
    for j in range(200):
        print(f"job: {j}")
        forward_solvent_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_solvent_{j}_forward.npy'
        reverse_solvent_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_solvent_{j}_reverse.npy'
        forward_vacuum_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_vacuum_{j}_forward.npy'
        reverse_vacuum_path = f'/data/chodera/zhangi/perses_benchmark/neq/7/{i}/{i}_vacuum_{j}_reverse.npy'
        if os.path.exists(forward_solvent_path):
            with open(forward_solvent_path, 'rb') as f:
                forward_solvent_arrays.append(np.load(f))
        if os.path.exists(reverse_solvent_path):
            with open(reverse_solvent_path, 'rb') as f:
                reverse_solvent_arrays.append(np.load(f))
        if os.path.exists(forward_vacuum_path):
            with open(forward_vacuum_path, 'rb') as f:
                forward_vacuum_arrays.append(np.load(f))
        if os.path.exists(reverse_vacuum_path):
            with open(reverse_vacuum_path, 'rb') as f:
                reverse_vacuum_arrays.append(np.load(f))
    if forward_solvent_arrays and reverse_solvent_arrays and forward_vacuum_arrays and reverse_vacuum_arrays:
#     if forward_vacuum_arrays and reverse_vacuum_arrays:
        forward_solvent_combined = np.concatenate(forward_solvent_arrays)
        forward_solvent_combined = np.array([cycle[0::10] for cycle in forward_solvent_combined])
        reverse_solvent_combined = np.concatenate(reverse_solvent_arrays)
        reverse_solvent_combined = np.array([cycle[0::10] for cycle in reverse_solvent_combined])
        
        
        forward_vacuum_combined = np.concatenate(forward_vacuum_arrays)
        forward_vacuum_accumulated = np.array([cycle[-1] - cycle[0] for cycle in forward_vacuum_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
#         forward_vacuum_combined = np.array([cycle[0::10] for cycle in forward_vacuum_combined])

        reverse_vacuum_combined = np.concatenate(reverse_vacuum_arrays)
        reverse_vacuum_accumulated = np.array([cycle[-1] - cycle[0] for cycle in reverse_vacuum_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
#         reverse_vacuum_combined = np.array([cycle[0::10] for cycle in reverse_vacuum_combined])
        
        # Analyze
        solvent_dg, solvent_ddg = analyze(forward_solvent_combined, reverse_solvent_combined, forward_solvent_accumulated, reverse_solvent_accumulated, i, 'solvent', os.path.dirname(forward_solvent_path))
        vacuum_dg, vacuum_ddg = analyze(forward_vacuum_combined, reverse_vacuum_combined, forward_vacuum_accumulated, reverse_vacuum_accumulated, i, "THR->ALA", 'vacuum', os.path.dirname(forward_vacuum_path))
        solvation_dg = vacuum_dg - solvent_dg
        solvation_ddg = (vacuum_ddg**2 + solvent_ddg**2)**0.5
        d_results[pairs[i]] = [solvation_dg, solvation_ddg]
#         print(f"vacuum_dg: {vacuum_dg}, ddg: {vacuum_ddg}")

#         print(f"vacuum dg: {vacuum_dg}, solvent_dg: {solvent_dg}")
    else:
        print(f"dir {i} has at least one phase without data" )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

dir: 18
job: 0
job: 1
job: 2
job: 3
job: 4
job: 5
job: 6
job: 7
job: 8
job: 9
job: 10
job: 11
job: 12
job: 13
job: 14
job: 15
job: 16
job: 17
job: 18
job: 19
job: 20
job: 21
job: 22
job: 23
job: 24
job: 25
job: 26
job: 27
job: 28
job: 29
job: 30
job: 31
job: 32
job: 33
job: 34
job: 35
job: 36
job: 37
job: 38
job: 39
job: 40
job: 41
job: 42
job: 43
job: 44
job: 45
job: 46
job: 47
job: 48
job: 49
job: 50
job: 51
job: 52
job: 53
job: 54
job: 55
job: 56
job: 57
job: 58
job: 59
job: 60
job: 61
job: 62
job: 63
job: 64
job: 65
job: 66
job: 67
job: 68
job: 69
job: 70
job: 71
job: 72
job: 73
job: 74
job: 75
job: 76
job: 77
job: 78
job: 79
job: 80
job: 81
job: 82
job: 83
job: 84
job: 85
job: 86
job: 87
job: 88
job: 89
job: 90
job: 91
job: 92
job: 93
job: 94
job: 95
job: 96
job: 97
job: 98
job: 99
job: 100
job: 101
job: 102
job: 103
job: 104
job: 105
job: 106
job: 107
job: 108
job: 109
job: 110
job: 111
job: 112
job: 113
job: 114
job: 115
job: 116
job: 117
job: 118
job: 119
job: 120
job: 121
job:

<Figure size 432x288 with 0 Axes>

In [8]:
forward_vacuum_path = '/data/chodera/zhangi/perses_benchmark/neq/7/7/7_vacuum_0_forward.npy'
with open(forward_vacuum_path, 'rb') as f:
    array = np.load(f)

In [9]:
array

array([[0.        , 0.0032781 , 0.00657016, ...,        nan,        nan,
               nan]])

In [9]:
# Prep work arrays (from distributed jobs) and call analyze()
d_results = {}
for i in tqdm_notebook(['9']):
    print(f"dir: {i}")
    # Load and combine arrays
    forward_solvent_arrays = []
    reverse_solvent_arrays = []
    forward_vacuum_arrays = []
    reverse_vacuum_arrays = []
    for j in range(100):
        print(f"job: {j}")
        forward_solvent_path = f'/data/chodera/zhangi/perses_benchmark/neq/3/{i}/{i}_solvent_{j}_forward.npy'
        reverse_solvent_path = f'/data/chodera/zhangi/perses_benchmark/neq/3/{i}/{i}_solvent_{j}_reverse.npy'
        forward_vacuum_path = f'/data/chodera/zhangi/perses_benchmark/neq/3/{i}/{i}_vacuum_{j}_forward.npy'
        reverse_vacuum_path = f'/data/chodera/zhangi/perses_benchmark/neq/3/{i}/{i}_vacuum_{j}_reverse.npy'
        if os.path.exists(forward_solvent_path):
            with open(forward_solvent_path, 'rb') as f:
                forward_solvent_arrays.append(np.load(f))
        if os.path.exists(reverse_solvent_path):
            with open(reverse_solvent_path, 'rb') as f:
                reverse_solvent_arrays.append(np.load(f))
        if os.path.exists(forward_vacuum_path):
            with open(forward_vacuum_path, 'rb') as f:
                forward_vacuum_arrays.append(np.load(f))
        if os.path.exists(reverse_vacuum_path):
            with open(reverse_vacuum_path, 'rb') as f:
                reverse_vacuum_arrays.append(np.load(f))
#     if forward_solvent_arrays and reverse_solvent_arrays and forward_vacuum_arrays and reverse_vacuum_arrays:
    if forward_vacuum_arrays and reverse_vacuum_arrays:
#         forward_solvent_combined = np.concatenate(forward_solvent_arrays)
#         forward_solvent_combined = np.array([cycle[0::10] for cycle in forward_solvent_combined])
#         reverse_solvent_combined = np.concatenate(reverse_solvent_arrays)
#         reverse_solvent_combined = np.array([cycle[0::10] for cycle in reverse_solvent_combined])
        
        
        forward_vacuum_combined = np.concatenate(forward_vacuum_arrays)
        forward_vacuum_accumulated = np.array([cycle[-1] - cycle[0] for cycle in forward_vacuum_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
#         forward_vacuum_combined = np.array([cycle[0::10] for cycle in forward_vacuum_combined])

        reverse_vacuum_combined = np.concatenate(reverse_vacuum_arrays)
        reverse_vacuum_accumulated = np.array([cycle[-1] - cycle[0] for cycle in reverse_vacuum_combined]) # compute this separately bc the last value of the subsampled array is diff than the actual last sample
#         reverse_vacuum_combined = np.array([cycle[0::10] for cycle in reverse_vacuum_combined])
        
        # Analyze
#         solvent_dg, solvent_ddg = analyze(forward_solvent_combined, reverse_solvent_combined, forward_solvent_accumulated, reverse_solvent_accumulated, i, 'solvent', os.path.dirname(forward_solvent_path))
        vacuum_dg, vacuum_ddg = analyze(forward_vacuum_combined, reverse_vacuum_combined, forward_vacuum_accumulated, reverse_vacuum_accumulated, 9, 'vacuum', os.path.dirname(forward_vacuum_path))
#         solvation_dg = vacuum_dg - solvent_dg
#         solvation_ddg = (vacuum_ddg**2 + solvent_ddg**2)**0.5
#         d_results[pairs[i]] = [solvation_dg, solvation_ddg]
        print(f"vacuum_dg: {vacuum_dg}, ddg: {vacuum_ddg}")

#         print(f"vacuum dg: {vacuum_dg}, solvent_dg: {solvent_dg}")
    else:
        print(f"dir {i} has at least one phase without data" )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

dir: 9
job: 0
job: 1
job: 2
job: 3
job: 4
job: 5
job: 6
job: 7
job: 8
job: 9
job: 10
job: 11
job: 12
job: 13
job: 14
job: 15
job: 16
job: 17
job: 18
job: 19
job: 20
job: 21
job: 22
job: 23
job: 24
job: 25
job: 26
job: 27
job: 28
job: 29
job: 30
job: 31
job: 32
job: 33
job: 34
job: 35
job: 36
job: 37
job: 38
job: 39
job: 40
job: 41
job: 42
job: 43
job: 44
job: 45
job: 46
job: 47
job: 48
job: 49
job: 50
job: 51
job: 52
job: 53
job: 54
job: 55
job: 56
job: 57
job: 58
job: 59
job: 60
job: 61
job: 62
job: 63
job: 64
job: 65
job: 66
job: 67
job: 68
job: 69
job: 70
job: 71
job: 72
job: 73
job: 74
job: 75
job: 76
job: 77
job: 78
job: 79
job: 80
job: 81
job: 82
job: 83
job: 84
job: 85
job: 86
job: 87
job: 88
job: 89
job: 90
job: 91
job: 92
job: 93
job: 94
job: 95
job: 96
job: 97
job: 98
job: 99
vacuum_dg: -47.47230410221346, ddg: 0.09503482556089331



<Figure size 432x288 with 0 Axes>