In [None]:
import MDAnalysis as mda
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from fe_gmx import AWH_Ensemble, AWH_2D_Ensemble

In [None]:
# Build the 2D AWH ensemble object from the '../AWH' directory.
# NOTE(review): regenerate_awh=False presumably reuses previously generated
# AWH output instead of recomputing it - confirm against fe_gmx.
awh_ensemble = AWH_2D_Ensemble('../AWH', regenerate_awh=False)

In [None]:
# Display the first entry of the ensemble's AWH log.
awh_ensemble.awh_log[0]

In [None]:
# Take the last entry of the AWH time series and of the PMF collection.
# NOTE(review): assumes both are ordered chronologically, so [-1] is the
# most recent estimate - confirm against fe_gmx.
time = awh_ensemble.awh_results.timeseries[-1]
awh_pmf = awh_ensemble.awh_results.pmf[-1]

In [None]:
# Unpack the PMF grid into the two collective-variable axes and the free
# energy surface.
# NOTE(review): assumes awh_pmf is a 3-D array laid out as
# (cv1 index, cv2 index, [cv1 value, cv2 value, free energy]) - confirm.
awh_cv1 = awh_pmf[:, 0, 0]       # CV1 grid values along the first axis
awh_cv2 = awh_pmf[0, :, 1]       # CV2 grid values along the second axis
awh_fes = awh_pmf[:, :, 2].T     # transposed so rows follow CV2, as contourf expects

fig, ax = plt.subplots(figsize=(7, 9))
mappable = ax.contourf(awh_cv1, awh_cv2, awh_fes, levels=20)
plt.colorbar(mappable)

ax.set_xlabel('Loop C opening (nm)')
ax.set_ylabel('Distance (nm)')
ax.set_title(f'AWH PMF at {time} ns')

In [None]:
import os
import dask
from dask.distributed import Client, LocalCluster
from ENPMDA import MDDataFrame
from ENPMDA.preprocessing import TrajectoryEnsemble

In [None]:
# Start a local Dask cluster with 16 workers and the scheduler on port 8789.
# NOTE(review): memory_limit is documented by dask.distributed as a per-worker
# limit, i.e. up to 16 x 4GB in total - confirm this fits the machine.
cluster = LocalCluster(n_workers=16,
                       scheduler_port=8789,
                       memory_limit='4GB')

In [None]:
# Connect a Dask client to the local cluster.
client = Client(cluster)

In [None]:
# Show the client's notebook representation.
client

In [None]:
# Accumulators for the per-replica input files; they are filled by the loop
# over awh_ensemble.rep_folder and then passed to TrajectoryEnsemble.
trajectory_list = []
topology_list = []
bonded_topology_list = []

In [None]:
# Collect the trajectory (xtc), topology (pdb) and run-input (tpr) file of
# every AWH replica folder, with paths anchored at the current directory.
default_raw_load_location = os.getcwd()

# Start from empty lists so that re-running this cell does not append every
# replica a second time.
trajectory_list = []
topology_list = []
bonded_topology_list = []

for folder in awh_ensemble.rep_folder:
    load_location = f'{default_raw_load_location}/{folder}'
    # The shared starting structure is looked up two directories above the
    # replica folder.
    start_structure = load_location + '/../../start.pdb'

    if not os.path.exists(start_structure):
        # The file is missing, so FileNotFoundError is the matching exception
        # (FileExistsError means the opposite).
        raise FileNotFoundError(start_structure)
    trajectory_list.append(load_location + "/awh.xtc")
    topology_list.append(start_structure)
    bonded_topology_list.append(load_location + "/awh.tpr")

In [None]:
# Describe the ensemble of replica trajectories and load it.
# The three lists are parallel: entry i of each belongs to the same replica.
# NOTE(review): the meaning of skip / updating / only_raw is defined by
# ENPMDA's TrajectoryEnsemble and is not visible here - confirm against its docs.
trajectory_ensemble = TrajectoryEnsemble(ensemble_name='loopc_epj_ensemble',
                                         trajectory_list=trajectory_list,
                                         topology_list=topology_list,
                                         bonded_topology_list=bonded_topology_list,
                                         skip=1,
                                         updating=False,
                                         only_raw=False,
                                         # keep the EPJ ligand together with the protein
                                         protein_selection='protein or resname EPJ'
                                         )
trajectory_ensemble.load_ensemble()

In [None]:
from ENPMDA.analysis.base import DaskChunkMdanalysis

In [None]:
# Create the ENPMDA dataframe (named 'epj_awh_2d') and attach the loaded
# trajectory ensemble to it. ensemble_name is reused later when saving.
# NOTE(review): npartitions=30 / stride=1 presumably set the number of Dask
# partitions and the frame stride - confirm against the ENPMDA docs.
ensemble_name = 'epj_awh_2d'
md_dataframe = MDDataFrame(dataframe_name=ensemble_name)
md_dataframe.add_traj_ensemble(trajectory_ensemble,
                            npartitions=30,
                            stride=1)

In [None]:
# Summarise the assembled dataframe: number of frames, frame spacing,
# number of systems and the available columns.
frame_table = md_dataframe.dataframe

# Spacing between the first two rows. .iloc[1] is positional, so it works
# whatever the index labels are; the plain [1] lookup is label-based and
# returns several rows (or raises) when the index is not a unique 0..N range.
# NOTE(review): the /1000 assumes traj_time is stored in ps - confirm.
frame_spacing = frame_table.traj_time.diff().iloc[1]

print(f"Number of frames in the dataframe is {len(frame_table)}")
print(f"dt in the dataframe is {frame_spacing / 1000} ns")
print(f"Number of systems in the dataframe is {len(frame_table.system.unique())}")
print(f"Existing features are {frame_table.columns}")

In [None]:
# Build one binding-pocket selection string per subunit interface.
#
# The pocket is defined once, from the residues surrounding the first EPJ
# ligand in start.pdb, and then copied to the other interfaces by replacing
# every segid with the segid of the next subunit (next_subunit_dict).
# The result, prot_selections, is read by the loop C analysis class.
N_SUBUNITS = 5

u = mda.Universe('start.pdb')

# First EPJ residue; its surroundings serve as the template pocket.
ag_lig_1 = u.select_atoms('resname EPJ').split('residue')[0]

# CA atoms of every protein residue that has an atom within 4 Angstrom
# (MDAnalysis length unit) of that ligand.
prot_binding_site = u.select_atoms('name CA and byres (protein and around 4 group ag_lig_1)', ag_lig_1=ag_lig_1)
if prot_binding_site.n_atoms == 0:
    # Fail here with a clear message instead of building a malformed
    # selection string that only breaks later inside the analysis.
    raise ValueError('No protein residue found within 4 A of the first EPJ ligand in start.pdb')

# Order the pocket residues by chain first and residue number second.
# This is a two-pass sort (resid, then segid); it is only correct when the
# second pass is stable, which numpy's default argsort does not guarantee.
resid_order = np.argsort(prot_binding_site.resids, kind='stable')
chain_resids = prot_binding_site.segids[resid_order]
prot_resids = prot_binding_site.resids[resid_order]

chain_order = np.argsort(chain_resids, kind='stable')
prot_resids = prot_resids[chain_order]
chain_resids = chain_resids[chain_order]

print(prot_resids)
print(chain_resids)

# segid -> segid of the equivalent chain in the next subunit; applying the
# mapping five times returns to the starting segid.
next_subunit_dict = {'A': 'C',
                     'B': 'D',
                     'C': 'E',
                     'D': 'F',
                     'E': 'G',
                     'F': 'H',
                     'G': 'I',
                     'H': 'J',
                     'I': 'A',
                     'J': 'B'}

prot_selections = []

# Work on a copy so chain_resids keeps describing the template pocket.
subunit_chains = list(chain_resids)
for _ in range(N_SUBUNITS):
    # Only makes sense for homomers: the same residue numbers are reused in
    # every subunit.
    # NOTE(review): the loop C selections in the analysis class use different
    # residue ranges for segids A/G and C/E/I - verify that reusing identical
    # residue numbers across subunits is valid for this system.
    residue_terms = [f'(resid {resid} and segid {chain})'
                     for resid, chain in zip(prot_resids, subunit_chains)]
    prot_selections.append('protein and (' + ' or '.join(residue_terms) + ')')
    # Move on to the next subunit.
    subunit_chains = [next_subunit_dict[chain] for chain in subunit_chains]

In [None]:
from MDAnalysis.analysis.distances import distance_array

class get_loopc_dynamics(DaskChunkMdanalysis):
    """Loop C to binding-pocket distance for each of the five subunits.

    For every analysed frame, the distance between the centre of mass of a
    subunit's loop C CA atoms and the centre of mass of the matching entry of
    the module-level ``prot_selections`` is computed.
    """
    name = 'loopC'
    universe_file = 'protein'

    def set_feature_info(self, universe):
        """Return the feature labels ``chn_0`` ... ``chn_4``; ``universe`` is unused."""
        return [f'chn_{chain_index}' for chain_index in range(5)]

    def run_analysis(self, universe, start, stop, step):
        """Compute the loop C / pocket distances for frames ``start:stop:step``.

        Returns a list with one numpy array per frame; each array stacks the
        five per-subunit ``distance_array(...)[0]`` rows in subunit order.
        """
        # Loop C CA atoms, one selection per subunit (residue numbering
        # differs between segids A/G and C/E/I).
        loopc_queries = (
            'segid A and resid 193-206 and name CA',
            'segid C and resid 185-198 and name CA',
            'segid E and resid 185-198 and name CA',
            'segid G and resid 193-206 and name CA',
            'segid I and resid 185-198 and name CA',
        )
        loopC_sites = [universe.select_atoms(query) for query in loopc_queries]

        # Binding pockets come from the notebook-level prot_selections list.
        binding_sites = [universe.select_atoms(query) for query in prot_selections]

        per_frame = []
        # Iterating advances the trajectory; the timestep object itself is
        # not needed because the atom groups read the current frame.
        for _ in universe.trajectory[start:stop:step]:
            frame_distances = []
            for loop_atoms, pocket_atoms in zip(loopC_sites, binding_sites):
                separation = distance_array(loop_atoms.center_of_mass(),
                                            pocket_atoms.center_of_mass())
                frame_distances.append(separation[0])
            per_frame.append(np.asarray(frame_distances))
        return per_frame

In [None]:
# Register the loop C analysis class with the dataframe.
# NOTE(review): overwrite=True presumably replaces previously stored results
# for this analysis - confirm against the ENPMDA docs.
md_dataframe.add_analysis(get_loopc_dynamics, overwrite=True)

In [None]:
# Sort the collected analysis results.
# NOTE(review): the ordering criterion is defined inside ENPMDA - confirm.
md_dataframe.sort_analysis_result()

In [None]:
# Save the dataframe under ensemble_name ('epj_awh_2d').
# NOTE(review): file format and location are decided by MDDataFrame.save.
md_dataframe.save(ensemble_name)

## Retrieve the data

In [None]:
# Retrieve the 'loopC' analysis results from the dataframe.
feature_dataframe = md_dataframe.get_feature([
                        'loopC',
                        ])

In [None]:
# Display the retrieved feature table.
feature_dataframe

In [None]:
# Histogram of the 'loopC_chn_0' column, coloured by system, 100 bins.
# NOTE(review): the column name presumably combines the analysis name
# ('loopC') with the feature label ('chn_0') - confirm.
# NOTE(review): with hue and stat='probability', seaborn's default
# common_norm=True normalises over all systems together; pass
# common_norm=False if per-system probabilities are intended.
sns.histplot(data=feature_dataframe,
             x='loopC_chn_0',
             hue='system', bins=100, stat='probability')

In [None]:
# Histogram of the 'loopC_chn_3' column, coloured by system, 100 bins.
# NOTE(review): the column name presumably combines the analysis name
# ('loopC') with the feature label ('chn_3') - confirm.
# NOTE(review): with hue and stat='probability', seaborn's default
# common_norm=True normalises over all systems together; pass
# common_norm=False if per-system probabilities are intended.
sns.histplot(data=feature_dataframe,
             x='loopC_chn_3',
             hue='system', bins=100, stat='probability')