In [1]:
import numpy as np
import pandas as pd
from copy import copy, deepcopy
from sklearn.metrics import jaccard_score
import time
import matplotlib.pyplot as plt
from scipy.spatial import distance as scidist
import MDAnalysis as mda
from EpockGrid2VoidsAllostery_util import *
from scipy.ndimage import gaussian_filter1d
import argparse
import fnmatch
import os

'''
EpockGrid2VoidsAllostery. Step 4.
Author: Yulian Gavrilov
yulian.gavrilov@bpc.lu.se
yulian.gavrilov@gmail.com

Python code for the analysis of allosteric communication in proteins based on
the dynamics of the internal protein void_clusters.

See readme.txt for further details.
'''

'\nEpockGrid2VoidsAllostery. Step 4.\nAuthor: Yulian Gavrilov\nyulian.gavrilov@bpc.lu.se\nyulian.gavrilov@gmail.com\n\nPython code for the analysis of allostric communiction in proteins based on\nthe dynamics of the internal protein void_clusters.\n\nSee readme.txt for the further details.\n'

In [2]:
# Name of the simulated system; it is interpolated into every input/output file name.
system = "cort_wt"

# Topology (first-frame PDB) and trajectory (XTC) of the system.
# NOTE(review): the topology file name says "dt100" while the trajectory says
# "dt1000" -- confirm this pairing is intended.
topology_path = f'./md3us_{system}_fitCav_dt100_prot_cut_nolig_fr0.pdb'
trajectory_path = f'./md3us_{system}_fitCav_dt1000_prot_cut_nolig.xtc'
u1 = mda.Universe(topology_path, trajectory_path)

# The frame count is taken from the trajectory itself
# (hard-coded alternatives used previously: 28498, 2850).
nframes = len(u1.trajectory)


In [3]:
# Display the number of frames read from the trajectory (len(u1.trajectory)).
nframes

2850

In [4]:
def _load_npy(path):
    """Load one ``.npy`` file written by an earlier step of the pipeline.

    SECURITY: ``allow_pickle=True`` unpickles Python objects, which can execute
    arbitrary code embedded in the file. Only load files you generated yourself.

    Parameters
    ----------
    path : str
        Path of the ``.npy`` file to read.

    Returns
    -------
    The object returned by ``np.load`` (a NumPy array; for a pickled Python
    object this is an object array that may need ``.item()`` to unwrap).
    """
    with open(path, 'rb') as f:
        return np.load(f, allow_pickle=True)


# Per-frame atom clusters (after reindexing).
atom_clusters_frames = _load_npy(f'{system}_atom_clusters_frames{nframes}_reindex.npy')

# Per-frame clusters after reindexing, "resindex" variant of the file.
atom_clusters_frames_with_res = _load_npy(f'{system}_atom_clusters_frames{nframes}_reindex_resindex.npy')

# N matrix.
NMS = _load_npy(f'{system}_Nmatrix_frames{nframes}.npy')

# Sorted N-matrix data. NOTE(review): unlike cluster_storage_out below, this is
# deliberately NOT unwrapped with .item() (the call was commented out in the
# original) -- confirm the stored object is meant to be used as an array.
sort_NMS_dict = _load_npy(f'{system}_Nmatrix_dict_frames{nframes}.npy')

# Storage of all accumulated clusters; .item() unwraps the 0-d object array.
cluster_storage_out = _load_npy(f'{system}_all_clusters_storage_frames{nframes}.npy').item()

In [None]:
# Index of the frame whose clusters are listed below.
# Set to nframes - 1 to inspect the last frame instead of the first one.
frame_ndx = 0

# The message is built from frame_ndx so it always names the frame that is
# actually printed (it used to say "last frame" while showing frame 0).
print(f"show all clusters (in frame {frame_ndx}) after reindexing")
for cluster in atom_clusters_frames[frame_ndx].clusters:
    print(cluster)
print("\n\n")
####

In [None]:
# print("\nShow all accumulated clusters (in the last frame) after reindexing")
# for i in range(0,len(cluster_storage_out.clusters)):
#     print (cluster_storage_out.clusters[i].clusterID, end = " ")
#     #print (cluster_storage_out.clusters[i], end = " ")
# print("\n",i)

In [None]:
# Count, via get_residues_in_clusters_count, the selected residues in the clusters.
# Residue selections used previously instead of [5, 6]:
#   [*range(250, 264, 1)]  -> all TIF2 residues
#   [*range(0, 264, 1)]    -> all GR-TIF2 residues
res_in_frame_clusters = get_residues_in_clusters_count(
    atom_clusters_frames_with_res,
    nframes,
    resid=[5, 6],
)

print(res_in_frame_clusters)

In [None]:
# Count, via get_residues_in_frames_count, the selected residues (5 and 6) over the frames.
res_in_frames = get_residues_in_frames_count(
    atom_clusters_frames_with_res,
    nframes,
    resid=[5, 6],
)
print(res_in_frames)


In [None]:
#print (sum(res_in_frame_clusters_keys.values()))
#print (res_in_frame_clusters_keys.values())
#dict(sorted(res_in_frame_clusters_keys.items(), key=lambda item: item[1], reverse=True))

In [5]:
# N-matrix elements selected by get_max_split_merge with a cutoff of 500.
split_merge_count = get_max_split_merge(sort_NMS_dict, split_merge_cutoff=500)

print("N matrix elements with max number of splits/merges")

# One line per element: its key followed by the stored count.
for element_key in split_merge_count:
    n_events = split_merge_count[element_key]
    print(element_key, n_events)
print("")

####

# Cluster persistency, computed by get_cluster_persistency with a 50 % cutoff.
(
    all_persistency_clIDs,
    all_persistency_clIDs_percent,
    cluster_persistency,
) = get_cluster_persistency(
    atom_clusters_frames,
    cluster_storage_out,
    nframes,
    persistency_cutoff_percent=50,
)

print ("The cluster persistency (number of frames you can find the cluster in)")
# Each entry of cluster_persistency holds the cluster ID at [0] and its frame
# count at [1]. The percentage is looked up by the entry's own cluster ID
# instead of by position, so the printed pairs stay correct even if the list
# and the mapping are ordered differently.
# Assumes all_persistency_clIDs_percent is keyed by cluster ID -- TODO confirm.
for entry in cluster_persistency:
    cluster_id, frames_present = entry[0], entry[1]
    if cluster_id not in all_persistency_clIDs_percent:
        # No percentage stored for this cluster: nothing to report.
        continue
    print (cluster_id, ":", frames_present, "or", all_persistency_clIDs_percent[cluster_id], "%")


N matrix elements with max number of splits/merges
0_10-0_47 1659.0
0_5-0_38 1195.0
0_5-0_33 1061.0
0_5-0_10 1009.0
0_1-0_5 905.0
0_3-0_10 798.0
0_47-0_61 697.0
0_27-0_40 619.0
0_10-0_46 598.0
0_10-0_27 591.0
0_10-0_29 566.0
0_10-4_19 560.0
0_10-0_61 525.0


The cluster persistency (number of frames you can find the cluster in)
0_10 : 2850 or 100 %
0_5 : 2769 or 97 %
0_47 : 2702 or 95 %
0_27 : 2669 or 94 %
0_33 : 2606 or 91 %
0_61 : 2294 or 80 %
0_40 : 2106 or 74 %
0_3 : 2071 or 73 %
0_49 : 2028 or 71 %
0_38 : 1978 or 69 %
0_1 : 1931 or 68 %
4_19 : 1604 or 56 %
0_32 : 1545 or 54 %
0_9 : 1498 or 53 %
0_23 : 1474 or 52 %
0_24 : 1430 or 50 %


In [6]:
# Report the volume of cluster "0_10" and of the clusters in contact with it.
print_cluster_volume_and_contacts(
    atom_clusters_frames,
    sort_NMS_dict,
    "0_10",
    volume_cutoff=2,
    numb_contacts_cutoff=500,
)


Volume (units: number of 1.125 Å cubes)
Input cluster: 0_10 :  5252.73 ± 1158.95
Clusters in contact with the input cluster:
0_47 :  421.96 ± 508.14 numb of contacts:  0_10-0_47 1659.0
0_5 :  1093.86 ± 1053.73 numb of contacts:  0_5-0_10 1009.0
0_3 :  99.02 ± 154.41 numb of contacts:  0_3-0_10 798.0
0_46 :  16.54 ± 53.8 numb of contacts:  0_10-0_46 598.0
0_27 :  468.34 ± 362.88 numb of contacts:  0_10-0_27 591.0
0_29 :  11.11 ± 37.94 numb of contacts:  0_10-0_29 566.0
4_19 :  59.63 ± 127.23 numb of contacts:  0_10-4_19 560.0
0_61 :  570.7 ± 546.59 numb of contacts:  0_10-0_61 525.0



In [None]:
# Residue persistency within cluster '0_10', for residues 1 to 265.
(
    res_in_cluster_dict,
    res_in_cluster_percent_dict,
    res_in_cluster_abs_percent_dict,
) = get_res_persistency_in_cluster(
    atom_clusters_frames_with_res,
    all_persistency_clIDs,
    nframes,
    aclusterID='0_10',
    first_res=1,
    last_res=265,
)

print("keys - residue index, value - persistence (abs or %)")
# Only the percent dictionary is shown; the other two can be printed the same way:
#   print(res_in_cluster_dict)
#   print(res_in_cluster_abs_percent_dict)
print(res_in_cluster_percent_dict)

In [None]:
# Look up the entry stored under key 263 (keys are residue indices, values are
# persistence, per the message printed by the previous cell).
res_in_cluster_percent_dict[263]

In [7]:
# Contacts (split/merge events) between clusters, as returned by get_cluster_group_contacts.
numb_of_selec_contacts, selected_contacts_dict = get_cluster_group_contacts(
    atom_clusters_frames,
    sort_NMS_dict,
    all_persistency_clIDs_percent,
)

print("Number of frames with split/merge events (contacts) between the clusters")
print("(filtered to show only the persistant clusters based on \"persistency_cutoff_percent\" value)")
print("Total number of frames: ", nframes)

# Only pairs with strictly more than this many contacts are printed.
min_contacts = 500
for contact_entry in numb_of_selec_contacts:
    if not float(contact_entry[1]) > min_contacts:
        continue
    print(contact_entry[0], contact_entry[1])


Number of frames with split/merge events (contacts) between the clusters
(filtered to show only the persistant clusters based on "persistency_cutoff_percent" value)
Total number of frames:  2850
0_10-0_47 1659.0
0_5-0_38 1195.0
0_5-0_33 1061.0
0_5-0_10 1009.0
0_1-0_5 905.0
0_3-0_10 798.0
0_47-0_61 697.0
0_27-0_40 619.0
0_10-0_27 591.0
0_10-4_19 560.0
0_10-0_61 525.0


In [8]:
# Call make_pymol_script for the clusters and the selected contacts.
# Values tried before for the last two arguments: sphere_radius_scaler = 3,
# radius_correction = 1500.
make_pymol_script(
    all_persistency_clIDs,
    all_persistency_clIDs_percent,
    atom_clusters_frames_with_res,
    system,
    nframes,
    selected_contacts_dict,
    persistency_cutoff_percent=50,
    sphere_radius_scaler=8,
    radius_correction=1000,
)

In [None]:
# List every cluster of the last frame, taken from atom_clusters_frames_with_res
# (the data after reindexing and after substituting atom indices with
# residues' indices).
# print("show all accumulated clusters (in the last frame) after reindexing; after substituting atom indices with residues' indices")
frame_ndx = nframes - 1
last_frame_clusters = atom_clusters_frames_with_res[frame_ndx].clusters
for cluster in last_frame_clusters:
    print(cluster)
print("\n\n")
# ####

In [None]:
# # print all clusters' IDs
#
# for i in range(0,len(cluster_storage_out.clusters)):
#     print (cluster_storage_out.clusters[i].clusterID, end = " ")
# print("\n",i)

In [None]:
# GET CLUSTERS' VOLUME
# One call to get_cluster_volume per cluster ID; results are bound, in order,
# to volume_in_frames_1 ... volume_in_frames_5.
(
    volume_in_frames_1,
    volume_in_frames_2,
    volume_in_frames_3,
    volume_in_frames_4,
    volume_in_frames_5,
) = [
    get_cluster_volume(cluster_id, atom_clusters_frames)
    for cluster_id in ("0_10", "0_47", "0_5", "0_3", "0_46")
]


In [None]:
# Mean and standard deviation of the first volume series (cluster "0_10").
volume_mean = np.mean(volume_in_frames_1)
volume_std = np.std(volume_in_frames_1)
print(volume_mean, "±", volume_std)

In [None]:
# PLOT CLUSTERS' VOLUME
#
# Every per-frame volume series is smoothed with a 1-D Gaussian filter before
# it is drawn:
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.gaussian_filter1d.html

sigma = 20  # standard deviation of the Gaussian kernel passed to gaussian_filter1d

# (legend label, volume series, line colour) for each curve. The labels are the
# cluster IDs that were passed to get_cluster_volume() when
# volume_in_frames_1 ... volume_in_frames_5 were computed; keep them in sync.
volume_series = [
    ('cluster 0_10', volume_in_frames_1, 'g'),
    ('cluster 0_47', volume_in_frames_2, 'b'),
    ('cluster 0_5', volume_in_frames_3, 'y'),
    ('cluster 0_3', volume_in_frames_4, 'm'),
    ('cluster 0_46', volume_in_frames_5, 'cyan'),
]

fig, ax = plt.subplots(figsize=(6, 4), dpi=100)

for label, volume_in_frames, color in volume_series:
    ax.plot(gaussian_filter1d(volume_in_frames, sigma),
            color=color, linestyle='-', label=label)

ax.set_ylabel('Volume (number of probe spheres)', fontsize=10)
ax.set_xlabel('frame', fontsize=10)
ax.set_title(f'{system}. Change in volume of the internal voids clusters', fontsize=10)

# Without a legend the five curves cannot be told apart.
ax.legend(loc='upper right', prop={"size": 10});

#ax.set_ylim([0, 100]);

#plt.savefig(f'void_clusters_{system}_dt100_{nframes}fr_clusters.pdf', bbox_inches = "tight");
#plt.savefig(f'void_clusters_{system}_dt100_{nframes}fr_clusters_zoom.pdf', bbox_inches = "tight");
