In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from database.query import get_trajattr
from conf.conf_analysis import protca
from utils.dataset import read_trajdata
from plot.plot_utilities import edgeformat, savefig, hist1d, hist2d

# Preparation

In [None]:
# Restore variables that were persisted with IPython's %store magic
# (they must have been stored beforehand, otherwise the cells below fail).
%store -r traj_ids
%store -r traj_ids_closed
%store -r states_df

# Per-state lookups, indexed by state id in the plotting cells below:
# map_assign[s] is used as the legend label, color_assign[s] as the colour.
%store -r map_assign
%store -r color_assign

In [None]:
# Name of the states_df column whose state assignment is analysed below
label = 'tmpc1v2'
# State assignment of every row of states_df for that column
states = states_df[label]

# Time evolution of fractional populations of states

In [None]:
# Fraction of rows assigned to each state at every timestep
# (rows: timestep, columns: state id).
#
# Computed with one groupby instead of querying states_df once per timestep and
# growing the table row by row. The row-by-row version started from an empty,
# object-dtype frame, which is why it needed an explicit cast to float.
t_max = 1000

state_proportions = (
    states_df.loc[states_df['timestep'].isin(np.arange(t_max + 1))]
    .groupby('timestep')[label]
    .value_counts(normalize=True)
    # A state that is absent at a timestep has a population of 0, not NaN
    .unstack(fill_value=0.0)
    .reindex(columns=states_df[label].unique(), fill_value=0.0)
    .astype(float)
)

# Plotting
fig, axs = plt.subplots()
edgeformat(axs)
for s in [3,0,2,1]:
    axs.plot(state_proportions[s], label=map_assign[s], color=color_assign[s])

# Integer tick arrays avoid float-step np.arange, whose length can vary by one
time_ticks = np.arange(0, t_max + 1, 200)
percent_ticks = np.arange(0, 50 + 10, 10)

axs.set_xlim(0, t_max)
axs.set_ylim(0, 0.5)
axs.set_xticks(time_ticks)
axs.set_xticklabels(time_ticks, fontsize=14)
# Data are fractions in [0, 1]; only the tick labels are shown in percent
axs.set_yticks(percent_ticks / 100)
axs.set_yticklabels(percent_ticks, fontsize=14)
axs.set_xlabel("Time [ns]", fontsize=16)
axs.set_ylabel("Fraction of state [%]", fontsize=16)
axs.grid(True, ls='--')

axs.legend(ncols=2, loc=4)

# savefig("state_fraction.pdf")

# HOLE2 analysis of pore radius

In [None]:
%store -r hole2_df
%store -r states_df

# # average along the columns
# hole2_df_avg = hole2_df.mean(axis=0)
# # standard deviation along the columns
# hole2_df_std = hole2_df.std(axis=0)

label = 'tmpc1v2'

fig, axs = plt.subplots(figsize=(4, 6.4))
edgeformat(axs)

# TODO: tmp solution for unequal amount of data
states_df_tmp = pd.merge(states_df, hole2_df[['traj_id', 'timestep']])

for s in [3,0,2,1]:
    state_hole2 = hole2_df[states_df_tmp[label].values == s].iloc[:,2:]
    
    # average along the columns
    hole2_df_avg = state_hole2.mean(axis=0)
    # standard deviation along the columns
    hole2_df_std = state_hole2.std(axis=0)
    # standard error along the columns
    hole2_df_sem = state_hole2.sem(axis=0)
    
    plt.plot(hole2_df_avg.values, hole2_df_avg.index, label=map_assign[s], color=color_assign[s])
    # TODO: bootstrap profiles as a whole might be a better idea?
    plt.fill_betweenx(hole2_df_avg.index.astype(int), hole2_df_avg - 3*hole2_df_sem, hole2_df_avg + 3*hole2_df_sem, alpha=0.5)

plt.ylim(90,150)
plt.xlim(0,8)

axs.set_yticks(np.arange(90,150+10,10))
axs.set_yticklabels(np.arange(90,150+10,10)-130, fontsize=14)
axs.set_xticks(np.arange(0,8+2,2))
axs.set_xticklabels(np.arange(0,8+2,2), fontsize=14)
plt.xlabel("Average radius of the pore [Å]", fontsize=16)
plt.ylabel("z [Å]", fontsize=16)

plt.legend()
plt.grid(True)

# savefig("avg_hole2.pdf")

In [None]:
# Min within the bottleneck: per-state distribution of the smallest pore radius
# found in hole2_df columns 125..135.
fig, axs = plt.subplots(figsize=(6,2))
edgeformat(axs)

# State label of every HOLE2 row, in hole2_df row order.
# Computed here so the cell does not depend on a temporary frame built in
# another cell, and so the positional mask is guaranteed to line up with
# hole2_df (a left merge preserves the order of the left frame). Rows without a
# state assignment get NaN and match no state.
key_cols = ['traj_id', 'timestep']
hole2_states = hole2_df[key_cols].merge(
    states_df[key_cols + [label]], on=key_cols, how='left'
)
assert len(hole2_states) == len(hole2_df), \
    "states_df has duplicate (traj_id, timestep) keys; labels cannot be aligned to hole2_df"
hole2_state_labels = hole2_states[label].to_numpy()

for s in [3,0,2,1]:
    state_hole2 = hole2_df[hole2_state_labels == s]
    # Smallest radius per row across the bottleneck columns (label slice, inclusive)
    state_minrad = state_hole2.loc[:, 125:135].min(axis=1)
    hist1d(state_minrad, bins=75, range=[0,5]).plot(axs=axs, lw=2,
                                                    label=f"{map_assign[s]}: {round(state_minrad.mean(), 1)} Å", color=color_assign[s])
axs.legend()

axs.set_ylim(0,1.2)
axs.set_yticks(np.arange(0,1.2+0.3,0.3))
axs.set_yticklabels(np.arange(0,12+3,3)/10, fontsize=14)
axs.set_xticks(np.arange(5+1))
axs.set_xticklabels(np.arange(5+1), fontsize=14)

axs.set_xlabel(r"$R_{\mathrm{min}}$ [Å]", fontsize=16)
axs.set_ylabel("Prob. Density [A.U.]", fontsize=16)

axs.grid(True, ls='--')
# The histogram covers 0..5 but only 0..4 is shown
axs.set_xlim(0,4)

# savefig("minrad_distrib.pdf")

# SASA

In [None]:
fig, Axs = plt.subplots(2, 1, sharex=True)

# Invisible full-figure axes that only carries the shared axis labels
background = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    background.spines[side].set_color('none')
background.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
# Also set the background to completely transparent
background.patch.set_alpha(0.0)

background.set_ylabel("Prob. Density [A.U.]", fontsize=16)
# SASA is a surface area, so its unit is Å^2 (the label previously read Å^3)
background.set_xlabel(r"SASA [$\mathrm{\AA^2}$]", fontsize=16)

for axs in Axs:
    edgeformat(axs)
    axs.grid(True, ls='--')

# NOTE(review): the mask is taken from `states_df_tmp` (states_df restricted to
# the HOLE2 frames, built in the HOLE2 cell), while the Cα-distance cell masks
# with `states_df` directly. This is only valid if the SASA data have exactly
# the same rows, in the same order, as `states_df_tmp` — TODO confirm.
state_labels = states_df_tmp[label].values
state_ids = sorted(states_df[label].unique())
sasa_ticks = np.arange(0,80+10,10)

# (axes, dataset passed to read_trajdata, top of the y axis in tenths)
sasa_panels = [
    (Axs[0], 'sasa/t1115', 2),
    (Axs[1], 'sasa/s1118', 1),
]

for panel, dataset, top_tenths in sasa_panels:
    sasa_dat, _, _ = read_trajdata(dataset, traj_ids=traj_ids)

    # One histogram per state, coloured like everywhere else in the notebook
    for s in state_ids:
        subset_sasa = sasa_dat[state_labels == s]
        hist1d(subset_sasa, bins=40, range=[0,80]).plot(panel, label=s, color=color_assign[s])

    density_ticks = np.arange(0,top_tenths+1,1)/10
    panel.set_xlim(0,80)
    panel.set_ylim(0,top_tenths/10)
    panel.set_yticks(density_ticks)
    panel.set_yticklabels(density_ticks, fontsize=12)
    panel.set_xticks(sasa_ticks)
    panel.set_xticklabels(sasa_ticks, fontsize=14)

# savefig('tm11_sasa.pdf')

# Residue distances

In [None]:
# Residues whose Cα information is loaded for both trajectory sets
ca_resids = (106, 334, 337, 338, 1118, 1122, 1115, 1134)

# NOTE(review): this replaces the `traj_ids_closed` restored via %store at the
# top of the notebook with a hard-coded range of ids (1933..1952) — confirm
# that the override is intended.
traj_ids_closed = np.arange(1933, 1953)

# Each loader gets its own list, exactly as when the literal was written twice
conf_dat_closed = protca(traj_ids_closed, resids=list(ca_resids))
conf_dat = protca(traj_ids, resids=list(ca_resids))

# Same load order as before: closed set first, then the main set
for loader in (conf_dat_closed, conf_dat):
    loader.load_cainfo()

In [None]:
fig, Axs = plt.subplots(2, 1, sharex=True, gridspec_kw={'hspace': 0.3})

# Invisible full-figure axes: only its axis labels are shown, so both panels
# share a single x label and a single y label.
background = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    background.spines[side].set_color('none')
background.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
background.patch.set_alpha(0.0)

background.set_ylabel("Prob. Density [A.U.]", fontsize=16)
background.set_xlabel(r'C$\alpha$ distance [$\mathrm{\AA}$]', fontsize=16)

for axs in Axs:
    edgeformat(axs)
    axs.grid(True, ls='--')

# One entry per panel: (axes, residue pair, title, arguments for set_ylim).
# The second panel only pins the lower y limit.
distance_panels = (
    (Axs[0], [334, 1122], 'R334-T1122', (0, 0.5)),
    (Axs[1], [338, 1118], 'T338-S1118', (0,)),
)

for panel, resid_pair, panel_title, ylim_args in distance_panels:
    dist_df = conf_dat.cadist_rpair(resid_pair)

    # Per-state histogram of the pair distance; rows are picked with a mask
    # taken from states_df, so dist_df must have the same row order.
    for s in sorted(states_df[label].unique()):
        select = (states_df[label].values == s)
        state_dist = dist_df[select]['dist']
        hist1d(state_dist, bins=60, range=[5,35]).plot(axs=panel, label=s, color=color_assign[s])

    panel.set_title(panel_title, fontsize=14)
    panel.set_xlim(5,35)
    panel.set_ylim(*ylim_args)

# savefig("TM6-TM11_distances.pdf")

# Translocation paths vs states and voltage

## Translocation paths mapped onto PC space

In [None]:
# Prerequisites: restore variables persisted with IPython's %store magic.
# transloc_df and pca_df are merged with states_df on (traj_id, timestep) in
# the next cell; r334sc is restored here but not used in the cells shown below.
%store -r transloc_df
%store -r r334sc
%store -r pca_df

In [None]:
labels = states_df[label].values

key_cols = ['traj_id', 'timestep']

# Keep only the frames that appear in transloc_df, with their state assignment
transloc_states_df = states_df.merge(transloc_df, on=key_cols)
# Add the PCA columns for the same frames
transloc_states_pca_df = pd.merge(transloc_states_df, pca_df, on=key_cols)

# The voltage is a per-trajectory attribute: look it up once per trajectory
# rather than once per row. `.tolist()` hands plain Python ints to
# get_trajattr, as the element-wise apply did.
voltage_by_traj = {
    t: get_trajattr(t, 'voltage')
    for t in transloc_states_pca_df['traj_id'].unique().tolist()
}
transloc_states_pca_df['Ez'] = transloc_states_pca_df['traj_id'].map(voltage_by_traj)
transloc_states_pca_df['path_assign'] = transloc_states_pca_df['path_assign'].astype(int)
transloc_states_pca_df

In [None]:
# Colour of each translocation path id (also used by the bar plots below)
color_dict = {0:'red', 1:'blue', 2:'limegreen'}

# Quick contour view of the whole PC1/PC2 distribution, with the translocation
# frames scattered on top and coloured by path.
fig, axs = plt.subplots()

xrange = [-70,70]
yrange = [-55,55]

pc_columns = ['pc1', 'pc2']
all_pc1, all_pc2 = pca_df[pc_columns].values.T
hist = hist2d(all_pc1, all_pc2, bins=60, range=[xrange, yrange])
plot = hist.hist2d_contour(axs, lw=0.3, colors='black')
edgeformat(axs)

for path in np.arange(3):
    on_path = transloc_states_pca_df['path_assign'] == path
    pc1, pc2 = transloc_states_pca_df.loc[on_path, pc_columns].values.T
    axs.scatter(pc1, pc2, c=color_dict[path], s=4, label=path, alpha=0.5)

axs.set_xlabel(r'PC1 [$\mathrm{\AA}$]', fontsize=16)
axs.set_ylabel(r'PC2 [$\mathrm{\AA}$]', fontsize=16)
axs.set_xlim(xrange[0], xrange[1])
axs.set_ylim(yrange[0], yrange[1])

# savefig("transloc_path_proj_pc.pdf")

In [None]:
# Breakdown of translocation paths by state, presented as a stacked bar graph.
# Bar heights are fractions of ALL translocation frames, so the bars of all
# states together add up to 1.

fig, axs = plt.subplots(figsize=(6,2))
edgeformat(axs)

transloc_subdf = transloc_states_pca_df
n_frames = len(transloc_subdf)
print(n_frames)

state_order = [3,0,2,1]
path_labels = ['1-12', '1-6', 'intermed.']
path_label_order = [1,0,2]

# Frames per (state, path), tabulated once. Combinations that never occur are
# filled with 0, and rows/columns follow the plotting order.
count_table = (
    pd.crosstab(transloc_subdf[label], transloc_subdf['path_assign'])
    .reindex(index=state_order, columns=path_label_order, fill_value=0)
)

bar_positions = np.arange(len(state_order))
barbase = np.zeros(len(state_order))
collect_counts = []

for path in path_label_order:
    raw_counts = count_table[path].to_numpy()
    print(raw_counts.tolist())

    # An empty selection yields zero-height bars rather than a division by zero
    counts = raw_counts / n_frames if n_frames else np.zeros(len(state_order))

    axs.bar(bar_positions, counts, bottom=barbase, color=color_dict[path], label=path_labels[path], zorder=3)
    # Stack the next path on top of this one
    barbase = barbase + counts

    collect_counts.append(counts)

axs.set_xticks(bar_positions)
axs.set_xticklabels([map_assign[s] for s in state_order], fontsize=16)
# Data are fractions; only the tick labels are shown in percent
axs.set_yticks(np.arange(0,100+20,20)/100)
axs.set_yticklabels(np.arange(0,100+20,20), fontsize=12)
axs.set_ylabel("Fraction [%]", fontsize=16)

axs.grid(axis='y', ls='--', zorder=0)

### Adapted from: https://www.statology.org/matplotlib-legend-order/
# Reverse the legend so its entries read top-to-bottom like the stacked bars.
# Separate names keep the `labels` array defined earlier from being overwritten.
legend_handles, legend_labels = axs.get_legend_handles_labels()
axs.legend(legend_handles[::-1], legend_labels[::-1])

# savefig("transloc_path_distribution.pdf")

## By voltage: barplot

In [None]:
# Breakdown of translocation paths by state, restricted to frames whose Ez lies
# within +/- ez_limit, presented as a stacked bar graph. Bar heights are
# fractions of the selected frames, so all bars together add up to 1.

fig, axs = plt.subplots(figsize=(6,2))
edgeformat(axs)

# Half-width of the Ez window (same units as the 'Ez' column), bounds included
ez_limit = 0.006

transloc_subdf = transloc_states_pca_df.query("Ez >= -@ez_limit & Ez <= @ez_limit")
n_frames = len(transloc_subdf)
print(n_frames)

state_order = [3,0,2,1]
path_labels = ['1-12', '1-6', 'intermed.']
path_label_order = [1,0,2]

# Frames per (state, path), tabulated once. Combinations that never occur are
# filled with 0, and rows/columns follow the plotting order.
count_table = (
    pd.crosstab(transloc_subdf[label], transloc_subdf['path_assign'])
    .reindex(index=state_order, columns=path_label_order, fill_value=0)
)

bar_positions = np.arange(len(state_order))
barbase = np.zeros(len(state_order))
collect_counts = []

for path in path_label_order:
    raw_counts = count_table[path].to_numpy()
    print(raw_counts.tolist())

    # The filter can leave no frames at all: draw zero-height bars in that case
    # instead of failing on a division by zero.
    counts = raw_counts / n_frames if n_frames else np.zeros(len(state_order))

    axs.bar(bar_positions, counts, bottom=barbase, color=color_dict[path], label=path_labels[path], zorder=3)
    # Stack the next path on top of this one
    barbase = barbase + counts

    collect_counts.append(counts)

axs.set_xticks(bar_positions)
axs.set_xticklabels([map_assign[s] for s in state_order], fontsize=16)
# Data are fractions; only the tick labels are shown in percent
axs.set_yticks(np.arange(0,100+20,20)/100)
axs.set_yticklabels(np.arange(0,100+20,20), fontsize=12)
axs.set_ylabel("Fraction [%]", fontsize=16)

axs.grid(axis='y', ls='--', zorder=0)

### Adapted from: https://www.statology.org/matplotlib-legend-order/
# Reverse the legend so its entries read top-to-bottom like the stacked bars.
# Separate names keep the `labels` array defined earlier from being overwritten.
legend_handles, legend_labels = axs.get_legend_handles_labels()
axs.legend(legend_handles[::-1], legend_labels[::-1])

# savefig("transloc_path_distribution_lowvoltage.pdf")