In [None]:
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
import pandas as pd

from clustering.substate_clusters import substates
from conf.tmhelix import helix_positions, inner_leaflet_defs, outer_leaflet_defs
from database.query import get_protdef
from plot.plot_utilities import edgeformat, hist1d, hist2d, savefig
from utils.atomselect import select_resids_str, advanced_combine
from utils.output_namespace import aligned_pdbfile

# Preparation

In [None]:
# Restore variables that were persisted with %store in another notebook/session.
# traj_ids is the set of trajectories analysed throughout this notebook;
# traj_ids_closed is only used for the "Closed states" section.
# NOTE(review): exact container type of both is not visible here -- confirm.
%store -r traj_ids
%store -r traj_ids_closed

# Dict assigning integer state labels to names of the states
%store -r map_assign
# Dict assigning colors to the states
%store -r color_assign
# Dataframe containing the state assignments
%store -r states_df

# Outer leaflet

In [None]:
# z-level definition used in this section: the outer leaflet
leveldefs = outer_leaflet_defs
# All resIDs of the level definition as one flat array
# (a resID that repeats in leveldefs is kept as often as it occurs)
resids_zlevel = np.array([*leveldefs.values()]).flatten()

# --- Reference structure 6MSM ---
pdbstructure_6msm = md.load(aligned_pdbfile(pdb_code='6msm'))
# Atom indices of the CA atoms of every resID in the z-level, one lookup per resID
ca_hits_6msm = [
    pdbstructure_6msm.top.select(f'name CA and resSeq {resid}')
    for resid in resids_zlevel
]
ca_zlevel_6msm = np.hstack(ca_hits_6msm)
# xy of those atoms in the first frame; mdtraj stores nm, x10 gives the Å used on the plots
xy_6msm = pdbstructure_6msm.xyz[0, ca_zlevel_6msm, :2] * 10

# --- Reference structure 5UAK ---
pdbstruc_5uak = md.load(aligned_pdbfile('5uak'))
ca_hits_5uak = [
    pdbstruc_5uak.top.select(f'name CA and resSeq {resid}')
    for resid in resids_zlevel
]
ca_zlevel_5uak = np.hstack(ca_hits_5uak)
xy_5uak = pdbstruc_5uak.xyz[0, ca_zlevel_5uak, :2] * 10

In [None]:
# Domain definitions of the protein with protein_id=1
domain_dict = get_protdef(protein_id=1)
# The twelve transmembrane helices TM1 ... TM12 are the domains to align on
domains_align = [f"TM{n}" for n in range(1, 12 + 1)]
tm_definitions = [domain_dict.get(name) for name in domains_align]
# Merge the per-helix definitions into the resIDs used for the alignment
# (the meaning of the leading 1, 30, 1 arguments is defined by advanced_combine)
refalign_resids, _ = advanced_combine(1, 30, 1, *tm_definitions)
# mdtraj selection string for those resIDs, restricted to the backbone
resid_alignment_selection_string = (
    select_resids_str(refalign_resids, package='mdtraj') + " and backbone"
)

# xyz coordinates (first frame, x10) of the alignment atoms in the 6MSM reference
align_atom_indices = pdbstructure_6msm.top.select(resid_alignment_selection_string)
align_xyz = pdbstructure_6msm.xyz[0, align_atom_indices] * 10

In [None]:
# Transmembrane helices to include/plot: 1 ... 12
helixnums = [*range(1, 12 + 1)]

# xy window of the 2-D histograms (uncentred coordinates).
# Alternatives that were used before:
#   [[-5,35],[30,70]]     zoomed view
#   [[-45,55],[0,100]]    all helices, seen from below
xyrange = [[-15, 55], [0, 70]]  # shows all helices

# Number of histogram bins, identical for x and y
bins = 100

# Helix positions of the analysed trajectories at the current z-level
dataset = helix_positions(traj_ids, helixnums=helixnums, leveldefs=leveldefs)

In [None]:
# Define the origin of the centred coordinate system as the mean xy position of
# the helices lining the pentagonal pore, then express everything relative to it.

# Helices in the pentagonal pore
pentagon_helices = [1,6,8,11,12]
# Trajectories that define the pore centre (here: the analysed trajectories)
traj_ids_for_penta = traj_ids

# Load the helix positions used as the pore reference and average them
penta_reference = helix_positions(traj_ids_for_penta, helixnums=pentagon_helices, leveldefs=leveldefs)
helix_com_for_penta = penta_reference.helix_com[['x', 'y']].values
penta_center = np.mean(helix_com_for_penta, axis=0)

# Centre of the 5 helices in the xy plane
centerx, centery = penta_center

# Centred helix coordinates; derived from the raw x/y columns, so re-running is safe
dataset.helix_com['xc'] = dataset.helix_com['x'] - centerx
dataset.helix_com['yc'] = dataset.helix_com['y'] - centery

# Histogram range in centred coordinates
crange = [[x-centerx for x in xyrange[0]], [y-centery for y in xyrange[1]]]

# Centred xy of the CA atoms in the PDB structures.
# Recomputed from the loaded structures instead of shifting xy_6msm / xy_5uak
# in place: the in-place form subtracted the centre again on every re-run of
# this cell, silently moving the reference markers.
xy_6msm = pdbstructure_6msm.xyz[0, ca_zlevel_6msm, :2]*10 - penta_center
xy_5uak = pdbstruc_5uak.xyz[0, ca_zlevel_5uak, :2]*10 - penta_center

In [None]:
# Density contours of the centred position of every helix, all in one axes
fig, axs = plt.subplots()

for h in helixnums:
    # `h` is looked up by name from inside the query string
    helix_cxy = dataset.helix_com.query('helix == @h')[['xc', 'yc']].values
    helix_hist2d = hist2d(helix_cxy[:, 0], helix_cxy[:, 1], bins=bins, range=crange)
    helix_hist2d.dens2d_preset2(axs, lines=True, lw=0.2, nlevels=15, lmax=0.32)

# CA positions of the 6MSM structure as reference markers
axs.scatter(xy_6msm[:, 0], xy_6msm[:, 1], c='cyan', marker='x', s=32)
# axs.scatter(*xy_5uak.T, c='blue', marker='x', s=32)

# Axes cosmetics
axs.set_aspect('equal', adjustable='box', anchor='C')
axs.grid(True, ls='--')
axs.set(xlim=(-20, 30), ylim=(-30, 20))
axs.set_xlabel('x [Å]', fontsize=14)
axs.set_ylabel('y [Å]', fontsize=14)

# savefig('allhelix_xy_outer.pdf')

# Closed states

In [None]:
# Helix positions of the closed-state trajectories at the outer-leaflet level
dataset2 = helix_positions(traj_ids_closed, helixnums=helixnums, leveldefs=outer_leaflet_defs)

# Express them relative to the pore centre (centerx, centery) determined above
closed_com = dataset2.helix_com
for axis_name, center_value in (('x', centerx), ('y', centery)):
    closed_com[f'{axis_name}c'] = closed_com[axis_name] - center_value

# Histogram range in centred coordinates
crange = [
    [limit - center_value for limit in limits]
    for limits, center_value in zip(xyrange, (centerx, centery))
]

## Outer leaflet TM arrangement is the same regardless of NBD dimerization

In [None]:
%store -r dist_nbd_5uak

fig, axs = plt.subplots(1,2, figsize=(8,12), gridspec_kw={'wspace': 0.4})

for h in helixnums:
    helix_cxy = dataset2.helix_com.query('helix == @h')[['xc', 'yc']].values[dist_nbd_5uak >= 40]
    helix_hist2d = hist2d(*helix_cxy.T, bins=bins, range=crange)
    helix_hist2d.dens2d_preset2(axs[0], lines=True, lw=0.2, nlevels=15, lmax=0.77)
    print(helix_hist2d.densmax)

# axs[0].scatter(*xy_6msm.T, c='red', marker='x', s=32)
axs[0].scatter(*xy_5uak.T, c='blue', marker='x', s=32)

axs[0].set_xlim(-20,30)
axs[0].set_ylim(-30,20)
axs[0].set_xlabel('x [Å]', fontsize=16)
axs[0].set_ylabel('y [Å]', fontsize=16)


for h in helixnums:
    helix_cxy = dataset2.helix_com.query('helix == @h')[['xc', 'yc']].values[dist_nbd_5uak < 40]
    helix_hist2d = hist2d(*helix_cxy.T, bins=bins, range=crange)
    helix_hist2d.dens2d_preset2(axs[1], lines=True, lw=0.2, nlevels=15, lmax=0.77)
    print(helix_hist2d.densmax)

# axs[1].scatter(*xy_6msm.T, c='red', marker='x', s=32)
axs[1].scatter(*xy_5uak.T, c='blue', marker='x', s=32)

axs[1].set_xlim(-20,30)
axs[1].set_ylim(-30,20)
axs[1].set_xlabel('x [Å]', fontsize=16)
axs[1].set_ylabel('y [Å]', fontsize=16)

# savefig("5uak_helix_xy_distribution.pdf")

## Radial plot

In [None]:
# Extent and grid spacing for the radial plots
extent_xy = 20
grid_stride = 4

# Polar coordinates (r, theta) of the centred helix positions
helix_com = dataset.helix_com
com_xc, com_yc = helix_com['xc'], helix_com['yc']
helix_com['r'] = np.sqrt(com_xc**2 + com_yc**2)
helix_com['theta'] = np.arctan2(com_yc, com_xc)

### Breakdown by tmpc states

In [None]:
# One panel per tmpc1v2 state: density contours of the pore-lining helices in
# centred xy coordinates, with a polar grid drawn on top of each panel.
#
# The former `bbox = axs.get_position()` / `rect` lines were removed: at this
# point `axs` is the array of axes returned by plt.subplots(1, 2) above, which
# has no get_position(), and the resulting values were never used.

# Restrict the state assignments to the analysed trajectories. Written as one
# chained, non-inplace expression with drop=True so that re-running the cell
# does not fail on an already existing 'index' column.
states_df = states_df.query("traj_id in @traj_ids").reset_index(drop=True)

state_def   = 'tmpc1v2'
nstates     = 4

# Half-width of the square xy window; also the radius of the polar overlay
polar_extent = 16
polar_ticks = np.arange(-polar_extent, polar_extent + 1, 4)

fig = plt.figure(figsize=(8,8))

for s in range(nstates):
    # Boolean mask over the rows of states_df that are assigned to state s
    state_select = (states_df[state_def].values == s)

    # Cartesian axes carrying the density contours
    # TODO: make the add_subplot() layout more general
    ax_xy = fig.add_subplot(2, 2, s+1)

    # Pore-lining helices
    for h in pentagon_helices:
        # NOTE(review): the mask is applied to the per-helix rows, which presumes
        # they match states_df in number and order -- confirm
        hcoords = dataset.helix_com.query('helix == @h').loc[state_select][['xc', 'yc']].values
        # Contour levels must be strictly increasing. Neighbouring cumulative
        # levels (e.g. 90 and 100) can collapse onto the same density value;
        # fewer contour levels or more bins help if that happens.
        hist = hist2d(*hcoords.T, range=crange, bins=bins)
        hist.dens2d_preset2(ax_xy, cbar_show=False, lw=0.3)

    # Square window with ticks every 4 units but no tick labels and no grid
    ax_xy.set_aspect('equal', adjustable='box', anchor='C')
    ax_xy.set_xticks(polar_ticks)
    ax_xy.set_xticklabels([None]*len(polar_ticks))
    ax_xy.set_yticks(polar_ticks)
    ax_xy.set_yticklabels([None]*len(polar_ticks))
    ax_xy.grid(False)
    ax_xy.set_xlim(-polar_extent, polar_extent)
    ax_xy.set_ylim(-polar_extent, polar_extent)
    edgeformat(ax_xy,0,0)

    # Frameless polar axes in the same grid cell, used only for its grid
    ax_polar = fig.add_subplot(2, 2, s+1, polar=True, frameon=False)

    ax_polar.set_rmax(polar_extent)
    ax_polar.set_rgrids(np.arange(0, polar_extent + 4, 4), angle=270, labels=[])

    # Hide theta tick labels
    ax_polar.set_thetagrids(np.arange(0,360+45,45), labels=[])

    # Dashed grid lines
    ax_polar.grid(True, ls='--', lw=2)

# No spacing between the panels
fig.subplots_adjust(hspace=0, wspace=0)
    
# savefig('centric_polar_5helixplot_alt.pdf')

# Visualize transitions

## TM1

In [None]:
# Density of the centred TM1 position and its division into two sub-populations.
# `substates` comes from the import cell at the top of the notebook rather than
# from an import in the middle of this cell.
fig, axs = plt.subplots()

# xy window that contains the TM1 density
tm1_window = [[-5,5],[0,10]]

helix_cxy = dataset.helix_com.query('helix == 1')[['xc', 'yc']].values
helix_hist2d = hist2d(*helix_cxy.T, bins=50, range=tm1_window)
helix_hist2d.dens2d_preset2(axs, lines=True, lw=0.2, nlevels=15, lmax=0.11)

axs.set_xlim(*tm1_window[0])
axs.set_ylim(*tm1_window[1])

# Split the TM1 positions into 2 sub-populations with a Gaussian mixture and
# mark the resulting centres on the density plot.
# NOTE(review): no random seed is visible here; if the mixture fit is stochastic
# the 0/1 labels may swap between runs -- confirm inside `substates`.
pop_divide = substates(2, *helix_cxy.T)
pop_divide.gaussian_mixture()
pop_divide.indicate_centers(axs, helix_hist2d.xedges, helix_hist2d.yedges, mdist_lim=1.386)

In [None]:
# Per-frame TM1 position together with its hard sub-state assignment.
# NOTE(review): hardgm_states() is called without mdist_lim here, whereas the
# TM11 cell passes mdist_lim explicitly -- confirm the default is intended.
sub_df = dataset.helix_com.query('helix == 1')[['traj_id', 'timestep', 'xc', 'yc']].copy()
sub_df['state'] = pop_divide.hardgm_states()

# Draw the xy trace of every trajectory that visits both sub-states 0 and 1
i = 0
for traj, traj_df in sub_df.groupby('traj_id'):
    visited = traj_df['state']
    if not ((visited == 0).any() and (visited == 1).any()):
        continue
    i += 1

    fig, axs = plt.subplots()
    # Overall TM1 density as background
    helix_hist2d.hist2d_contour(axs, lines=True, lw=0.2, nlevels=15, lmax=0.11)

    axs.set_aspect('equal', adjustable='box', anchor='C')
    axs.grid(True, ls='--')
    axs.set(xlim=(-5, 5), ylim=(0, 10))

    # Path of this trajectory in the xy plane, titled with its traj_id
    axs.plot(traj_df['xc'].values, traj_df['yc'].values)
    axs.set_title(f"{traj}")
# print(i)

## TM11

In [None]:
# Density of the centred TM11 position and its division into two sub-populations.
# `substates` comes from the import cell at the top of the notebook; the
# duplicate mid-notebook import was removed.
fig, axs = plt.subplots()

# xy window that contains the TM11 density
tm11_window = [[-15,-5],[-5,5]]

helix_cxy = dataset.helix_com.query('helix == 11')[['xc', 'yc']].values
helix_hist2d = hist2d(*helix_cxy.T, bins=50, range=tm11_window)
helix_hist2d.dens2d_preset2(axs, lines=True, lw=0.2, nlevels=15, lmax=0.15)

axs.set_xlim(*tm11_window[0])
axs.set_ylim(*tm11_window[1])

# Split the TM11 positions into 2 sub-populations with a Gaussian mixture and
# mark the resulting centres on the density plot.
# NOTE(review): no random seed is visible here; if the mixture fit is stochastic
# the 0/1 labels may swap between runs -- confirm inside `substates`.
pop_divide = substates(2, *helix_cxy.T)
pop_divide.gaussian_mixture()
pop_divide.indicate_centers(axs, helix_hist2d.xedges, helix_hist2d.yedges, mdist_lim=1)

In [None]:
# Per-frame TM11 position together with its hard sub-state assignment
sub_df = dataset.helix_com.query('helix == 11')[['traj_id', 'timestep', 'xc', 'yc']].copy()
sub_df['state'] = pop_divide.hardgm_states(mdist_lim=1)

# Count the trajectories that visit both sub-states 0 and 1
i = 0
for traj, traj_df in sub_df.groupby('traj_id'):
    visited = traj_df['state']
    i += int((visited == 0).any() and (visited == 1).any())
    # Per-trajectory trace plot, currently disabled (would go inside the
    # "visits both sub-states" case):
    # fig, axs = plt.subplots()
    # helix_hist2d.hist2d_contour(axs, lines=True, lw=0.2, nlevels=15, lmax=0.15)
    # axs.set_aspect('equal', adjustable='box', anchor='C')
    # axs.grid(True, ls='--')
    # axs.set_xlim(-15,-5)
    # axs.set_ylim(-5,5)
    # axs.plot(*traj_df[['xc', 'yc']].values.T)
    # axs.set_title(f"{traj}")
# print(i)

# Symmetry of the pore

In [None]:
# Symmetry of the pentagonal pore: distribution of the distance between the
# pore centre and the pore-lining helices, one curve per tmpc1v2 state.

# .copy() gives an independent frame, so adding the 'dist' column below neither
# triggers SettingWithCopyWarning nor depends on view/copy semantics of query().
pentagon_df = dataset.helix_com.query('helix in @pentagon_helices').copy()
# Distance of each helix position from the centre (origin of xc/yc)
pentagon_df['dist'] = np.sqrt(pentagon_df['xc']**2 + pentagon_df['yc']**2)
# print(pentagon_df['dist'].describe())

state_labels = states_df['tmpc1v2'].unique()

fig, axs = plt.subplots(figsize=(6,2))
edgeformat(axs)

# Fixed drawing order of the states
for s in [3,0,2,1]:
    # (traj_id, timestep) pairs assigned to state s ...
    tf = states_df.query('tmpc1v2 == @s')[['traj_id', 'timestep']]
    # ... and the centre distances of the pore helices in exactly those frames
    distances2center = pd.merge(tf, pentagon_df, on=['traj_id', 'timestep'])['dist']
    hist1d(distances2center, bins=100, range=[0,20]).plot(axs, label=map_assign[s], color=color_assign[s], lw=2)

# Explicit axes calls instead of the pyplot state machine
axs.legend()
axs.set_xlim(0,16)
axs.set_ylim(0,0.5)
axs.set_xlabel(r"$d_{\mathrm{center-helix}}$ [Å]", fontsize=16)
axs.set_ylabel("Prob. density [A.U.]", fontsize=16)
axs.grid(True, ls='--')

# savefig("symmetry_of_the_pore.pdf")

In [None]:
# Centre-to-helix distance distributions per pore helix, one panel per tmpc1v2 state
fig, axs = plt.subplots(2,2, sharex=True, sharey=True, gridspec_kw={'hspace': 0, 'wspace': 0})

# Invisible full-figure axes that only carries the shared axis labels
background = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    background.spines[side].set_color('none')
background.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
# Fully transparent so the panels underneath stay visible
background.patch.set_alpha(0.0)

background.set_ylabel("Prob. Density [A.U.]", fontsize=16)
background.set_xlabel("Distance to centre [Å]", fontsize=16)

# .copy() gives an independent frame, so adding the 'dist' column below neither
# triggers SettingWithCopyWarning nor depends on view/copy semantics of query().
pentagon_df = dataset.helix_com.query('helix in @pentagon_helices').copy()
pentagon_df['dist'] = np.sqrt(pentagon_df['xc']**2 + pentagon_df['yc']**2)

for s, ax in zip([0,1,2,3], axs.flatten()):
    edgeformat(ax)

    # Pore-helix rows of the frames assigned to state s
    tf = states_df.query('tmpc1v2 == @s')[['traj_id', 'timestep']]
    pentagon_states_subdf = pd.merge(tf, pentagon_df, on=['traj_id', 'timestep'])

    for h in pentagon_helices:
        distances2center = pentagon_states_subdf.query('helix == @h')['dist']
        hist1d(distances2center, bins=100, range=[0,20]).plot(ax, label=f'TM{h}')

    # State name in the upper-left corner of the panel
    ax.text(0.05, 0.8, map_assign[s], transform=ax.transAxes, fontsize=16)
    ax.set_xlim(0,20+2)
    ax.set_ylim(0,1.1)
# Legend only on the last panel (the helix labels are the same in every panel)
ax.legend()

# savefig("pore_symmetry_by_helix.pdf")

# Inner leaflet

In [None]:
# z-level definition used from here on: the inner leaflet
leveldefs = inner_leaflet_defs
# All resIDs of the level definition as one flat array
# (a resID that repeats in leveldefs is kept as often as it occurs)
resids_zlevel = np.array([*leveldefs.values()]).flatten()

# One CA selection string per resID of the z-level, shared by both structures
ca_queries = [f'name CA and resSeq {resid}' for resid in resids_zlevel]

# --- Reference structure 6MSM ---
pdbstructure_6msm = md.load(aligned_pdbfile('6msm'))
ca_zlevel_6msm = np.hstack([pdbstructure_6msm.top.select(query) for query in ca_queries])
# xy of those atoms in the first frame; mdtraj stores nm, x10 gives the Å used on the plots
xy_6msm = pdbstructure_6msm.xyz[0, ca_zlevel_6msm, :2] * 10

# --- Reference structure 5UAK ---
pdbstruc_5uak = md.load(aligned_pdbfile('5uak'))
ca_zlevel_5uak = np.hstack([pdbstruc_5uak.top.select(query) for query in ca_queries])
xy_5uak = pdbstruc_5uak.xyz[0, ca_zlevel_5uak, :2] * 10

In [None]:
# Domain definitions of the protein with protein_id=1
domain_dict = get_protdef(protein_id=1)
# Domains to align on: all of TM1 ... TM12
# (a subset used before: ['TM1', 'TM2', 'TM3', 'TM6', 'TM10', 'TM11'])
domains_align = [f"TM{n}" for n in range(1, 12 + 1)]
tm_definitions = [domain_dict.get(name) for name in domains_align]
# Merge the per-helix definitions into the resIDs used for the alignment
# (the meaning of the leading 1, 30, 1 arguments is defined by advanced_combine)
refalign_resids, _ = advanced_combine(1, 30, 1, *tm_definitions)
# mdtraj selection string for those resIDs, restricted to the backbone
resid_alignment_selection_string = (
    select_resids_str(refalign_resids, package='mdtraj') + " and backbone"
)

# xyz coordinates (first frame, x10) of the alignment atoms in the 6MSM reference
align_atom_indices = pdbstructure_6msm.top.select(resid_alignment_selection_string)
align_xyz = pdbstructure_6msm.xyz[0, align_atom_indices] * 10

In [None]:
# Transmembrane helices to include/plot: 1 ... 12
helixnums = [*range(1, 12 + 1)]

# xy window of the 2-D histograms (uncentred coordinates).
# The outer-leaflet section used [[-15,55],[0,70]] instead.
xyrange = [[-45, 55], [0, 100]]  # shows all helices, seen from below

# Number of histogram bins, identical for x and y
bins = 100

# Helix positions of the analysed trajectories at the current z-level
dataset = helix_positions(traj_ids, helixnums=helixnums, leveldefs=leveldefs)

In [None]:
# Define the origin of the centred coordinate system as the mean xy position of
# the helices lining the pentagonal pore, then express everything relative to it.

# Helices in the pentagonal pore
pentagon_helices = [1,6,8,11,12]
# Trajectories that define the pore centre; may differ from the analysed traj_ids
traj_ids_for_penta = traj_ids

# Load the helix positions used as the pore reference and average them
penta_reference = helix_positions(traj_ids_for_penta, helixnums=pentagon_helices, leveldefs=leveldefs)
helix_com_for_penta = penta_reference.helix_com[['x', 'y']].values
penta_center = np.mean(helix_com_for_penta, axis=0)

# Centre of the 5 helices in the xy plane
centerx, centery = penta_center

# Centred helix coordinates; derived from the raw x/y columns, so re-running is safe
dataset.helix_com['xc'] = dataset.helix_com['x'] - centerx
dataset.helix_com['yc'] = dataset.helix_com['y'] - centery

# Histogram range in centred coordinates
crange = [[x-centerx for x in xyrange[0]], [y-centery for y in xyrange[1]]]

# Centred xy of the CA atoms in the PDB structures.
# Recomputed from the loaded structures instead of shifting xy_6msm / xy_5uak
# in place: the in-place form subtracted the centre again on every re-run of
# this cell, silently moving the reference markers.
xy_6msm = pdbstructure_6msm.xyz[0, ca_zlevel_6msm, :2]*10 - penta_center
xy_5uak = pdbstruc_5uak.xyz[0, ca_zlevel_5uak, :2]*10 - penta_center

In [None]:
# Density contours of the centred position of every helix, all in one axes
fig, axs = plt.subplots()

for h in helixnums:
    # `h` is looked up by name from inside the query string
    helix_cxy = dataset.helix_com.query('helix == @h')[['xc', 'yc']].values
    helix_hist2d = hist2d(helix_cxy[:, 0], helix_cxy[:, 1], bins=bins, range=crange)
    helix_hist2d.dens2d_preset2(axs, lines=True, lw=0.2, nlevels=15, lmax=0.46)
    # print(helix_hist2d.densmax)

# CA positions of the 6MSM structure as reference markers
axs.scatter(xy_6msm[:, 0], xy_6msm[:, 1], c='cyan', marker='x', s=32)
# axs.scatter(*xy_5uak.T, c='blue', marker='x', s=32)

# Axes cosmetics: symmetric 60 x 60 window around the pore centre
axs.set_aspect('equal', adjustable='box', anchor='C')
axs.grid(True, ls='--')
axs.set(xlim=(-30, 30), ylim=(-30, 30))
axs.set_xlabel('x [Å]', fontsize=14)
axs.set_ylabel('y [Å]', fontsize=14)

# savefig('allhelix_xy_inner.pdf')

In [None]:
# Compare helix positions of frames in 'tm1' state 1 (red) with all other
# frames (blue), with a polar grid drawn on top.
#
# The unused bbox/rect bookkeeping was removed, the two copy-pasted contour
# loops were merged, and extent_xy is now used for every window/tick setting
# (before, set_rmax used extent_xy while the rest hard-coded 20).
# states_df is expected to be restricted to traj_ids already (done in the
# outer-leaflet section).

state_def   = 'tm1'

fig, axs = plt.subplots(figsize=(6,6))

compared_helices = [1,2,6,8,10,11,12]
contour_levels = np.linspace(0, 0.4, 15)[1:]
state_values = states_df[state_def].values

for state_select, contour_color in ((state_values == 1, 'red'), (state_values != 1, 'blue')):
    for h in compared_helices:
        # NOTE(review): the mask is applied to the per-helix rows, which presumes
        # they match states_df in number and order -- confirm
        hcoords = dataset.helix_com.query('helix == @h').loc[state_select][['xc', 'yc']].values
        hist = hist2d(*hcoords.T, range=crange, bins=bins)
        hist.hist2d_contour(axs, levels=contour_levels, lw=0.3, colors=contour_color)

# Square window with ticks every 4 units but no tick labels and no grid
window_ticks = np.arange(-extent_xy, extent_xy+1, 4)
axs.set_aspect('equal', adjustable='box', anchor='C')
axs.set_xticks(window_ticks)
axs.set_xticklabels([None]*len(window_ticks))
axs.set_yticks(window_ticks)
axs.set_yticklabels([None]*len(window_ticks))
axs.grid(False)
axs.set_xlim(-extent_xy, extent_xy)
axs.set_ylim(-extent_xy, extent_xy)
edgeformat(axs,0,0)

# Shade the upper half (y from 0 to 20) of the window
axs.fill_between([-20,0,20], [0,0,0], [20,20,20], alpha=0.2, color='blue')

# Frameless polar axes on top, used only for its grid
axs = fig.add_subplot(1,1,1, polar=True, frameon=False)

axs.set_rmax(extent_xy)
axs.set_rgrids(np.arange(0, extent_xy+4, 4), angle=270, labels=[])

# Hide theta tick labels
axs.set_thetagrids(np.arange(0,360+45,45), labels=[])

# Dashed grid lines
axs.grid(True, ls='--', lw=2)
    
# savefig('centric_polar_6helixplot_compare_bluered.pdf')

In [None]:
# Compare helix positions of frames in tmpc1v2 state 1 (red) with the
# closed-state trajectories (black), with a polar grid drawn on top.
#
# The unused bbox/rect bookkeeping was removed.

fig, axs = plt.subplots(figsize=(6,6))

compared_helices = [1,2,6,8,10,11,12]
contour_levels = np.linspace(0, 0.58, 15)[1:]

# Red: frames of the analysed trajectories that are in state 1
state_def   = 'tmpc1v2'
state_select = (states_df[state_def].values == 1)
for h in compared_helices:
    # NOTE(review): the mask is applied to the per-helix rows, which presumes
    # they match states_df in number and order -- confirm
    hcoords = dataset.helix_com.query('helix == @h').loc[state_select][['xc', 'yc']].values
    hist = hist2d(*hcoords.T, range=crange, bins=bins)
    hist.hist2d_contour(axs, levels=contour_levels, lw=0.1, colors='red')
    print(hist.densmax)

# Black: closed-state trajectories at the SAME z-level and centre as `dataset`.
# `dataset2` from the "Closed states" section was built with outer_leaflet_defs
# and centred on the outer-leaflet pore centre, so overlaying it here mixed
# two z-levels and two origins. Rebuild it for the current leveldefs instead.
# NOTE(review): confirm that a same-level comparison is what was intended.
closed_dataset = helix_positions(traj_ids_closed, helixnums=helixnums, leveldefs=leveldefs)
closed_com = closed_dataset.helix_com
closed_com['xc'] = closed_com['x'] - centerx
closed_com['yc'] = closed_com['y'] - centery

for h in compared_helices:
    hcoords = closed_com.query('helix == @h')[['xc', 'yc']].values
    hist = hist2d(*hcoords.T, range=crange, bins=bins)
    hist.hist2d_contour(axs, levels=contour_levels, lw=0.1, colors='black')
    print(hist.densmax)

# Square window with ticks every 4 units but no tick labels and no grid
window_ticks = np.arange(-extent_xy, extent_xy+1, 4)
axs.set_aspect('equal', adjustable='box', anchor='C')
axs.set_xticks(window_ticks)
axs.set_xticklabels([None]*len(window_ticks))
axs.set_yticks(window_ticks)
axs.set_yticklabels([None]*len(window_ticks))
axs.grid(False)
axs.set_xlim(-extent_xy,extent_xy)
axs.set_ylim(-extent_xy,extent_xy)
edgeformat(axs,0,0)

# Shaded region: lower edge runs from y=-6 at x=-20 to y=0 at x=0 and stays at 0; upper edge y=20
axs.fill_between([-20,0,20], [-6,0,0], [20,20,20], alpha=0.2, color='blue')

# Frameless polar axes on top, used only for its grid
axs = fig.add_subplot(1,1,1, polar=True, frameon=False)

axs.set_rmax(extent_xy)
axs.set_rgrids(np.arange(0,extent_xy+4,4), angle=270, labels=[])

# Hide theta tick labels
axs.set_thetagrids(np.arange(0,360+45,45), labels=[])

# Dashed grid lines
axs.grid(True, ls='--', lw=2)
    
# savefig('centric_polar_6helixplot_compare_closed_open_blackred.pdf')