Plotting CDFs

In [None]:
# For every pair, overlay three views of its distance distribution on one
# figure: the unweighted empirical CDF, the weighted empirical CDF and a
# cumulative histogram.
for combo in pairs:
    distances = np.asarray(frag_pair_distance_dict[combo])

    # Sort once and reuse the permutation so that each weight stays aligned
    # with the distance it belongs to.
    order = np.argsort(distances)
    sorted_distances = distances[order]
    sorted_weights = frag_weight_dict[combo][order]
    n_obs = len(distances)

    # The ECDF at the i-th smallest sample is i/N for i = 1..N. This is the
    # same convention as the cumulative-sum curve below, so with uniform
    # weights the two lines coincide instead of being offset by 1/N.
    ecdf = np.arange(1, n_obs + 1) / n_obs
    weighted_cdf = np.cumsum(sorted_weights) / np.sum(sorted_weights)

    plt.plot(sorted_distances, ecdf, label='Empirical CDF')
    plt.plot(sorted_distances, weighted_cdf, label='Weighted CDF')
    plt.hist(distances, bins=30, alpha=0.2, density=True, cumulative=True,
             label='Cumulative histogram')
    plt.title('{} CDF, N={}'.format(combo, n_obs))
    plt.xlabel('Distance (angstrom)')
    plt.ylabel('Cumulative Probability')
    plt.legend(loc='upper left')
    plt.show()


In [None]:
# Weighted empirical CDF of the measured distances for each pair (orange),
# overlaid on the weighted CDFs of the random sets (grey); one row per pair.
#
# NOTE(review): `n_rand` and `Rectangle` are only assigned/imported in later
# cells of this notebook as far as this section shows -- confirm they are also
# available earlier so the cell survives Restart & Run All.
n_pairs = len(pairs)
fig, axs = plt.subplots(nrows=n_pairs, ncols=1, sharex=True,
                        figsize=(5, 3 * n_pairs), dpi=200, squeeze=False)
axs = axs[:, 0]  # single column: keep plain 1-D indexing even for one row
fig.suptitle('DPP11', y=0.9)

for i, combo in tqdm(enumerate(pairs), total=n_pairs):
    measured = np.asarray(frag_pair_distance_dict[combo])
    # One argsort serves both the x values and the matching weights.
    order = np.argsort(measured)
    weights = frag_weight_dict[combo][order]

    axs[i].set_title('{} weighted CDF, N={}'.format(combo, len(measured)))
    axs[i].plot(measured[order], np.cumsum(weights)/np.sum(weights),
                alpha=0.5, color='orange')

    for n in range(n_rand):
        rand_distances = np.asarray(rand_pair_dicts[n][combo])
        rand_order = np.argsort(rand_distances)
        rand_weights = rand_weight_dicts[n][combo][rand_order]
        axs[i].plot(rand_distances[rand_order],
                    np.cumsum(rand_weights)/np.sum(rand_weights),
                    alpha=0.1, color='grey')

# Proxy patches: the plotted lines carry no labels, so the legend is built
# by hand from one coloured rectangle per data source.
legend_elements = [Rectangle((0, 0), 1, 1, color='orange', label='Measured'),
                   Rectangle((0, 0), 1, 1,  color='grey', label='Random')]
axs[0].legend(handles=legend_elements, loc='upper left')

# Invisible full-figure axes used only to carry the shared axis labels.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', which='both', top=False,
                bottom=False, left=False, right=False)
plt.xlabel('Distance (angstrom)')
# The curves are cumulative, so the y axis is a probability, not a density.
plt.ylabel('Cumulative Probability', labelpad=20)
plt.show()


Visualise Histograms and KDEs against random

In [None]:
# Per pair: density histograms of measured vs. random distances (left column)
# and the corresponding KDE curves (right column).
n_bins = 30  # histogram bins, used for both measured and random data
n_rand = 10  # number of random sets overlaid in grey
nx = 500     # number of grid points at which each KDE is evaluated

n_pairs = len(pairs)
fig, axs = plt.subplots(nrows=n_pairs, ncols=2, sharex=True,
                        figsize=(16, 4 * n_pairs), dpi=200, squeeze=False)
# Figure title; earlier revisions of this cell used 'DPP11' or 'MPro' here.
fig.suptitle('Mac-1', y=0.9)

for i, combo in tqdm(enumerate(pairs), total=n_pairs):
    measured = frag_pair_distance_dict[combo]
    hist_ax, kde_ax = axs[i, 0], axs[i, 1]

    hist_ax.set_title('{} histogram, N={}'.format(combo, len(measured)))
    hist_ax.hist(measured, bins=n_bins, alpha=0.5, density=True, color='orange')

    # An earlier variant of this title also reported the median K-S p-value
    # (pval_median[i]).
    kde_ax.set_title('{} KDE'.format(combo))

    # Evaluation grid from 0 to the largest measured distance, reshaped once
    # into a column because score_samples is called with 2-D input.
    x = np.linspace(0, np.amax(measured), nx)
    grid = x.reshape(-1, 1)

    # score_samples output is exponentiated before plotting -- presumably it
    # returns log-densities (e.g. sklearn KernelDensity); confirm.
    pair_dist = np.exp(kde_dict_opt[combo].score_samples(grid)).flatten()
    kde_ax.plot(x, pair_dist, color='orange')

    for n in range(n_rand):
        hist_ax.hist(rand_pair_dicts[n][combo], bins=n_bins, alpha=0.1,
                     density=True, color='grey')

        rand_dist = np.exp(rand_kde_dicts[n][combo].score_samples(grid)).flatten()
        kde_ax.plot(x, rand_dist, alpha=0.2, color='grey')

# Proxy patches so the legend shows one entry per data source rather than one
# per plotted artist.
legend_elements = [Rectangle((0,0), 1,1 , color='orange', label='Measured'),
                   Rectangle((0,0), 1,1,  color='grey', label='Random')]
axs[0,0].legend(handles=legend_elements, loc='upper right')

# Invisible full-figure axes used only to carry the shared axis labels.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)
plt.xlabel('Distance (angstrom)')
plt.ylabel('Probability Density', labelpad=20)
plt.show()

Visualise CDFs against random

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# Per pair: cumulative histograms of measured vs. random distances (left
# column) and CDFs obtained by accumulating the KDE curves (right column).
n_bins = 30  # histogram bins, used for both measured and random data
n_rand = 10  # number of random sets overlaid in grey
nx = 500     # number of grid points at which each KDE is evaluated

n_pairs = len(pairs)
fig, axs = plt.subplots(nrows=n_pairs, ncols=2, sharex=True,
                        figsize=(16, 4 * n_pairs), dpi=200, squeeze=False)
fig.suptitle('DPP11', y=0.9)

for i, combo in tqdm(enumerate(pairs), total=n_pairs):
    measured = frag_pair_distance_dict[combo]
    raw_ax, kde_ax = axs[i, 0], axs[i, 1]

    raw_ax.set_title('{} raw CDF, N={}'.format(combo, len(measured)))
    raw_ax.hist(measured, bins=n_bins,
                alpha=0.5, density=True, color='orange', cumulative=True)

    kde_ax.set_title('{} KDE CDF'.format(combo))

    # Evaluation grid from 0 to the largest measured distance, reshaped once
    # into a column because score_samples is called with 2-D input.
    x = np.linspace(0, np.amax(measured), nx)
    grid = x.reshape(-1, 1)

    # score_samples output is exponentiated before use -- presumably it
    # returns log-densities (e.g. sklearn KernelDensity); confirm.
    pair_dist = np.exp(kde_dict_opt[combo].score_samples(grid)).flatten()

    # Normalising by the sum over the grid makes the accumulated curve end at
    # exactly 1 at the right edge of the grid.
    kde_ax.plot(x, np.cumsum(pair_dist/np.sum(pair_dist)), color='orange')

    for n in range(n_rand):
        raw_ax.hist(rand_pair_dicts[n][combo], bins=n_bins,
                    alpha=0.02, density=True, color='grey', cumulative=True)

        rand_dist = np.exp(rand_kde_dicts[n][combo].score_samples(grid)).flatten()
        kde_ax.plot(x, np.cumsum(rand_dist/np.sum(rand_dist)),
                    alpha=0.1, color='grey')

# Proxy patches so the legend shows one entry per data source rather than one
# per plotted artist.
legend_elements = [Rectangle((0, 0), 1, 1, color='orange', label='Measured'),
                   Rectangle((0, 0), 1, 1,  color='grey', label='Random')]
axs[0, 0].legend(handles=legend_elements, loc='upper right')

# Invisible full-figure axes used only to carry the shared axis labels.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', which='both', top=False,
                bottom=False, left=False, right=False)
plt.xlabel('Distance (angstrom)')
# Both columns show cumulative curves, so the y axis is a probability, not a
# density.
plt.ylabel('Cumulative Probability', labelpad=20)
plt.show()
