# Fit4Function libraries enable accurate functional screening and prediction for in vivo assays. 


In [None]:
# Setup 
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import re

from IPython.display import Image

from scipy.stats import gaussian_kde

-----------
# Fit4Function variant biodistribution correlation between organs across 4 animals
 

In [None]:
# Data
# Organs analysed; each name is also used as the sheet name when reading
# from the workbook below.
organs = [
    'Serum', 'Liver', 'Spleen', 'Kidney',
    'Heart', 'Lung', 'SpinalCord', 'Brain',
]

# In vivo biodistribution workbook (opened once, sheets are parsed later)
Organs_data = pd.ExcelFile('data/fit4function_library_invivo.xlsx')


In [None]:
# Correlation for each organ across animals

# Initializations
organ_avg = {}
organ_cors = {}
organ_cors_avg = {}

for organ in organs:
    organ_df = pd.read_excel(Organs_data, organ)

    # Keep only the per-animal columns (names containing Animal1..Animal4)
    animal_cols = [
        name for name in organ_df.columns if re.search(r'Animal[1-4]', name)
    ]

    # Every unordered pair of animal columns (strict lower triangle)
    first_idx, second_idx = np.tril_indices(len(animal_cols), k=-1)

    pair_cors = []
    for a, b in zip(first_idx, second_idx):
        first = np.array(organ_df[animal_cols[a]])
        second = np.array(organ_df[animal_cols[b]])

        # Use only the rows where both animals have a finite value
        keep = np.isfinite(first) & np.isfinite(second)

        # Pearson correlation of the two animals over the shared rows
        pair_cors.append(np.corrcoef(first[keep], second[keep])[0, 1])

    organ_cors[organ] = pair_cors
    


In [None]:
# Summary Stats

# Mean and standard deviation of the pairwise correlations, per organ
organ_cors_avg = {}
organ_cors_std = {}
for organ_name, pair_cors in organ_cors.items():
    organ_cors_avg[organ_name] = np.mean(pair_cors)
    organ_cors_std[organ_name] = np.std(pair_cors)

print(organ_cors_avg)

# Left as the final expression so the notebook displays it
organ_cors_std

In [None]:
# Figure: pairwise replicate correlation per organ.
# Bars show the mean, error bars the standard deviation, and open circles
# the individual animal-pair correlations.

# Figure Configurations
sns.set_theme(style='ticks', font_scale=0.75, rc={
    'svg.fonttype': 'none',
    'font.sans-serif': ['Arial'],
    'font.family': 'sans-serif',
    'text.usetex': False,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 5,
    'axes.labelsize': 5,
    'axes.titlesize': 5,
    'axes.labelpad': 2,
    'axes.linewidth': 0.5,
    'axes.titlepad': 4,
    'lines.linewidth': 0.5,
    'legend.fontsize': 5,
    'legend.title_fontsize': 5,
    'xtick.major.size': 3,
    'xtick.major.pad': 3,
    'xtick.major.width': 0.5,
    'ytick.major.size': 3,
    'ytick.major.pad': 3,
    'ytick.major.width': 0.5,
})

fig = plt.figure(figsize=(1.75, 1.75), dpi=300)
gs = fig.add_gridspec(
    1, 1, bottom=0.4, top=0.9, left=0.3, right=0.9
)

ax = fig.add_subplot(gs[0, 0])

# One x position per organ (1-based)
positions = np.arange(len(organs)) + 1

# Light separators halfway between neighbouring organs
for boundary in positions[:-1] + 0.5:
    ax.axvline(boundary, color='#DDD', linewidth=0.3)

# Look values up by organ name so bars, error bars and points stay aligned
# with the tick labels instead of relying on dict insertion order.
means = [organ_cors_avg[organ] for organ in organs]
stds = [organ_cors_std[organ] for organ in organs]

# Repeat each organ's position once per correlation it actually has, rather
# than assuming a fixed number of animal pairs per organ.
pair_counts = [len(organ_cors[organ]) for organ in organs]
x = np.repeat(positions, pair_counts)
y = np.concatenate([organ_cors[organ] for organ in organs])

# Bars
ax.bar(positions, means, edgecolor='none', color='r', alpha=0.4)

ax.errorbar(positions, means, stds,
            fmt='none', ecolor='k', elinewidth=0.5, capsize=2, capthick=0.5)

ax.scatter(
    x, y, s=10, marker='o',
    linewidths=0.5,
    facecolors='none',
    edgecolors='r')

# Labeling
ax.set_xticks(positions)
ax.set_xticklabels(organs)
for tick in ax.get_xticklabels():
    tick.set_rotation(90)
ax.tick_params(axis='x', length=2, pad=1)

ax.set_yticks(np.arange(0.2, 1., 0.2))
ax.set_ylim([0.15, 0.95])
ax.tick_params(axis='y', length=2, pad=1)

ax.set_ylabel('Pairwise replicate\ncorrelation')

# Show and save
filename = 'figures/fig3e_fit4function_replication_quality_invivo'
plt.savefig('{}.png'.format(filename), transparent=True)
plt.savefig('{}_600dpi.svg'.format(filename), transparent=True, dpi=600)
plt.savefig('{}_1200dpi.svg'.format(filename), transparent=True, dpi=1200)

plt.close()

# Display the saved PNG as the cell output
Image(filename + '.png')

------

# Fit4Function modeling performance for C57 in vivo biodistribution 


In [None]:
# Data
organs = [
    'Serum', 'Liver', 'Spleen', 'Kidney',
    'Heart', 'Lung', 'SpinalCord', 'Brain',
]
Predictions = pd.ExcelFile('data/fit4function_library_invivo_predictions.xlsx')

# One DataFrame per organ, read from the sheet named after that organ
test_dfs = {organ: pd.read_excel(Predictions, organ) for organ in organs}
        

In [None]:
# Figure: measured vs. predicted values for each organ, one panel per organ,
# with points coloured by an estimated point density.

# Configurations
n_rows, n_cols = 2, 4
fig = plt.figure(figsize=(3.5, 3), dpi=600)
gs = fig.add_gridspec(
    n_rows, n_cols,
    wspace=0.1, hspace=0.01
)

xlim = [-7, 8]
ylim = xlim

xticks = [-5, 0, 5]
yticks = xticks
pt_size = 0.5
cmap = mpl.cm.viridis

# Upper bound on the number of points used to fit the density estimate
kde_sample_size = 1000


for i, organ in enumerate(organs):
    ax = fig.add_subplot(gs[i // n_cols, i % n_cols])

    # Data
    df = test_dfs[organ]
    x = df['Measured']
    y = df['Predicted']

    # Kernel: fit on a reproducible subsample, then evaluate at every point.
    # Rows are sampled jointly so each measured value stays paired with its
    # prediction, and the sample size is capped at the number of rows so
    # small sheets do not raise.
    kde_sample = df[['Measured', 'Predicted']].sample(
        n=min(kde_sample_size, len(df)), random_state=1
    )
    kernel = gaussian_kde(np.vstack([
        kde_sample['Measured'],
        kde_sample['Predicted']
    ]))
    c = kernel(np.vstack([x, y]))

    # Specs
    ax.set_aspect('equal', 'box')
    ax.scatter(
        x, y, c=c, s=pt_size, cmap=cmap,
        rasterized=True, linewidth=0, edgecolors=None
    )
    # Identity line: perfect agreement between measured and predicted
    ax.plot(xlim, ylim, '-r', linewidth=0.3)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # Apply the configured tick positions; fixing them before blanking the
    # labels below also keeps every panel on identical ticks.
    ax.set_xticks(xticks)
    ax.set_yticks(yticks)

    # Label: every panel gets an x label, only the first column a y label
    ax.set_xlabel('Measured')

    if i % n_cols == 0:
        ax.set_ylabel('Predicted')
    else:
        ax.set_yticklabels([])

    ax.text(
        -6, 7, s='{}\n$r$={:.2f}'.format(organ, np.corrcoef(x, y)[0, 1]),
        ha='left', va='top', fontsize=4
    )


# Show plot and save
filename = 'figures/fig3f_fit4function_prediction_quality_invivo'
plt.savefig('{}.png'.format(filename), transparent=True)
plt.savefig('{}_600dpi.svg'.format(filename), transparent=True, dpi=600)
plt.savefig('{}_1200dpi.svg'.format(filename), transparent=True, dpi=1200)

plt.show()