# MultiFunction individual variant validation


In [None]:
# Setup 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image

import math

from utils_f4f import spiderplot

------
# Selection of MultiFunction Individual Variants 




In [None]:
# Data
# Read the table of MultiFunction individual variants; the trailing bare
# expression makes the notebook render the frame.
variants_csv = 'data/multifunction_individual_variants_in_pool.csv'
df = pd.read_csv(variants_csv)
df

In [None]:
# Meta data
# One row per assay: (column name, display name, (low, high) bounds).
# The three public lists below are derived from this table so that the
# entries for one assay cannot drift out of step with each other.
# NOTE(review): the bounds are presumably the per-axis limits consumed by
# `spiderplot` -- confirm against utils_f4f.
_assay_table = [
    ('Production', 'Production Fitness', (-5.8, 2.0)),

    # in vitro binding
    ('HepG2_bind_log2enr', 'HepG2 binding', (-8.2, 2.6)),
    ('THLE_bind_log2enr', 'THLE binding', (-8.4, 2.8)),

    # in vitro transduction
    ('HepG2_trans_log2enr', 'HepG2 transduction', (-7.1, 4.3)),
    ('THLE_trans_log2enr', 'THLE transduction', (-6.4, 4.2)),

    # in vivo biodistribution
    ('biod_Liver_log2enr', 'Liver Biodistribution', (-3.6, 1.2)),
    ('biod_Kidney_log2enr', 'Kidney Biodistribution', (-2.2, 1.1)),
    ('biod_Brain_log2enr', 'Brain Biodistribution', (-6.5, 2.3)),
    ('biod_SC_log2enr', 'Spinal Cord Biodistribution', (-5.3, 2.7)),

    # in vivo transduction
    ('invivo_SC_log2enr', 'Spinal Cord Transduction', (-10.5, 3.9)),
    ('invivo_Brain_log2enr', 'Brain Transduction', (-10.3, 4.0)),
    ('invivo_Kidney_log2enr', 'Kidney Transduction', (-9.9, 3.4)),
    ('invivo_Liver_log2enr', 'Liver Transduction', (-8.3, 2.2)),
]

assay_cols = [column for column, _, _ in _assay_table]
assay_names = [title for _, title, _ in _assay_table]
assay_bounds = [limits for _, _, limits in _assay_table]

In [None]:
# Aggregation (averaging) across codon replicates
# Only the grouping keys have their missing values replaced by '' (so rows
# with a missing Label/SeqID still form a group). The assay columns are left
# numeric: filling them with '' would turn any column containing NaN into an
# object column that cannot be averaged, whereas mean() already skips NaN.
group_keys = ['Label', 'SeqID']
dfa = df[group_keys + assay_cols].copy()  # copy so the key fill never touches df
dfa[group_keys] = dfa[group_keys].fillna('')
dfa = dfa.groupby(group_keys, as_index=False)[assay_cols].mean()
dfa.head()

In [None]:
# Figure Configurations
# rc overrides for the spider plots, assembled in pieces and handed to
# seaborn in a single call.
spider_rc = {
    # Fonts and vector-output settings
    'font.family': 'sans-serif',
    # 'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'svg.fonttype': 'none',
    'text.usetex': False,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 9,

    # Axes, lines and legend
    'axes.labelsize': 8,
    'axes.titlesize': 8,
    'axes.labelpad': 2,
    'axes.linewidth': 0.5,
    'axes.titlepad': 4,
    'lines.linewidth': 0.5,
    'legend.fontsize': 9,
    'legend.title_fontsize': 9,
}

# Tick geometry: same size/pad everywhere; x ticks get zero line width,
# y ticks get 0.5.
spider_rc.update({
    f'{axis}.{level}.{prop}': value
    for axis, line_width in (('xtick', 0.), ('ytick', 0.5))
    for level in ('major', 'minor')
    for prop, value in (('size', 2), ('pad', 1), ('width', line_width))
})

sns.set_theme(style='ticks', font_scale=0.75, rc=spider_rc)


In [None]:
# Spiderplot Initialization

# One-letter label per assay, listed in the same order as the assay columns
# and built group by group.
assay_shortnames = (
    ['P']                       # production fitness
    + ['H', 'T']                # in vitro binding
    + ['H', 'T']                # in vitro trans
    + ['L', 'K', 'B', 'S']      # in vivo biod
    + ['S', 'B', 'K', 'L']      # in vivo trans
)

# One theta per assay, in the same order as the short names.
# NOTE(review): presumably the position of each spoke as a fraction of a
# full turn -- confirm against `spiderplot` in utils_f4f.
assay_thetas = (
    [0.075]                         # production fitness
    + [0.22, 0.28]                  # in vitro binding
    + [0.38, 0.44]                  # in vitro trans
    + [0.55, 0.6, 0.65, 0.7]        # in vivo biod
    + [0.8, 0.85, 0.9, 0.95]        # in vivo trans
)
# Bundle the per-assay settings in the order they are passed to spiderplot.
assays = (
    assay_cols,
    assay_shortnames,
    assay_bounds,
    assay_thetas,
)

# Label colour per assay (same order as the short names): dark for
# production, the in vitro assays and both liver axes, light for the
# kidney / brain / spinal-cord axes.
_dark, _light = '#444', '#999'
assay_label_colors = (
    [_dark]                     # production fitness
    + [_dark] * 2               # in vitro binding
    + [_dark] * 2               # in vitro trans
    + [_dark] + [_light] * 3    # in vivo biod: L, then K / B / S
    + [_light] * 3 + [_dark]    # in vivo trans: S / B / K, then L
)


# Run Spiderplot Example
fig, ax = plt.subplots(
    figsize=(1.3, 1.),
    dpi=200,
    subplot_kw=dict(projection='polar'),
)
fig.subplots_adjust(left=0.2, right=0.8)
ax = spiderplot(ax, df, dfa, 'BI-151', assays, assay_label_colors)
plt.show()

In [None]:
# Spider plots
# AAV9 followed by BI-151 ... BI-157, one polar panel each.
bi_variants = ['AAV9'] + [f'BI-{number}' for number in range(151, 158)]

n_cols = 4
n_rows = math.ceil(len(bi_variants) / n_cols)

fig = plt.figure(figsize=(4.2, 2.3), dpi=200)
gs = fig.add_gridspec(
    n_rows, n_cols,
    wspace=0.2, hspace=0.5,
    left=0.03, right=0.97, top=0.87, bottom=0.05,
)
# Fill the grid row by row.
for idx, variant in enumerate(bi_variants):
    row, col = divmod(idx, n_cols)
    ax = fig.add_subplot(gs[row, col], projection='polar')
    spiderplot(ax, df, dfa, variant, assays, assay_label_colors)


# Plot and save
filename = 'figures/fig4d_multifunction_variants_spiderplots'
png_path = filename + '.png'
fig.savefig(png_path, transparent=True, dpi=200)
plt.close()

# Show the saved file rather than the live figure.
Image(png_path)

--------------

# Transduction of HEPG2 and THLE by the seven MultiFunction variants 

HepG2 and THLE transduction were assessed 24 hours post-transduction at 3000 vg/cell using a luciferase assay (n = 4 transduction replicates per group). Luciferase relative light units were normalized to AAV9.

In [None]:
# Data
# Read the transduction table; the trailing bare expression makes the
# notebook render the frame.
transduction_csv = 'data/multifunction_variants_hepatocyte_moi3e3_transduction.csv'
trans_3e3 = pd.read_csv(transduction_csv)
# Disabled rename, kept from the original notebook:
# trans_3e3.rename(columns={'Unnamed: 0': 'capsid'}, inplace=True)
trans_3e3


In [None]:
# Processing for visualization 1
# Divide every replicate column by the first row's mean for the same cell
# line, so the first row averages to 1 within each cell line.
# NOTE(review): assumes row 0 is the AAV9 reference and that columns 1-4 are
# THLE and 5-8 are HEPG2 -- confirm against the CSV layout.
replicate_block = trans_3e3.iloc[:, 1:]
reference_means = np.array([
    np.mean(trans_3e3.iloc[0, 1:5]),  # THLE mean
    np.mean(trans_3e3.iloc[0, 5:9]),  # HEPG2 mean
])
# Repeat each mean 4 times to get one divisor per replicate column.
per_column_divisor = np.repeat(reference_means, 4)
trans_3e3.iloc[:, 1:] = replicate_block.divide(per_column_divisor, axis=1)
trans_3e3


In [None]:
# Processing for visualization 2
# Reshape to long format: one row per (capsid, replicate column).
trans_3e3_f = trans_3e3.melt(id_vars='capsid', var_name='sample')

# Column labels are '<cell>' or '<cell>.<k>'; split once and reuse the parts.
sample_parts = trans_3e3_f['sample'].str.split('.')
# A label without a suffix is replicate 1; '<cell>.<k>' is replicate k + 1.
trans_3e3_f['rep'] = [
    int(parts[1]) + 1 if len(parts) > 1 else 1
    for parts in sample_parts
]
trans_3e3_f['cell'] = sample_parts.str[0]

# Integer code per capsid.
trans_3e3_f['x'] = pd.Categorical(trans_3e3_f['capsid']).codes

# NOTE(review): values not listed in `categories` receive code -1. The
# plotting cell filters on 'HEPG2', so check that the 'HePG2' spelling here is
# intended before relying on x_offset for that cell line.
cell_order = ['THLE', 'HePG2']
trans_3e3_f['x_offset'] = pd.Categorical(trans_3e3_f['cell'], categories=cell_order).codes

print(trans_3e3_f.to_string())

In [None]:
# Processing for visualization 3
# Mean and standard deviation of the replicate values for every
# capsid / cell-line group.
#
# The aggregations are given by name: passing np.mean / np.std makes pandas
# substitute its own groupby mean / std (sample std, ddof=1), a substitution
# that recent pandas versions warn about. The strings pin that behaviour.
#
# The keys are left in the index and moved to columns by reset_index(), so
# they appear exactly once. NOTE(review): combining as_index=False with a
# list of aggregations has not behaved the same across pandas releases --
# verify if that form is ever reintroduced.
trans_3e3_avg = (
    trans_3e3_f
    .groupby(['capsid', 'x', 'x_offset', 'cell'])
    [['value']]
    .agg(['mean', 'std'])
    .reset_index()
)
trans_3e3_avg


In [None]:
# Figure Configurations
_new_black = '#000'

# rc overrides for the bar chart, assembled in pieces and handed to seaborn
# in a single call.
bar_rc = {
    # Fonts and vector-output settings
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'svg.fonttype': 'none',
    'text.usetex': False,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 9,

    # Axes, lines and legend
    'axes.labelsize': 9,
    'axes.titlesize': 9,
    'axes.labelpad': 2,
    'axes.linewidth': 0.5,
    'axes.titlepad': 4,
    'lines.linewidth': 0.5,
    'legend.fontsize': 9,
    'legend.title_fontsize': 9,

    # Tick label size
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
}

# Tick geometry is the same for both axes and both tick levels.
bar_rc.update({
    f'{axis}.{level}.{prop}': value
    for level in ('major', 'minor')
    for axis in ('xtick', 'ytick')
    for prop, value in (('size', 2), ('pad', 1), ('width', 0.5))
})

# Avoid black unless necessary: every colour below comes from _new_black.
bar_rc.update({
    key: _new_black
    for key in (
        'text.color',
        'patch.edgecolor',
        'hatch.color',
        'axes.edgecolor',
        'axes.labelcolor',
        'xtick.color',
        'ytick.color',
    )
})
# Seaborn turns on edgecolors for histograms by default; switch that off.
bar_rc['patch.force_edgecolor'] = False

sns.set_theme(style='ticks', font_scale=0.75, rc=bar_rc)


fig = plt.figure(figsize=(2.75, 1.75), dpi=200)
gs = fig.add_gridspec(1, 1, bottom=0.25, top=0.85, left=0.25, right=0.95)

bar_width = 0.35
bar_padding = 0.02
# Distance from a capsid tick to the centre of either of its two bars.
bar_offset = (bar_width / 2) + bar_padding
bar_alpha = 0.5

# (cell line, signed shift of its bar centre from the capsid tick, colour).
# The same shift places the bars, the replicate markers and the error bars,
# so all three stay centred on each other. Positions are computed directly
# from 'x' and do not depend on the 'x_offset' category codes.
cell_layout = [
    ('THLE', -bar_offset, 'r'),
    ('HEPG2', bar_offset, 'b'),
]

# Plot
ax = fig.add_subplot(gs[0, 0])

# Mean bars (the legend entries come from these).
for cell, shift, color in cell_layout:
    avg_rows = trans_3e3_avg.loc[trans_3e3_avg['cell'] == cell]
    ax.bar(
        avg_rows['x'] + shift,
        avg_rows['value']['mean'],
        label=cell, width=bar_width, color=color, alpha=bar_alpha, rasterized=True,
    )

# Individual replicates as open circles.
for cell, shift, color in cell_layout:
    rep_rows = trans_3e3_f.loc[trans_3e3_f['cell'] == cell]
    ax.scatter(
        rep_rows['x'] + shift,
        rep_rows['value'],
        s=20, alpha=1.0, facecolor='none', linewidth=0.5,
        edgecolor=color,
    )

# Standard-deviation error bars around each mean (no connecting line).
for cell, shift, color in cell_layout:
    avg_rows = trans_3e3_avg.loc[trans_3e3_avg['cell'] == cell]
    ax.errorbar(
        avg_rows['x'] + shift,
        avg_rows['value']['mean'],
        yerr=avg_rows['value']['std'], fmt='none', ecolor='k', elinewidth=0.5,
        capsize=2, capthick=0.5,
    )


# Labeling
# One tick per capsid, labelled in the order listed here.
capsid_labels = ['AAV9', 'BI-151', 'BI-152', 'BI-153', 'BI-154', 'BI-155', 'BI-156', 'BI-157']
ax.set_xticks(np.arange(0, 8))
ax.set_xticklabels(capsid_labels)
# Slant the labels and right-align them.
plt.setp(ax.get_xticklabels(), rotation=45, horizontalalignment='right')

# Log-scaled y axis with explicit tick positions, labelled with plain integers.
ax.set_yscale('log', base=10)
decade_ticks = [1, 10, 100, 1000]
ax.set_yticks(decade_ticks)
ax.set_yticklabels(decade_ticks)
ax.set_ylim([0.5, 2500])
ax.tick_params(axis='y', length=5)

# Frameless two-column legend anchored just above the top-left of the axes.
legend_options = dict(
    loc='lower left', bbox_to_anchor=(0, 1), ncol=2, frameon=False,
    handlelength=1, handletextpad=0.4, borderaxespad=0, columnspacing=1,
)
ax.legend(**legend_options)

ax.set_ylabel('Normalized\nLuciferase Activity')


# Plot and save
filename = 'figures/fig4e_multifunction_variants_invitro'
png_path = filename + '.png'
fig.savefig(png_path, dpi=200)
plt.close()

# Show the saved file rather than the live figure.
Image(png_path)