# Fit4Function library validation: Fit4Function libraries uniformly sample the high production fitness space.


# Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import Image

from utils_f4f import heatmap

-------
# Fit4Function production fitness distribution aligns with the expected production fitness distribution



In [None]:
# Input tables

# Modeling library: sequence, label and production measurement per variant
FileName = 'data/modeling_library_production_fitness.csv'
training = pd.read_csv(FileName, usecols=['AA', 'Label', 'Production'])

# Fit4Function library: sequence and the 'Production1' measurement
fit4fxn_columns = ['AA', 'Production1']
Fit4Fxn = pd.read_csv(
    'data/fit4function_library_screens.csv',
    usecols=fit4fxn_columns,
)

In [None]:
# Parameters

# Calibration shift: an offset added to the Fit4Function production
# measurements to bring them on par with the measurements in the Modeling
# library. Estimated by a regression model.
Calibration_Shift = 1.2923

# Backward-compatible alias: the original (misspelled) name is still the one
# read by the plotting cell, so it must keep resolving to the same value.
Claibration_Shift = Calibration_Shift


In [None]:
# Preprocessing

# Modeling library: move production onto a log2 scale, then split the values
# into the non-infinite ones (kept) and the infinite ones (only counted).
training_x = np.log2(training['Production'])
training_is_inf = np.isinf(training_x)
valid_training_x = training_x[~training_is_inf]
n_invalid_training_x = training_is_inf.sum()

# Fit4Function library: 'Production1' is used as read (no log2 is applied
# in this cell), with the same kept/counted split.
hh_x = Fit4Fxn['Production1']
hh_is_inf = np.isinf(hh_x)
valid_hh_x = hh_x[~hh_is_inf]
n_invalid_hh_x = hh_is_inf.sum()


In [None]:
# Figure: overlaid production fitness histograms of the Modeling library
# (grey) and the calibration-shifted Fit4Function library (red).

import os

bin_width = 0.1
bins = np.arange(-200, 200, bin_width)
ylim = [0, 0.3]

# x-position assigned to the infinite measurements that preprocessing counted
# but removed. It lies inside `bins` yet outside the x-limits set below, so
# those measurements still enter the density normalization without being drawn.
invalid_x = -100.0

sns.set_theme(style='ticks', font_scale=0.75, rc={
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'svg.fonttype': 'none',
    'text.usetex': False,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 9,
    'axes.labelsize': 9,
    'axes.titlesize': 9,
    'axes.labelpad': 2,
    'axes.linewidth': 0.5,
    'axes.titlepad': 4,
    'lines.linewidth': 0.5,
    'legend.fontsize': 9,
    'legend.title_fontsize': 9,
    'xtick.major.size': 2,
    'xtick.major.pad': 2,
    'xtick.major.width': 0.5,
    'ytick.major.size': 2,
    'ytick.major.pad': 2,
    'ytick.major.width': 0.5,
    'xtick.minor.size': 2,
    'xtick.minor.pad': 2,
    'xtick.minor.width': 0.5,
    'ytick.minor.size': 2,
    'ytick.minor.pad': 2,
    'ytick.minor.width': 0.5,
})

fig = plt.figure(figsize=(1.75, 1.5), dpi=300)
gs = fig.add_gridspec(
    1, 1,
    wspace=0, hspace=0,
    left=0.25, right=0.95, bottom=0.3, top=0.95
)

# Histograms
ax = fig.add_subplot(gs[0, 0])

# Modeling library
ax.hist(
    np.concatenate([
        valid_training_x,
        np.full(n_invalid_training_x, invalid_x)
    ]),
    bins=bins, linewidth=0, density=True, color='#AAA',
    rasterized=True
)

# Fit4Function library; the calibration shift is added to every value,
# including the placeholder ones.
ax.hist(
    np.concatenate([
        valid_hh_x,
        np.full(n_invalid_hh_x, invalid_x)
    ]) + Claibration_Shift,
    bins=bins, linewidth=0, density=True, color='#FF1D25', alpha=0.5,
    rasterized=True
)

# Text
ax.text(-12.5, 0.09, 'Uniform\nLibrary', ha='left', va='bottom', color='#888', fontsize=9)
# Blended transform: x is given in axes fractions, y in data units.
ax.text(0.95, 0.25, 'Fit4Function Library',
        transform=mpl.transforms.blended_transform_factory(ax.transAxes, ax.transData),
        ha='right', va='bottom', color='#FF1D25', fontsize=9)

ax.set_xlim([-14, 7])
ax.set_xticks([-12, -8, -4, 0, 4])
ax.set_ylim(ylim)

ax.set_xlabel('Measured\nProduction Fitness')
ax.set_ylabel('Density')

# Save figure and show
filename = 'figures/fig3a_fit4function_vs_uniform_distribution'
# savefig does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(filename), exist_ok=True)
fig.savefig(filename + '.png', transparent=True, dpi=300)
fig.savefig(filename + '_600dpi.svg', dpi=600)
fig.savefig(filename + '_1200dpi.svg', dpi=1200)

plt.close()

Image(filename + '.png')

-----------
# Fit4Function amino acid (AA) distribution aligns with that expected for high production fitness variants



In [None]:
# Input tables

# Fit4Function library (sequences only)
Fit4Fxn = pd.read_csv('data/fit4function_library_screens.csv', usecols=['AA'])

# Modeling library
modeling = pd.read_csv(
    'data/modeling_library_production_fitness.csv',
    usecols=['AA', 'Label', 'Production'],
)

# Keep the high production fitness variants: log2(Production) above -2 and
# labelled either 'Designed' or 'Calibration'; rows are renumbered from 0.
is_high_production = np.log2(modeling['Production']) > -2
is_kept_label = modeling['Label'].isin(['Designed', 'Calibration'])
high_fit = modeling[is_high_production & is_kept_label].reset_index(drop=True)

# NNK 240K
nnk240 = pd.read_csv('data/nnk_library_top_production_fitness_240k.csv')


In [None]:
# Figure: three side-by-side amino-acid heatmaps (Fit4Function library,
# high production Modeling variants, NNK library) sharing one colorbar.

import os

# Figure Configuration
sns.set_theme(style='ticks', font_scale=0.75, rc={
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'svg.fonttype': 'none',
    'text.usetex': False,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.size': 9,
    'axes.labelsize': 9,
    'axes.titlesize': 9,
    'axes.labelpad': 2,
    'axes.linewidth': 0.5,
    'axes.titlepad': 4,
    'lines.linewidth': 0.5,
    'legend.fontsize': 9,
    'legend.title_fontsize': 9,
    'xtick.major.size': 2,
    'xtick.major.pad': 2,
    'xtick.major.width': 0.5,
    'ytick.major.size': 2,
    'ytick.major.pad': 2,
    'ytick.major.width': 0.5,
    'xtick.minor.size': 2,
    'xtick.minor.pad': 2,
    'xtick.minor.width': 0.5,
    'ytick.minor.size': 2,
    'ytick.minor.pad': 2,
    'ytick.minor.width': 0.5,
})

fig = plt.figure(figsize=(2.5, 2.5), dpi=300)
# Main grid: one column per heatmap.
gs = fig.add_gridspec(
    1, 3, left=0.04, right=0.77, bottom=0.15, top=0.78,
    hspace=0., wspace=0.2,
)
# Narrow strip to the right of the heatmaps for the colorbar.
gs2 = fig.add_gridspec(
    1, 1, left=0.78, right=0.80, bottom=0.17, top=0.76
)

# Colormap and limits passed to every heatmap and to the colorbar norm.
cmap = mpl.cm.coolwarm
vmin = -0.1
vmax = 0.1


# Heatmaps
# NOTE(review): `heatmap` comes from utils_f4f and is not visible here. Its
# fourth argument is a CSV path under results/ -- presumably where the
# frequency table is stored; confirm against the helper before relying on it.
ax = fig.add_subplot(gs[:, 0])
heatmap(Fit4Fxn['AA'], ax, 'Fit4Function\nn = 240K', 'results/heatmap_fit4function.csv', vmin, cmap, vmax)

ax = fig.add_subplot(gs[:, 1])
heatmap(high_fit['AA'], ax, 'Modeling\nHigh production\nn = 26K', 'results/heatmap_fitmodeling.csv', vmin, cmap, vmax)
# Only the middle panel carries the shared x-axis label.
ax.set_xlabel('Position', labelpad=3)

ax = fig.add_subplot(gs[:, 2])
heatmap(nnk240['AA'], ax, 'NNK\nn = 240K', 'results/heatmap_nnk_top_240k.csv', vmin, cmap, vmax)

# Colorbar
ax = fig.add_subplot(gs2[0, 0])
norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
# NOTE(review): `values` starts at -0.05 while the norm starts at vmin (-0.1),
# so the drawn bar does not span the full norm range -- confirm this
# truncation is intended.
cb1 = mpl.colorbar.ColorbarBase(
    ax,
    cmap=cmap,
    norm=norm,
    values=np.linspace(-0.05, 0.1, 100),
    orientation='vertical'
)
cb1.set_label(
    'AA frequency relative to expected',
    rotation=-90, labelpad=9
)
ax.yaxis.set_ticks_position('right')
ax.yaxis.set_label_position('right')
ax.tick_params(axis='y', labelsize=7, length=2, pad=2)


# Save and display
filename = 'figures/fig3b_fit4function_aa_distribution'
# savefig does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(filename), exist_ok=True)
fig.savefig(filename + '.png', dpi=300)
fig.savefig(filename + '_600dpi.svg', dpi=600)
fig.savefig(filename + '_1200dpi.svg', dpi=1200)

plt.close()

Image(filename + '.png')