# **2024.01.28 PURExpress with DNA: MGA and MGA-deGFP**
This experiment was conducted using PURExpress from NEB. We added DNA in NFW and MG to the PURE reactions and measured the MGA signal (610/650). The DNA constructs used were pT7_MGapt (5 nM) and pT7_MGApt_UTR1_deGFP (5 nM). Reactions were mixed together with a 1.05× excess, then 10 µL was added to each well.

# Importing required packages and definitions

In [1]:
#workhorses
import numpy as np
import pandas as pd
import math
from bokeh.models import LogScale
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import random 
from scipy import stats
from scipy.signal import savgol_filter
import bokeh
import matplotlib.pyplot as plt
# bokeh.io.output_notebook()

import holoviews as hv
hv.extension('bokeh')
# bebi103.hv.set_defaults()
from sklearn.metrics import r2_score
# from bokeh.io import gridplot, output_file, show
from bokeh.io import export_png
from bokeh.models import Title
from bokeh.plotting import gridplot,figure, output_file, show
from bokeh.models.glyphs import Text

from bokeh.io import export_svgs

#for custom colormaps
from matplotlib.colors import LinearSegmentedColormap

#Get directory
import os
# Working directory of the kernel. The plotting cells further down read the model
# CSVs ('Final_DNA_eff/...') relative to this path, so the notebook must be started
# from the folder that contains them.
directory = os.getcwd()

## Bokeh

In [2]:
%matplotlib inline
import bokeh.io
import bokeh.plotting
bokeh.io.output_notebook()
from bokeh.themes import Theme

# Modules needed from Bokeh.
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import LinearAxis, Range1d

colors = bokeh.palettes.Colorblind[8]
colorPurples=['#dadaeb','#bcbddc','#9e9ac8','#756bb1','#54278f','#4a1486']

try:
    import dnaplotlib as dpl
    dpl_enabled = True
except (ModuleNotFoundError,ImportError) as e:
    dpl_enabled = False
    
theme = Theme(json={'attrs': {
# apply defaults to Figure properties
'Figure': {
    'toolbar_location': 'right',
    'outline_line_color': None,
    'min_border_right': 10,
#     'sizing_mode': 'stretch_width',
    'height':600,
    'width':800,
},'Grid': {
    'grid_line_color': None,
},
'Title': {
    'text_font_size': '20pt',
    'align': 'center'
},
    
    
# apply defaults to Axis properties
'Axis': {
#     'minor_tick_out': None,
#     'minor_tick_in': None,
    'major_label_text_font_size': '15pt',
    'axis_label_text_font_size': '15pt',
#     'axis_label_text_font': 'Work Sans',
    'axis_label_text_font_style': 'normal',
    'axis_label_standoff':15
},


# apply defaults to Legend properties
'Legend': {
    'background_fill_alpha': 0.8,
    'location': 'top_right',
    "label_text_font_size": '15pt'
}}})

bokeh.io.curdoc().theme = theme
from bokeh.io import export_png

In [3]:
#plotting things

#%matplotlib qt5 -- I don't know what this is
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

from cycler import cycler


#All of Anandh's customized seaborn/matplotlib settings
# NOTE: every call below mutates the global matplotlib rcParams, and later calls
# override earlier ones, so the order of these statements matters.

# Seaborn "talk" context with enlarged fonts, "ticks" style, inward-pointing ticks.
sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 1.5})
sns.set_style("ticks")
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

#%config InlineBackend.figure_f.ormats=['svg']

# Default colour cycle for matplotlib line plots.
mpl.rc('axes', prop_cycle=(cycler('color', ['r', 'k', 'b','g','y','m','c']) ))

# Font type 42 (TrueType) for PDF/PS output.
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

#mpl.rc('text', usetex=False)
#mpl.rc('text.latex', preamble=r'\usepackage{helvet}
#\renewcommand\familydefault{\sfdefault}\usepackage{sansmath}\sansmath')

    #If you want to use a different font
# mpl.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 
#                  'serif': ['Helvetica']})

# Shared tick line width, reused for major and minor ticks on both axes.
tw = 1.5
sns.set_style({"xtick.major.size": 3, "ytick.major.size": 3,
               "xtick.minor.size": 2, "ytick.minor.size": 2,
               'axes.labelsize': 16, 'axes.titlesize': 16,
               'xtick.major.width': tw, 'xtick.minor.width': tw,
               'ytick.major.width': tw, 'ytick.minor.width': tw})

# Tick-label, axes-frame and legend sizes, set directly on matplotlib.
mpl.rc('xtick', labelsize=14) 
mpl.rc('ytick', labelsize=14)
mpl.rc('axes', linewidth=1.5)
mpl.rc('legend', fontsize=14)

# Default figure size in inches (width, height).
mpl.rc('figure', figsize=(11,9))


In [4]:
%matplotlib inline
import bokeh.io
import bokeh.plotting
bokeh.io.output_notebook()
from bokeh.themes import Theme

# Modules needed from Bokeh.
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import LinearAxis, Range1d

colors = bokeh.palettes.Colorblind[8]

try:
    import dnaplotlib as dpl
    dpl_enabled = True
except (ModuleNotFoundError,ImportError) as e:
    dpl_enabled = False
    
theme = Theme(json={'attrs': {
# apply defaults to Figure properties
'Figure': {
    'toolbar_location': 'right',
    'outline_line_color': None,
    'min_border_right': 10,
#     'sizing_mode': 'stretch_width',
    'height':600,
    'width':800,
},'Grid': {
    'grid_line_color': None,
},
'Title': {
    'text_font_size': '20pt',
    'align': 'center'
},
    
    
# apply defaults to Axis properties
'Axis': {
#     'minor_tick_out': None,
#     'minor_tick_in': None,
    'major_label_text_font_size': '15pt',
    'axis_label_text_font_size': '15pt',
#     'axis_label_text_font': 'Work Sans',
    'axis_label_text_font_style': 'normal',
    'axis_label_standoff':15
},


# apply defaults to Legend properties
'Legend': {
    'background_fill_alpha': 0.8,
    'location': 'top_right',
    "label_text_font_size": '15pt'
}}})

bokeh.io.curdoc().theme = theme
from bokeh.io import export_png

In [5]:
def create_custom_plot(title_text, x_max=8,y_max=2, xname='Time (hours)', yname='MGapt (μM)',height=400, width=500):
    """Build an empty, pre-styled Bokeh figure.

    Parameters
    ----------
    title_text : str
        Title shown above the plot (left-aligned, 18pt).
    x_max, y_max : number
        Upper limits of the x and y ranges; both ranges start at 0.
    xname, yname : str
        Axis labels.
    height, width : int
        Figure size passed to ``figure``.

    Returns
    -------
    The configured figure, with grids hidden and no outline.
    """
    fig = figure(
        toolbar_location='right',
        outline_line_color=None,
        min_border_right=10,
        height=height,
        width=width,
    )
    fig.outline_line_color = None

    # Both axes start at zero.
    fig.y_range = Range1d(0, y_max)
    fig.x_range = Range1d(0, x_max)

    # Identical cosmetics for the y axis and then the x axis.
    for axis, grid, label in ((fig.yaxis, fig.ygrid, yname),
                              (fig.xaxis, fig.xgrid, xname)):
        axis.axis_label = label
        grid.visible = False
        axis.axis_label_text_font_size = '15pt'
        axis.major_label_text_font_size = '15pt'
        axis.major_label_text_font = 'Work Sans'
        axis.axis_label_standoff = 15
        axis.axis_label_text_font_style = 'normal'

    # Title: text, size, and a leftward shift so it sits over the y axis label.
    heading = fig.title
    heading.text = title_text
    heading.text_font_size = '18pt'
    heading.align = "left"
    heading.offset = -70

    return fig

In [6]:
# Integer column labels 0..10, sliced by Cal_avesNsems to name replicate columns.
array_repeats = list(range(11))

In [7]:
def calibrateBiotek4(df, var):
    """Convert raw fluorescence readings with a fixed linear calibration.

    Parameters
    ----------
    df : number, numpy array or pandas Series/DataFrame
        Raw readings; the arithmetic is applied element-wise.
    var : str
        Which calibration to apply:
        ``'MGapt'`` -> ``(df - 12.46) / 21.39 / 1000``
        ``'GFP'``   -> ``df / 5542.4``
        (presumably yielding µM, matching the axis labels used by the plots;
        confirm against the calibration records).

    Returns
    -------
    The calibrated values, same shape/type as ``df``.

    Raises
    ------
    ValueError
        If ``var`` is not one of the supported names. Previously this case
        printed a message and then failed with an UnboundLocalError.
    """
    if var == 'MGapt':
        return (df - 12.46) / 21.39 / 1000
    if var == 'GFP':
        return df / 5542.4
    raise ValueError(
        f"Error-Cannot calibrate. Unknown var {var!r}; expected 'MGapt' or 'GFP'."
    )

In [8]:
def Cal_avesNsems(df, DF_neg=None, norm=False):
    """Pivot tidy plate-reader data into one column per well and add mean/SEM.

    Parameters
    ----------
    df : pandas.DataFrame
        Tidy data with columns ``'well'``, ``'value'`` and ``'Time'``. The rows
        must be grouped by well, each well contributing the same number of
        consecutive rows, because the values are reshaped row-major into
        (wells, time points). Any trailing rows that do not fill a complete
        well are dropped.
    DF_neg : pandas.DataFrame, optional
        If given, its ``'value_ave'`` column is subtracted (index-aligned) from
        every well before averaging.
    norm : bool, optional
        If truthy, each well is divided by its own maximum (after the optional
        background subtraction).

    Returns
    -------
    pandas.DataFrame
        Columns ``0..n_wells-1`` (one per well), then ``'value_ave'``, ``'sem'``,
        ``'Time'`` (the input time of the first well multiplied by 60),
        ``'error_low'`` and ``'error_high'`` (mean -/+ SEM).

    Raises
    ------
    ValueError
        If ``df`` contains no wells.
    """
    num = len(df['well'].unique())
    if num == 0:
        raise ValueError("Cal_avesNsems: input contains no wells.")
    length = int(len(df) / num)

    # Replicate columns are labelled 0..num-1. Derive them from the data rather than
    # from a fixed-length global list, so every well is included in the statistics.
    coln = list(range(num))

    # Row-major reshape: row i holds the full time course of the i-th well.
    arr = df['value'].values[:num * length].copy().reshape(num, length)
    DF = pd.DataFrame(arr).T

    # Subtract the negative control
    if DF_neg is not None:
        for n in coln:
            DF[n] = DF[n] - DF_neg['value_ave']

    # Normalize data if needed
    if norm:
        for n in coln:
            DF[n] = DF[n] / DF[n].max()

    DF['value_ave'] = DF.iloc[:, coln].mean(axis=1)
    DF['sem'] = stats.sem(DF.iloc[:, coln].T)
    # Time axis taken from the first well only; the factor 60 presumably converts
    # minutes to seconds (the plotting helpers divide by 3600 to get hours).
    DF['Time'] = df['Time'].reset_index(drop=True)[0:length] * 60
    # Add error bars to the DataFrame
    DF['error_low'] = DF['value_ave'] - DF['sem']
    DF['error_high'] = DF['value_ave'] + DF['sem']
    return DF

In [9]:
def Circle_wErrorPlot(plot, DF, ind, color='black',cal=False, marker="circle",size=5,):
    """Add mean markers with SEM error bars for one condition to ``plot``.

    Parameters
    ----------
    plot : Bokeh figure to draw on (modified in place and returned).
    DF : DataFrame with ``'Time'``, ``'value_ave'``, ``'error_low'`` and
        ``'error_high'`` columns; ``'Time'`` is divided by 3600 for the x axis.
    ind : str
        Legend label for the markers.
    color, marker, size : marker appearance; ``color`` is also used for the bars.
    cal : False or str
        When not False, the y values are passed through
        ``calibrateBiotek4(values, cal)`` before plotting.

    Returns
    -------
    The same ``plot`` object, with the legend moved to the top-left, made
    click-to-hide, and stripped of border and background.
    """
    def convert(values):
        # Raw values when no calibration was requested, calibrated otherwise.
        if cal == False:
            return values
        return calibrateBiotek4(values, cal)

    hours = DF['Time']/3600

    # Data from experiments
    plot.scatter(
        x=hours,
        y=convert(DF['value_ave']),
        marker=marker,
        color=color,
        size=size,
        fill_alpha=0.2,
        legend_label=ind,
    )
    # Add error bars
    plot.segment(
        x0=hours,
        y0=convert(DF['error_low']),
        x1=hours,
        y1=convert(DF['error_high']),
        line_width=2,
        color=color,
        line_alpha=.25,
    )

    legend_settings = {
        'location': "top_left",
        'click_policy': "hide",
        'border_line_color': None,
        'background_fill_color': None,
    }
    for attribute, setting in legend_settings.items():
        setattr(plot.legend, attribute, setting)
    return plot

In [10]:
def updateLegend(plot, title="", location='top_left',):
    """Apply the notebook's standard legend styling to ``plot`` and return it.

    Sets the legend position and title, makes entries click-to-hide, removes the
    legend border, and renders the title in a normal (non-italic) font style.
    """
    legend_settings = {
        'location': location,
        'click_policy': "hide",
        # Remove the box around the legend
        'border_line_color': None,
        # Legend title and its font style
        'title': title,
        'title_text_font_style': "normal",
    }
    for attribute, setting in legend_settings.items():
        setattr(plot.legend, attribute, setting)
    return plot

# Calibrated with dynamic calibration

In [11]:
# Folder holding the dynamic-calibration CSVs that the next cell reads.
# NOTE(review): absolute, user-specific Windows path; the notebook cannot run on
# another machine without editing it. Consider a configurable data directory.
directory_cal=r'C:\Users\zoila\Box\biocircuits\ZJurado\Projects\Organelles\PURExpress_frex_systems\2023.07.28_PURE_w.mRNAonly'

In [12]:
# Construct whose dynamic-calibration files are loaded.
dna = 'MGapt-deGFP'

# Smoothed calibration curve; the plotting cells divide the MGapt signal by it.
df_cal = pd.read_csv('/'.join([directory_cal, dna + '_mRNA_dyCal_final_smooth.csv']))
# df_cal=pd.read_csv(directory_cal+'/' + dna+'_mRNA_dyCal_final.csv')

# Companion time table for the calibration (file name has no separator after `dna`).
df_calT = pd.read_csv('/'.join([directory_cal, dna + 'time_dyCal_final.csv']))

## get data

In [13]:
directory_data=r'C:\Users\zoila\Box\biocircuits\ZJurado\Projects\Organelles\PURExpress_frex_systems\2024.01.26_PURExpress_DNAconcs_v2'
filename = '/2024.01.26_PURExpress_DNAconcs_v2.xlsx'

# Load every sheet of the workbook; only the sheet names are used below.
data_dict = pd.read_excel(
    ''.join([directory_data, filename]),
    sheet_name=None,
    engine='openpyxl',
)
# sheets=data_dict.keys()

# Each '*_tidy' sheet has a CSV of the same name next to the workbook.
# The first is read as the GFP table and the second as the MGapt table.
sheets_tidy = [sheet for sheet in data_dict if '_tidy' in sheet]
df_gfp = pd.read_csv('/'.join([directory_data, sheets_tidy[0] + '.csv']))
df_mgapt = pd.read_csv('/'.join([directory_data, sheets_tidy[1] + '.csv']))

# Drop wells whose 'na' entry is 'empty'.
df_gfp = df_gfp.loc[df_gfp['na'] != 'empty']
df_mgapt = df_mgapt.loc[df_mgapt['na'] != 'empty']

  warn(msg)
  warn(msg)


# Plots

## deGFP 

In [23]:
# deGFP time courses: measured mean +/- SEM (markers) against the model
# prediction (lines), one shade of purple per DNA concentration.
n_points = 60      # leading time points that are plotted and used for the end-point comparison
model_scale = .7   # NOTE(review): unexplained factor applied to the model's deGFP_m; confirm its origin

pDNAvar_gfp=create_custom_plot('', x_max=3, y_max=7, yname='deGFP (μM)')
count=0
for na_conc in df_gfp['na_conc'].unique():
    label = str(na_conc)+' nM'
    # Walk the palette from darkest to lightest as the loop advances.
    shade = colorPurples[len(colorPurples)-1-count]

    # Measured data for this concentration.
    data=df_gfp[df_gfp['na_conc']==na_conc]
    DF=Cal_avesNsems(data)
    Circle_wErrorPlot(pDNAvar_gfp, DF[:n_points], label, shade, cal='GFP',)

    # Model prediction for the same concentration, read relative to `directory`.
    # A dedicated name is used so the workbook `filename` from the data-loading
    # cell is not overwritten.
    model_csv = 'Final_DNA_eff/MGapt-deGFP_w'+str(na_conc)+'nM.csv'
    PURE_Model_Final=pd.read_csv(directory+'/' + model_csv)
    model_gfp = PURE_Model_Final['deGFP_m']*model_scale
    pDNAvar_gfp.line(PURE_Model_Final['time']/3600, model_gfp, legend_label=label,
                     line_color=shade, line_dash="solid", line_width=3, line_alpha=.5)

    # End-point comparison: use the same calibration helper as the plotted points
    # instead of a hand-copied conversion constant.
    data_end=np.round(calibrateBiotek4(DF['value_ave'][:n_points], 'GFP').max(),2)
    sim_end=np.round(model_gfp.max(),2)
    # Percent deviation of the model from the data, relative to the data.
    error=np.abs(data_end-sim_end)/data_end*100

    print(f"{na_conc} DNA makes {data_end} (data) vs. {sim_end} (model) for {np.round(error,2)}%")
    print(f"DNA start: {PURE_Model_Final['DNA'][0]*1000}")
    count+=1

pDNAvar_gfp=updateLegend(pDNAvar_gfp)
bokeh.io.show(pDNAvar_gfp)

# Export the figure as SVG (the 'Figures' folder must already exist).
pDNAvar_gfp.output_backend = "svg"
export_svgs(pDNAvar_gfp, filename = 'Figures/pDNAvar_gfp.svg',width=500, height=400)


1.99 DNA makes 6.8 (data) vs. 5.93 (model) for 12.79%
DNA start: 4.0499669732684005
1.06 DNA makes 5.88 (data) vs. 5.37 (model) for 8.67%
DNA start: 3.0592511655882
0.47 DNA makes 4.53 (data) vs. 4.1 (model) for 9.49%
DNA start: 1.7880209999999
0.26 DNA makes 3.3 (data) vs. 2.74 (model) for 16.97%
DNA start: 0.9891179999999999
0.12 DNA makes 2.34 (data) vs. 1.45 (model) for 38.03%
DNA start: 0.4565159999999
0.07 DNA makes 1.52 (data) vs. 0.89 (model) for 41.45%
DNA start: 0.266301


['Figures/pDNAvar_gfp.svg']

## MGapt

In [15]:
#Setup plot Full data
# MGapt time courses: measured mean +/- SEM (markers) against the model
# prediction (lines), one shade of purple per DNA concentration.
pDNAvar_rna=create_custom_plot('', x_max=3, y_max=3)
count=0
for na_conc in df_mgapt['na_conc'].unique():
    data=df_mgapt[df_mgapt['na_conc']==na_conc]
    DF=Cal_avesNsems(data)
    # Divide every replicate and summary column by the dynamic-calibration table.
    # NOTE(review): the hard-coded [0,1,2] assumes exactly three wells per
    # concentration, and the division relies on pandas alignment with df_cal
    # (presumably a single-column frame with one row per time point); verify.
    for col in [0,1,2,'value_ave','sem','error_low', 'error_high']:
        DF[col]=((DF[col])/df_cal.T).T
    # Plot the first 60 rows; no further calibration is applied (cal defaults to False).
    Circle_wErrorPlot(pDNAvar_rna, DF[:60], str(na_conc)+' nM', colorPurples[5-count])
    
    # Model prediction for the same concentration, read relative to `directory`.
    # NOTE(review): this overwrites the `filename` set in the data-loading cell.
    filename = 'Final_DNA_eff/MGapt-deGFP_w'+str(na_conc)+'nM.csv'
    PURE_Model_Final=pd.read_csv(directory+'/' + filename)
    pDNAvar_rna.line(PURE_Model_Final['time']/3600, PURE_Model_Final['MGapt'],legend_label = str(na_conc)+' nM', line_color=colorPurples[5-count], 
        line_dash= "solid", line_width=3,line_alpha=.5) 
    
    # End-point comparison.
    # NOTE(review): the data value is taken at row label 60, one past the rows that
    # are plotted (DF[:60] covers 0-59), while the model value is its maximum; confirm
    # this is intended.
    data_end=np.round(DF['value_ave'][60],2)
    sim_end=np.round(PURE_Model_Final['MGapt'].max(),2)
    # Percent deviation of the model from the data, relative to the data.
    error=np.abs(data_end-sim_end)/data_end*100
    
    print(f"{na_conc} DNA makes {data_end} (data) vs. {sim_end} (model) for {np.round(error,2)}%")
    count+=1

pDNAvar_rna=updateLegend(pDNAvar_rna)
bokeh.io.show(pDNAvar_rna)

# Export the figure as SVG into the 'Figures' folder.
pDNAvar_rna.output_backend = "svg"
export_svgs(pDNAvar_rna, filename = 'Figures/pDNAvar_rna.svg',width=500, height=400)

1.99 DNA makes 2.42 (data) vs. 2.34 (model) for 3.31%
1.06 DNA makes 1.88 (data) vs. 1.85 (model) for 1.6%
0.47 DNA makes 1.33 (data) vs. 1.16 (model) for 12.78%
0.26 DNA makes 0.84 (data) vs. 0.68 (model) for 19.05%
0.12 DNA makes 0.45 (data) vs. 0.33 (model) for 26.67%
0.07 DNA makes 0.23 (data) vs. 0.2 (model) for 13.04%


['Figures/pDNAvar_rna.svg']

In [16]:
# NOTE(review): `stop` is not defined anywhere, so this cell raises NameError.
# Presumably it is a deliberate way to halt "Run All" before the cells below;
# confirm, then remove it or replace it with an explicit guard.
stop

NameError: name 'stop' is not defined

# Plotting the data

In [17]:
# One summary row per DNA concentration: calibrated MGapt signal (with its
# SEM bounds) and baseline-subtracted, calibrated deGFP (with its SEM bounds).
# The rows are collected in a list and turned into a DataFrame once, rather than
# growing the frame with pd.concat inside the loop.
summary_columns = ['dna','rna', 'rhigh','rlow', 'gfp','glow','ghigh']
summary_rows = []

for na_conc in df_gfp['na_conc'].unique():
    gfp=df_gfp[df_gfp['na_conc']==na_conc]
    DF_gfp=Cal_avesNsems(gfp)

    rna=df_mgapt[df_mgapt['na_conc']==na_conc]
    DF_rna=Cal_avesNsems(rna)
    # Divide every replicate and summary column by the dynamic-calibration table.
    # NOTE(review): the hard-coded [0,1,2] assumes three wells per concentration.
    for col in [0,1,2,'value_ave','sem','error_low', 'error_high']:
        DF_rna[col]=(DF_rna[col]/df_cal.T).T

    # RNA is sampled at row 40 and deGFP at row 60 (minus its row-0 baseline).
    # NOTE(review): the two row choices are hard-coded; confirm they are intended.
    summary_rows.append({
        'dna': na_conc,
        'rna': DF_rna['value_ave'][40],
        'rlow': DF_rna['error_low'][40],
        'rhigh': DF_rna['error_high'][40],
        'gfp': calibrateBiotek4(DF_gfp['value_ave'][60]-DF_gfp['value_ave'][0], 'GFP'),
        'glow': calibrateBiotek4(DF_gfp['error_low'][60]-DF_gfp['error_low'][0], 'GFP'),
        'ghigh': calibrateBiotek4(DF_gfp['error_high'][60]-DF_gfp['error_high'][0], 'GFP'),
    })

# Values are matched to columns by name, so the column order here is the display order.
df_Data=pd.DataFrame(summary_rows, columns=summary_columns).astype(float)
df_Data

Unnamed: 0,dna,rna,rhigh,rlow,gfp,glow,ghigh
0,1.99,2.282534,2.343292,2.221776,6.677011,6.549297,6.804725
1,1.06,1.499421,1.553228,1.445614,5.851496,5.730583,5.97241
2,0.47,0.879706,0.94188,0.817532,4.553503,4.485355,4.62165
3,0.26,0.555991,0.568128,0.543853,3.310239,3.264908,3.355569
4,0.12,0.350236,0.367712,0.33276,2.33972,2.311079,2.368362
5,0.07,0.181434,0.185437,0.177431,1.515288,1.462133,1.568444


In [18]:
# ---- Panel 1: MGapt signal versus DNA concentration ----
pDataDR = create_custom_plot('', xname='DNA (nM)', x_max=2.5, y_max=2.5)
pDataDR.scatter(
    x=df_Data['dna'], y=df_Data['rna'],
    size=5, color='black', fill_alpha=0.5)
# Vertical error bars (rlow to rhigh)
pDataDR.segment(
    x0=df_Data['dna'], x1=df_Data['dna'],
    y0=df_Data['rlow'], y1=df_Data['rhigh'],
    color='black', line_width=2, line_alpha=.25)

# ---- Panel 2: deGFP versus MGapt signal ----
pDataRP = create_custom_plot('', x_max=2.5, y_max=7, xname='RNA (μM)', yname='deGFP (μM)')
pDataRP.scatter(
    x=df_Data['rna'], y=df_Data['gfp'],
    size=5, color='black', fill_alpha=0.5)
# Vertical error bars (glow to ghigh)
pDataRP.segment(
    x0=df_Data['rna'], x1=df_Data['rna'],
    y0=df_Data['glow'], y1=df_Data['ghigh'],
    color='black', line_width=2, line_alpha=.25)
# Horizontal error bars (rlow to rhigh)
pDataRP.segment(
    x0=df_Data['rlow'], x1=df_Data['rhigh'],
    y0=df_Data['gfp'], y1=df_Data['gfp'],
    color='black', line_width=2, line_alpha=.25)

# ---- Panel 3: deGFP versus DNA concentration ----
pDataDP = create_custom_plot('', xname='DNA (nM)', yname='deGFP (μM)', x_max=2.5, y_max=7)
pDataDP.scatter(
    x=df_Data['dna'], y=df_Data['gfp'],
    size=5, color='black', fill_alpha=0.5)
# Vertical error bars (glow to ghigh)
pDataDP.segment(
    x0=df_Data['dna'], x1=df_Data['dna'],
    y0=df_Data['glow'], y1=df_Data['ghigh'],
    color='black', line_width=2, line_alpha=.25)

# ---- Show the three panels side by side ----
layout = gridplot([[pDataDR, pDataRP, pDataDP]])
show(layout)

In [19]:

# Export each of the three dose-response panels as a 500x400 SVG in 'Figures/'.
# The backend has to be switched to "svg" on a figure before export_svgs is called.
pDataDR.output_backend = "svg"
export_svgs(pDataDR, filename = 'Figures/pDataDR.svg',width=500, height=400)


pDataRP.output_backend = "svg"
export_svgs(pDataRP, filename = 'Figures/pDataRP.svg',width=500, height=400)


# The last call is left as the cell's final expression, so its return value
# (the list of written file names) is what the cell displays.
pDataDP.output_backend = "svg"
export_svgs(pDataDP, filename = 'Figures/pDataDP.svg',width=500, height=400)

['Figures/pDataDP.svg']

In [20]:
# Effective DNA versus nominal DNA, with a piecewise fit:
# linear through the origin up to x[2], logarithmic from x[2] to x[3].
p=create_custom_plot('', x_max=2,y_max=4.5, yname='Effective DNA (nM)',xname='DNA (nM)')
# NOTE(review): hard-coded points and fit coefficients; their source is not shown
# in this notebook. Confirm against the fitting records.
y=[0, .32, 1.78, 4.05]
x=[0, .07, .47, 1.99]

x1=np.linspace(0,x[2],100)
x2=np.linspace(x[2],x[3],100)
y1= 3.8043*x1
y2= 1.5729*np.log(x2) + 2.9676

# scatter() is used here like in the other plotting cells of this notebook.
p.scatter(x, y, marker='circle', color='black', legend_label = 'Experimental data',size=5)
# The first segment is a straight line, so it gets its own legend entry instead of
# sharing the 'Logarithmic fit' label.
p.line(x1, y1, line_dash='dashed',line_width=2, color='black',alpha=.5, legend_label = 'Linear fit')
p.line(x2, y2, line_dash='dashed',line_width=2, color='grey',alpha=.5, legend_label = 'Logarithmic fit')

# # add a label to the top-left corner of the plot
# # label = Label(x=1.5, y=4, text="0.5252*ln(MGapt)+ 2.9388", text_align="left", text_baseline="top")
# # pRNAvGFP.add_layout(label)

p=updateLegend(p, title="", location='top_left',)
bokeh.io.show(p)

# Export the figure as SVG into the 'Figures' folder.
p.output_backend = "svg"
export_svgs(p, filename = 'Figures/pDNAfit.svg',width=500, height=400)


['Figures/pDNAfit.svg']

# Computing environment

In [None]:
# Record the Python version and selected package versions for reproducibility.
# NOTE(review): the list names bioscrape, panel and biocrnpyler, which this notebook
# does not import, and omits numpy, pandas, scipy, seaborn and holoviews, which it
# does import; consider updating the list.
%load_ext watermark
%watermark -v -p bioscrape,bokeh,panel,jupyterlab,biocrnpyler