In [1]:
import pandas
import numpy as np
import pylab as plt
import scipy
import statsmodels.api as sm
from scipy.optimize import curve_fit
import os
os.environ["PATH"] += os.pathsep + '/usr/local/texlive/2021/bin/x86_64-linux'
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

In [2]:
plt.style.use('bmh')
plt.rc('text', usetex=True)

In [3]:
font = {'family':'serif','size':30, 'serif': ['computer modern roman']}
plt.rc('font',**font)

In [4]:
plt.rcParams['text.latex.preamble'] = r'\usepackage{amsmath}'
plt.rc('axes', grid=False, facecolor='white')

In [5]:
#Importing four Excel spread sheets with fluorescent data into Python
#The experiment here was carried out with 8 concentrations of Opt-2, 25 nM PLE, and 1 uM Glu-C
#The imported data represents raw data that has not been processed
data_high_conc_rep1 = pandas.read_excel('High-Opt-2-concentration-range-kcat-km-rep-1-25-nM-PLE-1-uM-Glu-C-raw-fluorescence-vs-time.xlsx',engine='openpyxl')
data_high_conc_rep2 = pandas.read_excel('High-Opt-2-concentration-range-kcat-km-rep-2-25-nM-PLE-1-uM-Glu-C-raw-fluorescence-vs-time.xlsx',engine='openpyxl')
data_low_conc_rep1 = pandas.read_excel('Low-Opt-2-concentration-range-kcat-km-rep-1-25-nM-PLE-1-uM-Glu-C-raw-fluorescence-vs-time.xlsx',engine='openpyxl')
data_low_conc_rep2 = pandas.read_excel('Low-Opt-2-concentration-range-kcat-km-rep-2-25-nM-PLE-1-uM-Glu-C-raw-fluorescence-vs-time.xlsx',engine='openpyxl')

In [6]:
#Reading in the columns
data_high_conc_rep1_column = data_high_conc_rep1.columns
data_high_conc_rep2_column = data_high_conc_rep2.columns
data_low_conc_rep1_column = data_low_conc_rep1.columns
data_low_conc_rep2_column = data_low_conc_rep2.columns

In [7]:
#Importing times corresponding to fluorescence data points
T_HR_rep1 = (data_high_conc_rep1[data_high_conc_rep1_column[1]].values)[1:]
T_HR_rep1 = np.array(T_HR_rep1,dtype='float64')

T_HR_rep2 = (data_high_conc_rep2[data_high_conc_rep2_column[1]].values)[1:]
T_HR_rep2 = np.array(T_HR_rep2,dtype='float64')

T_LR_rep1 = (data_low_conc_rep1[data_low_conc_rep1_column[1]].values)[1:]
T_LR_rep1 = np.array(T_LR_rep1,dtype='float64')

T_LR_rep2 = (data_low_conc_rep2[data_low_conc_rep2_column[1]].values)[1:]
T_LR_rep2 = np.array(T_LR_rep2,dtype='float64')

#Defining a list of substrate concentrations used in the experiment
scaling = 0.7
Sub_conc = [10, 10*scaling, 10*scaling**2, 10*scaling**3, 10*scaling**4,10*scaling**5, 10*scaling**6,10*scaling**7]


In [8]:
#Importing background fluorescence values of the buffer 
buffer_HR_rep1 = (data_high_conc_rep1[data_high_conc_rep1_column[-3:]].values)[1:].T
buffer_LR_rep1 = (data_low_conc_rep1[data_low_conc_rep1_column[-3:]].values)[1:].T
buffer_HR_rep2 = (data_high_conc_rep2[data_high_conc_rep2_column[-3:]].values)[1:].T
buffer_LR_rep2 = (data_low_conc_rep2[data_low_conc_rep2_column[-3:]].values)[1:].T

In [9]:
#Defining an array with dimensions  3 x 8 x T_HR_rep where the first dimension is 3 replicates, the second is 
#concetration of substrate, and the third is fluorescent values taken every 20 seconds
F_PG_rep1 = np.zeros((3,8,len(T_HR_rep1)))
F_PG_rep2 = np.zeros((3,8,len(T_HR_rep2)))

#"PG" corresponds to esterified substrate + PLE + Glu-C, "P" to substrate + PLE, and "G" to substrate + Glu-C
#"Fmax" corresponds to fluorescence of Opt + Glu-C
#Filling the array with data
for a in range(3):
    for b in range(4):
        F_PG_rep1[a,b,:] = (data_high_conc_rep1[data_high_conc_rep1_column[2+a+3*b]].values)[1:] - buffer_HR_rep1[a,:]
        F_PG_rep2[a,b,:] = (data_high_conc_rep2[data_high_conc_rep2_column[2+a+3*b]].values)[1:] - buffer_HR_rep2[a,:]
        F_PG_rep1[a,4+b,:] = (data_low_conc_rep1[data_low_conc_rep1_column[2+a+3*b]].values)[1:] - buffer_LR_rep1[a,:]
        F_PG_rep2[a,4+b,:] = (data_low_conc_rep2[data_low_conc_rep2_column[2+a+3*b]].values)[1:] - buffer_LR_rep2[a,:]
        
        
F_P_rep1 = np.zeros((3,8,len(T_HR_rep1))) 
F_P_rep2 = np.zeros((3,8,len(T_HR_rep2)))

for a in range(3):
    for b in range(4):
        F_P_rep1[a,b,:] = (data_high_conc_rep1[data_high_conc_rep1_column[14+a+3*b]].values)[1:] - buffer_HR_rep1[a,:]
        F_P_rep2[a,b,:] = (data_high_conc_rep2[data_high_conc_rep2_column[14+a+3*b]].values)[1:] - buffer_HR_rep2[a,:]
        F_P_rep1[a,4+b,:] = (data_low_conc_rep1[data_low_conc_rep1_column[14+a+3*b]].values)[1:] - buffer_LR_rep1[a,:]
        F_P_rep2[a,4+b,:] = (data_low_conc_rep2[data_low_conc_rep2_column[14+a+3*b]].values)[1:] - buffer_LR_rep2[a,:]

In [10]:
F_G_rep1 = np.zeros((3,8,len(T_HR_rep1))) 
F_G_rep2 = np.zeros((3,8,len(T_HR_rep2)))

for a in range(3):
    for b in range(4):
        F_G_rep1[a,b,:] = (data_high_conc_rep1[data_high_conc_rep1_column[26+a+3*b]].values)[1:] - buffer_HR_rep1[a,:]
        F_G_rep2[a,b,:] = (data_high_conc_rep2[data_high_conc_rep2_column[26+a+3*b]].values)[1:] - buffer_HR_rep2[a,:]
        F_G_rep1[a,4+b,:] = (data_low_conc_rep1[data_low_conc_rep1_column[26+a+3*b]].values)[1:] - buffer_LR_rep1[a,:]
        F_G_rep2[a,4+b,:] = (data_low_conc_rep2[data_low_conc_rep2_column[26+a+3*b]].values)[1:] - buffer_LR_rep2[a,:]   
    
F_max_rep1 = np.zeros((3,8,len(T_HR_rep1)))
F_max_rep2 = np.zeros((3,8,len(T_HR_rep2)))

for a in range(3):
    for b in range(4):
        F_max_rep1[a,b,:] = (data_high_conc_rep1[data_high_conc_rep1_column[38+a+3*b]].values)[1:] - buffer_HR_rep1[a,:]
        F_max_rep2[a,b,:] = (data_high_conc_rep2[data_high_conc_rep2_column[38+a+3*b]].values)[1:] - buffer_HR_rep2[a,:]
        F_max_rep1[a,4+b,:] = (data_low_conc_rep1[data_low_conc_rep1_column[38+a+3*b]].values)[1:] - buffer_LR_rep1[a,:]
        F_max_rep2[a,4+b,:] = (data_low_conc_rep2[data_low_conc_rep2_column[38+a+3*b]].values)[1:] - buffer_LR_rep2[a,:]


In [11]:
#Taking the average of times across 4 datasets         
T = (T_HR_rep1 + T_HR_rep2 + T_LR_rep1 + T_LR_rep2)/4 #assume no error here

#Adding 15 seconds to each time point because the measurements started at t = 15 sec (shaking for first 15 s)
T = T + 15  
    
#After defining all columns, it is useful to check that everything has been imported correctly as follows:    
#print(F_max_rep2[1,5])

In [12]:
#Randomly pairing F_PG, F_G, F_max, and F_P from either from rep1 (and then doing the same thing with rep2)
Pt_rep1 = np.zeros((3**4,8,len(T)))
Pt_rep2 = np.zeros((3**4,8,len(T)))

import itertools
for k in range(8):
    index = 0
    for a,b,c,d in itertools.product(range(3),repeat=4):
        Pt_rep1[index,k,:] = Sub_conc[k]*(F_PG_rep1[a,k]-F_G_rep1[b,k])/(F_max_rep1[c,k]-F_P_rep1[d,k])
        Pt_rep2[index,k,:] = Sub_conc[k]*(F_PG_rep2[a,k]-F_G_rep2[b,k])/(F_max_rep2[c,k]-F_P_rep2[d,k])
        index += 1
    
#Finding standard deviation of all product concentrations that resulted from random pairings
#Also doing the same for rep1
std_Pt_rep1 = np.std(Pt_rep1, axis=0)
std_Pt_rep2 = np.std(Pt_rep2, axis=0)

#Propagating error for finding standard deviation in the two replicates 
Pt_std = np.sqrt((std_Pt_rep1**2 + std_Pt_rep2**2)/4)
    
Pt = np.concatenate((Pt_rep1,Pt_rep2),axis=0)

#finding the average of all product concentrations that resulted from random pairings
Pt_averaged = np.mean(Pt,axis=0)
                      
#Exporting Pt_averaged values, corresponding STD values, and time values into Excel for plotting 
Pt_averaged_df = pandas.DataFrame(Pt_averaged)
Pt_std_df = pandas.DataFrame(Pt_std)
T_df = pandas.DataFrame(T)

with pandas.ExcelWriter('OUTPUT_kcat_kM_experiment_Opt-2_averaged_Product_Concentration_std_and_Time.xlsx', engine='openpyxl') as writer:
    Pt_averaged_df.to_excel(writer, sheet_name='Pt_averaged')
    Pt_std_df.to_excel(writer, sheet_name='Pt_std')
    T_df.to_excel(writer, sheet_name='Time')

rounded_conc = [round(x, 1) for x in Sub_conc] #rounding concentrations to display in plots

'''
#Remove commenting out for plotting
#The part below is a visual check (plotting Pt_averaged vs time)
plt.figure(figsize=(10, 6))
for i in range(Pt_averaged.shape[0]):
        plt.plot(T, Pt_averaged[i, :], label=f'{rounded_conc[i]} uM of subst.')
        plt.plot(T, Pt_averaged[i, :]+Pt_std[i, :], alpha=0.3)
        plt.plot(T, Pt_averaged[i, :]-Pt_std[i, :], alpha=0.3)
plt.xlabel('Time (s)')
plt.ylabel('[P]t (uM)')
plt.title('Pt_Averaged with STD error vs Time')
plt.grid(True)
plt.legend(fontsize='x-small')
plt.show()
'''

"\n#Remove commenting out for plotting\n#The part below is a visual check (plotting Pt_averaged vs time)\nplt.figure(figsize=(10, 6))\nfor i in range(Pt_averaged.shape[0]):\n        plt.plot(T, Pt_averaged[i, :], label=f'{rounded_conc[i]} uM of subst.')\n        plt.plot(T, Pt_averaged[i, :]+Pt_std[i, :], alpha=0.3)\n        plt.plot(T, Pt_averaged[i, :]-Pt_std[i, :], alpha=0.3)\nplt.xlabel('Time (s)')\nplt.ylabel('[P]t (uM)')\nplt.title('Pt_Averaged with STD error vs Time')\nplt.grid(True)\nplt.legend(fontsize='x-small')\nplt.show()\n"

In [13]:
#Defining kI found in previous experiment along with its standard deviation    
kI = 0.003185705691107232
kI_SD = 6.086274191781751e-05

In [14]:
#The formula for At = Pt + kI*(dPdt)
#We will first find dPdt to eventually find A

#In the manuscript, n = nr + nl
n = 4 #Spacing of n between subsequent concentrations was chosen to compute the derivative dPdt
#Note that the value of dPdt is not very sensitive to the choice of n

#dPdt is easy to plot, but error propagation on dPdt from error on Pt_averaged is challenging
#To avoid challenging math/overestimating the error, we will take an alternative appriach:
#For each of the 6 replicates (3 technical, 2 indepednent), dPdt will be found (6 values per concentration)
#Then, the regular error propagation formula will be applied to the 6 resultant values of dPdt 

#Finding derivative curves for 81 Pt curves in each of the rep (rep 1 and rep 2)
dPdt_rep1 = (Pt_rep1[:,:,n:] - Pt_rep1[:,:,:-n])/(T[n:] - T[:-n]) #Computing dPdt for 81 Pt curves in rep1 
dPdt_rep2 = (Pt_rep2[:,:,n:] - Pt_rep2[:,:,:-n])/(T[n:] - T[:-n]) #Computing dPdt for 81 Pt curves in rep2

#Finding standard deviations corresponding to the derivative curves
std_dPdt_rep1 = np.std(dPdt_rep1, axis=0)
std_dPdt_rep2 = np.std(dPdt_rep2, axis=0)

#Propagating error for finding standard deviation in the two replicates 
dPdt_std = np.sqrt((std_dPdt_rep1**2 + std_dPdt_rep2**2)/4)
dPdt = np.concatenate((dPdt_rep1,dPdt_rep2),axis=0)

#Dinding the average of all derivative curves that resulted from product curves
dPdt_averaged = np.mean(dPdt,axis=0)

T_der = (T[n:] + T[:-n])/2 #Computing times corresponding to derivative dPdt

'''
#Remove commenting out for plotting
#The part below is a visual check (plotting Pt_averaged vs time)
plt.figure(figsize=(10, 6))
for i in range(Pt_averaged.shape[0]):
        plt.plot(T_der, dPdt_averaged[i, :], label=f'{rounded_conc[i]} uM of subst.')
        plt.plot(T_der, dPdt_averaged[i, :]+dPdt_std[i, :], alpha=0.3)
        plt.plot(T_der, dPdt_averaged[i, :]-dPdt_std[i, :], alpha=0.3)
plt.xlabel('Time (s)')
plt.ylabel('dPdt (uM s-1)')
plt.title('dPdt_Averaged with STD error vs T_der')
plt.grid(True)
plt.legend(fontsize='x-small')
plt.show()
'''


"\n#Remove commenting out for plotting\n#The part below is a visual check (plotting Pt_averaged vs time)\nplt.figure(figsize=(10, 6))\nfor i in range(Pt_averaged.shape[0]):\n        plt.plot(T_der, dPdt_averaged[i, :], label=f'{rounded_conc[i]} uM of subst.')\n        plt.plot(T_der, dPdt_averaged[i, :]+dPdt_std[i, :], alpha=0.3)\n        plt.plot(T_der, dPdt_averaged[i, :]-dPdt_std[i, :], alpha=0.3)\nplt.xlabel('Time (s)')\nplt.ylabel('dPdt (uM s-1)')\nplt.title('dPdt_Averaged with STD error vs T_der')\nplt.grid(True)\nplt.legend(fontsize='x-small')\nplt.show()\n"

In [15]:
#The formula for At is At = Pt + kI*(dPdt) or At = Pt + It
#The time corresponding to derivative is in the middle of time points; hence need to find Pt
#that would correspond to the time at which the derivative was found

#The errors here were computed the same way as for dPdt
P_mid_rep1 = (Pt_rep1[:,:,n:] + Pt_rep1[:,:,:-n])/2
P_mid_rep2 = (Pt_rep2[:,:,n:] + Pt_rep2[:,:,:-n])/2 

std_P_mid_rep1 = np.std(P_mid_rep1, axis=0)
std_P_mid_rep2 = np.std(P_mid_rep2, axis=0)

P_mid_std = np.sqrt((std_P_mid_rep1**2 + std_P_mid_rep2**2)/4)
P_mid = np.concatenate((P_mid_rep1,P_mid_rep2),axis=0)

P_mid_averaged = np.mean(P_mid,axis=0)

'''
#Remove commenting out for plotting
#The part below is a visual check (plotting Pt_averaged vs time)
plt.figure(figsize=(10, 6))
for i in range(Pt_averaged.shape[0]):
        plt.plot(T_der, P_mid_averaged[i, :], label=f'{rounded_conc[i]} uM of subst.')
        plt.plot(T_der, P_mid_averaged[i, :]+P_mid_std[i, :], alpha=0.3)
        plt.plot(T_der, P_mid_averaged[i, :]-P_mid_std[i, :], alpha=0.3)
plt.xlabel('Time (s)')
plt.ylabel('P_mid (uM)')
plt.title('P_mid_Averaged with STD error vs T_der')
plt.grid(True)
plt.legend(fontsize='x-small')
plt.show()
'''

"\n#Remove commenting out for plotting\n#The part below is a visual check (plotting Pt_averaged vs time)\nplt.figure(figsize=(10, 6))\nfor i in range(Pt_averaged.shape[0]):\n        plt.plot(T_der, P_mid_averaged[i, :], label=f'{rounded_conc[i]} uM of subst.')\n        plt.plot(T_der, P_mid_averaged[i, :]+P_mid_std[i, :], alpha=0.3)\n        plt.plot(T_der, P_mid_averaged[i, :]-P_mid_std[i, :], alpha=0.3)\nplt.xlabel('Time (s)')\nplt.ylabel('P_mid (uM)')\nplt.title('P_mid_Averaged with STD error vs T_der')\nplt.grid(True)\nplt.legend(fontsize='x-small')\nplt.show()\n"

In [16]:
I = dPdt_averaged/kI #Finding I 
I_std = I * np.sqrt((dPdt_std/dPdt_averaged)**2 + (kI_SD/kI)**2) #Doing error propagation on I

'''
#Remove commenting out for plotting
#The part below is a visual check(plotting [I]t with error vs time)
plt.figure(figsize=(10, 6))
for i in range(I.shape[0]):
        plt.plot(T_der, I[i, :], label=f'{rounded_conc[i]} uM of subst.')
        plt.plot(T_der, I[i, :]-I_std[i, :], alpha=0.3)
        plt.plot(T_der, I[i, :]+I_std[i, :], alpha=0.3)
plt.xlabel('Time (s)')
plt.ylabel('[I]t (uM)')
plt.title('[I]t Averaged vs T_der')
plt.grid(True)
plt.legend(fontsize='x-small')
plt.show()
'''
A = I + P_mid_averaged #Finding A
A_std = np.sqrt((I_std)**2+(P_mid_std)**2) #Doing error propagation on A

'''
#Remove commenting out for plotting
#The part below is a visual check(plotting [A]t with error vs time)
plt.figure(figsize=(10, 6))
for i in range(A.shape[0]):
        plt.plot(T_der, A[i, :], label=f'{rounded_conc[i]} uM of subst.')
        plt.plot(T_der, A[i, :]-A_std[i, :], alpha=0.3)
        plt.plot(T_der, A[i, :]+A_std[i, :], alpha=0.3)
plt.xlabel('Time (s)')
plt.ylabel('[A]t (uM)')
plt.title('[A]t vs T_der')
plt.legend(fontsize='x-small')
plt.grid(True)
plt.show()
'''

"\n#Remove commenting out for plotting\n#The part below is a visual check(plotting [A]t with error vs time)\nplt.figure(figsize=(10, 6))\nfor i in range(A.shape[0]):\n        plt.plot(T_der, A[i, :], label=f'{rounded_conc[i]} uM of subst.')\n        plt.plot(T_der, A[i, :]-A_std[i, :], alpha=0.3)\n        plt.plot(T_der, A[i, :]+A_std[i, :], alpha=0.3)\nplt.xlabel('Time (s)')\nplt.ylabel('[A]t (uM)')\nplt.title('[A]t vs T_der')\nplt.legend(fontsize='x-small')\nplt.grid(True)\nplt.show()\n"

In [17]:
# Now, need to ensure that time points of A used for fitting are less than 10% of [S]0
#This is a convention (initial rates are determined in the first 10% of substrate conversion)

thresh_sub_conc = 0.1*np.array(Sub_conc) #10% threshold of [S]0
first_larger_indices = []

# Iterate over each column and its corresponding concentration and find the "cutoff" index
for idx, concentration in enumerate(thresh_sub_conc):
    column = A[idx, :]  # Get the column from A
    # Find indices where values are larger than the concentration threshold 
    larger_indices = np.where(column > concentration)[0]
    # Get the first index if any are found, otherwise return None
    first_index = larger_indices[0] if larger_indices.size > 0 else None
    first_larger_indices.append(first_index)
    
filtered_first_larger_indices = [x for x in first_larger_indices if x is not None]
cutoff = min(filtered_first_larger_indices)

#Shorten the A and T_der datasets to be within 10% of substrate conversion
shortened_A = A[:,:cutoff]
shortened_A_std = A_std[:,:cutoff]
shortened_T_der = T_der[:cutoff]

print("cutting off beyond", cutoff)

#Exporting shortened A values, STD values, and shortned T_der into Excel for plotting 
shortened_A_df = pandas.DataFrame(shortened_A)
shortened_A_std_df = pandas.DataFrame(shortened_A_std)
shortened_T_der_df = pandas.DataFrame(shortened_T_der)

with pandas.ExcelWriter('OUTPUT_kcat_kM_experiment_Opt-2_shortened_A_std_and_T_der.xlsx', engine='openpyxl') as writer:
    shortened_A_df.to_excel(writer, sheet_name='shortened_A')
    shortened_A_std_df.to_excel(writer, sheet_name='shortened_A_std')
    shortened_T_der_df.to_excel(writer, sheet_name='shortened_T_der')

'''
#Remove commenting out for plotting
#The part below is a visual check(plotting shortened_[A]t with error vs time)
plt.figure(figsize=(10, 6))
for i in range(shortened_A.shape[0]):
        plt.plot(shortened_T_der, shortened_A[i, :],label=f'{rounded_conc[i]} uM of subst.')
        plt.plot(shortened_T_der, shortened_A[i, :]-shortened_A_std[i, :], alpha=0.3)
        plt.plot(shortened_T_der, shortened_A[i, :]+shortened_A_std[i, :], alpha=0.3)
plt.xlabel('Time (s)')
plt.ylabel('Shortened [A]t (uM)')
plt.title('Shortened [A]t vs T_der')
plt.legend(fontsize='x-small')
plt.grid(True)
plt.show()
'''

cutting off beyond 88


"\n#Remove commenting out for plotting\n#The part below is a visual check(plotting shortened_[A]t with error vs time)\nplt.figure(figsize=(10, 6))\nfor i in range(shortened_A.shape[0]):\n        plt.plot(shortened_T_der, shortened_A[i, :],label=f'{rounded_conc[i]} uM of subst.')\n        plt.plot(shortened_T_der, shortened_A[i, :]-shortened_A_std[i, :], alpha=0.3)\n        plt.plot(shortened_T_der, shortened_A[i, :]+shortened_A_std[i, :], alpha=0.3)\nplt.xlabel('Time (s)')\nplt.ylabel('Shortened [A]t (uM)')\nplt.title('Shortened [A]t vs T_der')\nplt.legend(fontsize='x-small')\nplt.grid(True)\nplt.show()\n"

In [18]:
#Next, the slopes of shortened_A vs shortened_T_der graphs need to be determined (dAdt)
#Those slopes correspond to rates in the y-axis of traditional Michaelis-Menten plots
#In order for curve_fit to determine slopes correctly, the diagnoal values of the covariance matrix
#need to be smaller than the off-diagonal values (this also help verify that the fitting model is correct)
#Note that time scales are much larger than concentration scales, which can lead to numerical instability
#To stably apply curve_fit, we first normalize shortened_T_der and shortened_A and then convert them back
#To the normal scale after fitting
#The applied normalization ensures that the mean of the datasets is 0 and the standard deviation is 1 

shortened_T_der_norm = (shortened_T_der-np.mean(shortened_T_der))/np.std(shortened_T_der)
shortened_A_norm = (shortened_A-np.mean(shortened_A))/np.std(shortened_A)
shortened_A_norm_std = shortened_A_std/np.std(shortened_A_std)


#Fit to a straight line 
def linear_model(x, a, b):
    return a * x + b

dAdt = []
dAdt_SD = []

for i in range(8):
    popt, pcov = curve_fit(linear_model, shortened_T_der_norm, shortened_A_norm[i], sigma=shortened_A_norm_std[i], absolute_sigma=True)
    a, b = popt
    #The standard deviation of the slope is the square root of the top left most value in the covariance matrix
    std_errors = np.sqrt(pcov[0,0])
    dAdt.append(a)
    dAdt_SD.append(std_errors)

# Rescale the obtain values back to their original scale
factor = np.std(shortened_A)/np.std(shortened_T_der)
dAdt = np.array(dAdt)*factor
dAdt_SD = np.array(dAdt_SD)*factor

'''
#Remove commenting out for plotting
#The part below is a visual check(plotting shortened_[A]t with error vs time)
plt.plot(Sub_conc,dAdt,'r.',markersize=15)
plt.plot(Sub_conc,dAdt+dAdt_SD,'b-',alpha=0.2)
plt.plot(Sub_conc,dAdt-dAdt_SD,'b-',alpha=0.2)
plt.xlabel('Concentration (uM)')
plt.ylabel('dAdt (uM s-1)')
plt.title('dAdt vs Concentration')
plt.legend(fontsize='x-small')
plt.grid(True)
plt.show()
'''


"\n#Remove commenting out for plotting\n#The part below is a visual check(plotting shortened_[A]t with error vs time)\nplt.plot(Sub_conc,dAdt,'r.',markersize=15)\nplt.plot(Sub_conc,dAdt+dAdt_SD,'b-',alpha=0.2)\nplt.plot(Sub_conc,dAdt-dAdt_SD,'b-',alpha=0.2)\nplt.xlabel('Concentration (uM)')\nplt.ylabel('dAdt (uM s-1)')\nplt.title('dAdt vs Concentration')\nplt.legend(fontsize='x-small')\nplt.grid(True)\nplt.show()\n"

In [19]:
#Apply curve_fit with the same scaling/resclaing to find the slope of the graph where y = dAdt and x = Substrate concentration
#In the right units, slope = kcat/KM * [E]0 where [E]0 = initial enzyme concentration
Sub_conc_norm = (Sub_conc-np.mean(Sub_conc))/np.std(Sub_conc)
dAdt_norm = (dAdt-np.mean(dAdt))/np.std(dAdt)
dAdt_norm_std = dAdt_SD/np.std(dAdt)

popt, pcov = curve_fit(linear_model, Sub_conc_norm, dAdt_norm, sigma=dAdt_norm_std, absolute_sigma=True)
a, b = popt
std_errors = np.sqrt(pcov[0,0])
#print(pcov) to check that the diagonal values of the matrix are larger than the off-diagonal ones

#Rescale the values
factor = np.std(dAdt)/np.std(Sub_conc)
slope = a*factor #in units of s-1
b = b*factor #in units of uM/s
slope_SD = std_errors*factor #in units of s-1

#Convert from units of uM to units of M
b_M = b*factor/(10**6)

#Printing slope and b where y = slope*x + b
print("Value of the slope (in s-1) =", slope)
print("Value of b (in M s-1) =", b_M)

#To find kcat over Km, use the formula slope = kcat/KM*[E]0
#[E]0 used in this experiment was 25 nM, which is 25/(10**9) M
kcat_over_Km = slope/(25/(10**9)) # divided by [E]0 in M
kcat_over_Km_SD = slope_SD/(25/(10**9)) #standard deviation is 68% confidence interval
kcat_over_Km_95_conf_int = kcat_over_Km_SD*1.96

#Final values of Kcat/Km and the 95% confidence interval
print("Kcat/Km =", kcat_over_Km)
print("Kcat/Km 95% confidence interval =", kcat_over_Km_95_conf_int)

#Converting dAdt, dAdt_SD, and Concentrations from units of uM/s to units of M/s
dAdt_M = np.array(dAdt)/(10**6)
dAdt_SD_M = np.array(dAdt_SD)/(10**6)
Sub_conc_M = np.array(Sub_conc)/(10**6)

#Also, generate y values using the fitted linear equation for plotting the fit
y = []

for i in range(8):
    y_value = slope*Sub_conc_M[i]+b_M
    y.append(y_value)

#Exporting dAdt_M values, STD values on dAdt_M, and Concentrations_M values into Excel for plotting 
dAdt_df = pandas.DataFrame(dAdt_M)
dAdt_SD_df = pandas.DataFrame(dAdt_SD_M)
Sub_conc_df = pandas.DataFrame(Sub_conc_M)
y_df = pandas.DataFrame(y)
kcat_over_Km_df = pandas.DataFrame([kcat_over_Km])
kcat_over_Km_95_conf_int_df = pandas.DataFrame([kcat_over_Km_95_conf_int])
b_M_df = pandas.DataFrame([b_M])
slope_df = pandas.DataFrame([slope])

with pandas.ExcelWriter('OUTPUT_kcat_kM_experiment_Opt-2_dAdt_std_Concentrations_fitted_y_values_Kcat_Km_error_slope_b_int.xlsx', engine='openpyxl') as writer:
    dAdt_df.to_excel(writer, sheet_name='dAdt')
    dAdt_SD_df.to_excel(writer, sheet_name='dAdt_std')
    Sub_conc_df.to_excel(writer, sheet_name='Concentrations')
    y_df.to_excel(writer, sheet_name='Fitted y')
    kcat_over_Km_df.to_excel(writer, sheet_name='kcat over Km')
    kcat_over_Km_95_conf_int_df.to_excel(writer, sheet_name='kcat over Km 95% conf inter')
    b_M_df.to_excel(writer, sheet_name='b_M')
    slope_df.to_excel(writer, sheet_name='slope')

plt.plot(Sub_conc_M,dAdt_M,'r.',markersize=15)
plt.plot(Sub_conc_M,dAdt_M+dAdt_SD_M,'k.',alpha=0.2)
plt.plot(Sub_conc_M,dAdt_M-dAdt_SD_M,'k.',alpha=0.2)
plt.plot(Sub_conc_M,dAdt_M+dAdt_SD_M,'b-',alpha=0.2)
plt.plot(Sub_conc_M,dAdt_M-dAdt_SD_M,'b-',alpha=0.2)
plt.plot(Sub_conc_M,slope*np.array(Sub_conc_M)+b_M,'k--')
plt.xlabel('Concentration (M)')
plt.ylabel('dAdt (M s-1)')
plt.title('dAdt vs Concentration')
plt.legend(fontsize='x-small')
plt.grid(True)
plt.show()

    

Value of the slope (in s-1) = 4.083490244556711e-05
Value of b (in M s-1) = -1.4809525610675774e-17
Kcat/Km = 1633.3960978226844
Kcat/Km 95% confidence interval = 315.33777891298354


No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


# 