In [8]:
# Written by Kalon Overholt
# Young Lab
# 3/29/2022
# This script fits binding isotherms to fluorescence polarization data for equilibrium protein-ligand interactions using a ligand depletion model

In [9]:
# Import libraries
import scipy as sp
from pylab import *
import numpy as np
import pandas as pd
import seaborn as sns
import collections
from IPython.display import clear_output
import random
sns.set_style("ticks")
from scipy.optimize import curve_fit
from scipy.stats.distributions import  t

# Matplotlib configuration for figure export and display.
import matplotlib as mpl
# Request the non-interactive Agg backend.
# NOTE(review): the `%matplotlib inline` magic at the end of this cell also selects a
# backend, so this call is presumably overridden when run inside Jupyter — confirm it is needed.
mpl.use('Agg')
# Font type 42 asks the PDF/PS writers to embed TrueType fonts (per the matplotlib rcParams
# documentation), which keeps text editable in vector-graphics editors.
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
# Render text with matplotlib's own text engine rather than an external LaTeX installation.
mpl.rcParams['text.usetex'] = False
# Use Arial as the sans-serif face and make sans-serif the default font family.
mpl.rcParams['font.sans-serif'] = 'Arial'
mpl.rcParams['font.family'] = 'sans-serif'
# Figure resolution in dots per inch.
mpl.rcParams['figure.dpi'] = 300
from matplotlib import pyplot as plt
# Display figures inline in the notebook output cells.
%matplotlib inline

In [10]:
# Model constant shared by the binding model defined below.
# Total ligand (RNA) concentration in nM; read as a global by `modelfun`.
L = 10

In [11]:
# Create function handle
def modelfun(x,b0,b1,b2):
    """Ligand-depletion binding isotherm used as the model for ``curve_fit``.

    The signal is interpolated between ``b1`` and ``b2`` according to the
    fraction of ligand bound, obtained from the quadratic binding equation
    with the module-level total ligand concentration ``L``.

    Parameters
    ----------
    x : float or array-like
        Protein concentration(s) at which to evaluate the model.
    b0 : float
        Dissociation constant (Kd), in the same concentration units as ``x`` and ``L``.
    b1 : float
        Signal with no ligand bound (lower bound of the isotherm).
    b2 : float
        Signal with all ligand bound (upper bound of the isotherm).

    Returns
    -------
    float or numpy.ndarray
        Model signal at each ``x``.

    Notes
    -----
    The signature must stay ``(x, b0, b1, b2)``: ``curve_fit`` infers the number
    of fit parameters from it.
    """
    total = L + x + b0
    discriminant = total**2 - 4*L*x
    # Smaller root of the quadratic, i.e. the concentration of bound complex times 2.
    numerator = total - np.sqrt(discriminant)
    return b1 + (b2 - b1)*numerator/(2*L)

In [12]:
# From https://kitchingroup.cheme.cmu.edu/blog/2013/02/18/Nonlinear-curve-fitting-with-confidence-intervals/
def nl_95_ci(pars, pcov, Y):
    """Return the standard error and point estimate of the first fit parameter (Kd).

    Despite its name, this function has only ever returned the standard error
    and the estimate of ``pars[0]``; the 95% confidence interval computed by
    earlier versions was discarded. The return contract is kept unchanged so
    existing callers keep working.

    Parameters
    ----------
    pars : sequence of float
        Optimal parameter values from ``curve_fit``; ``pars[0]`` is taken to be Kd.
    pcov : 2-D array-like
        Covariance matrix of the parameter estimates from ``curve_fit``. Its
        diagonal holds the variance of each parameter estimate.
    Y : sequence
        Observed data used for the fit. Not used in the returned values; kept
        for backward compatibility with existing call sites.

    Returns
    -------
    tuple
        ``(sigma_kd, mean_kd)``: the square root of ``pcov[0][0]`` and ``pars[0]``.

    Raises
    ------
    ValueError
        If ``pars`` or the diagonal of ``pcov`` is empty.
    """
    if len(pars) == 0:
        raise ValueError('nl_95_ci requires at least one fitted parameter')

    variances = np.diag(pcov)
    if len(variances) == 0:
        raise ValueError('nl_95_ci requires a non-empty covariance matrix')

    # Standard error of a parameter estimate = sqrt of its variance on the diagonal of pcov
    sigma_kd = np.sqrt(variances[0])
    mean_kd = pars[0]

    return (sigma_kd, mean_kd)

In [13]:
# https://www.graphpad.com/support/faqid/1765/
# Standard errors are the square roots of the diagonal elements of the covariance matrix
# http://sia.webpopix.org/nonlinearRegression.html#standard-errors-of-the-parameter-estimates
# https://stackoverflow.com/questions/25234996/getting-standard-error-associated-with-parameter-estimates-from-scipy-optimize-c
def ttest_student(sd1,sd2,mean1,mean2,df,alpha=0.05):
    """Two-tailed t-test comparing two parameter estimates given their standard errors.

    Parameters
    ----------
    sd1, sd2 : float
        Standard errors of the two estimates (e.g. the square roots of the
        covariance-matrix diagonal from a curve fit). They are used directly
        as standard errors, not divided by sqrt(n).
    mean1, mean2 : float
        The two estimates being compared.
    df : float
        Degrees of freedom of the t distribution.
    alpha : float, optional
        Significance level used for the critical value (default 0.05).

    Returns
    -------
    tuple
        ``(p, t_stat, cv, df)``: two-tailed p-value, t statistic, two-tailed
        critical value at ``alpha``, and the degrees of freedom passed in.

    Notes
    -----
    Prints the ratio of the two variances as a quick check of the
    equal-variance assumption.
    """
    # Calculate variances
    var1 = sd1**2
    var2 = sd2**2

    # Guard the diagnostic ratio against a zero denominator instead of raising
    if var2 != 0:
        ratio = var1/var2
    elif var1 != 0:
        ratio = float('inf')
    else:
        ratio = float('nan')
    print('Ratio of variances=' + str(ratio) + ' (Rule of thumb: this must be between 0.25 and 4)')

    # https://stats.stackexchange.com/questions/495215/standard-error-standard-deviation-and-variance-confusion
    # The inputs are already standard errors, so the standard error of the difference is sqrt(se1^2 + se2^2)
    t_stat = (mean1 - mean2) / np.sqrt(var1 + var2)  # calculate the t-test statistic

    cv = t.ppf(1 - alpha/2, df)  # calculate the critical value

    # Survival function avoids the catastrophic cancellation of 1 - cdf for very small p-values
    p = 2*t.sf(abs(t_stat), df)  # calculate the p-value

    return (p, t_stat, cv, df)

In [14]:
# Read in data
# The CSV has no header row; its first column becomes the row index (used below as the protein label).
# Expected layout, per sample: one row of protein concentrations followed by `m` rows of replicate readings.
filename = '220708_FP_TF_RNA_DNA-analysis-GATA2'
T = pd.read_csv(filename + '.csv', header=None, index_col=0)
proteins = pd.unique(T.index)

nrows = T.shape[0]

m = 3 # Number of replicates

# Initial guess for the fit parameters: Kd, lower bound, upper bound
p0 = [1000, 200, 400]

if nrows < 2*(1 + m):
    raise ValueError(f'Expected at least {2*(1 + m)} rows (2 samples x (1 concentration row + {m} replicates)), found {nrows}')


def _fit_sample(table, first_row, n_reps, guess):
    """Fit the binding model to one sample block of ``table``.

    ``table.iloc[first_row]`` holds the protein concentrations and the next
    ``n_reps`` rows hold the replicate readings.

    Returns ``(conc, data, melted, mean_signal, popt, pcov)`` where ``data`` is
    the replicate block with concentrations as column labels, ``melted`` is its
    long-format version (columns ``variable`` and ``value``), ``mean_signal`` is
    the per-concentration mean, and ``popt``/``pcov`` come from ``curve_fit``.
    """
    conc = table.iloc[first_row, :].to_numpy()
    # Copy so that relabelling the columns never touches the source table
    data = table.iloc[first_row + 1:first_row + 1 + n_reps, :].copy()
    data.columns = conc
    melted = data.melt()
    mean_signal = np.mean(data, axis=0)
    # Fit model for fraction ligand bound using every replicate point
    popt, pcov = curve_fit(modelfun, melted.variable, melted.value, guess)
    return conc, data, melted, mean_signal, popt, pcov


# Sample 1: rows 0 .. m
P1, data1, melt_data1, Y1, popt1, pcov1 = _fit_sample(T, 0, m, p0)
b0, b1, b2 = popt1
kd_1 = round(b0)

# Standard error and estimate of Kd for sample 1
(sd1, mean1) = nl_95_ci(popt1, pcov1, melt_data1.value)

# Sample 2: rows 1+m .. 1+2m
P2, data2, melt_data2, Y2, popt2, pcov2 = _fit_sample(T, 1 + m, m, p0)
b0, b1, b2 = popt2
kd_2 = round(b0)

# Standard error and estimate of Kd for sample 2
(sd2, mean2) = nl_95_ci(popt2, pcov2, melt_data2.value)

# Conduct two-tailed t-test
# Degrees of freedom: (points - fitted parameters) summed over both fits
n = len(melt_data1)
n2 = len(melt_data2)
df = (n - len(popt1)) + (n2 - len(popt2))
# NOTE(review): this pooled df assumes similar variances for the two fits; when the printed
# variance ratio falls far outside 0.25-4, a Welch-Satterthwaite df may be more appropriate.
(p, t_stat, cv, df) = ttest_student(sd1, sd2, mean1, mean2, df)

print(f'Kd1: {mean1} Kd2: {mean2}')
print(f'stdev1: {sd1} stdev2: {sd2}')
print(f'Student t-test p value: {p}')

Ratio of variances=0.00022310807505543725 (Rule of thumb: this must be between 0.25 and 4)
Kd1: 348.7303360501127 Kd2: 5061.586901500849
stdev1: 28.652980496452678 stdev2: 1918.2807098596693
Student t-test p value: 0.018243689224037407
