In [3]:
# Written by Kalon Overholt
# Young Lab
# 3/29/2022
# This script fits binding isotherms to fluorescence polarization data for equilibrium protein-ligand interactions using a ligand depletion model

In [4]:
# Import libraries
import scipy as sp
from pylab import *
import numpy as np
import pandas as pd
import seaborn as sns
import collections
from IPython.display import clear_output
import random
sns.set_style("ticks")
from scipy.optimize import curve_fit
from scipy.stats.distributions import  t

import matplotlib as mpl
mpl.use('Agg')
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['text.usetex'] = False
mpl.rcParams['font.sans-serif'] = 'Arial'
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['figure.dpi'] = 300
from matplotlib import pyplot as plt
%matplotlib inline

In [5]:
# Define parameters
# L is read as a module-level global inside modelfun, so it is held fixed
# during curve fitting rather than being one of the fitted parameters.
# NOTE(review): x and b0 in modelfun are presumably in the same units (nM) - confirm against the input CSV.
L=10 # Ligand (RNA) concentration (nM)

In [6]:
# Model function passed to curve_fit
def modelfun(x,b0,b1,b2):
    """Evaluate the ligand-depletion binding model at x.

    The bracketed term divided by ``2*L`` is the quadratic (depletion)
    expression for the bound fraction; it is 0 at ``x = 0``, so the model
    returns ``b1`` there and approaches ``b2`` as that fraction approaches 1.

    Parameters
    ----------
    x : float or array-like
        Independent variable (presumably protein concentration - confirm
        against the input data).
    b0 : float
        Dissociation constant Kd, the quantity reported by the fits below.
    b1 : float
        Signal when no ligand is bound.
    b2 : float
        Signal when all ligand is bound.

    Returns
    -------
    float or ndarray
        Predicted signal at each x.

    Notes
    -----
    Reads the total ligand concentration from the module-level global ``L``.
    """
    total = L + x + b0
    root = np.sqrt(total**2 - 4*L*x)
    span = b2 - b1
    return b1 + span*(total - root)/(2*L)

In [7]:
# Adapted from https://kitchingroup.cheme.cmu.edu/blog/2013/02/18/Nonlinear-curve-fitting-with-confidence-intervals/
def nl_95_ci(pars, pcov, Y):
    """Return the standard error and best-fit value of the first fit parameter.

    The first parameter is treated as Kd. Its standard error is the square
    root of the first diagonal element of the covariance matrix.

    Despite the name, no confidence interval is returned: the earlier version
    computed a 95% interval and discarded it, so that dead code (and the
    loop-variable shadowing it involved) has been removed.

    Parameters
    ----------
    pars : sequence of float
        Best-fit parameters, e.g. ``popt`` from ``curve_fit``.
    pcov : 2-D array-like
        Parameter covariance matrix, e.g. ``pcov`` from ``curve_fit``.
    Y : sequence
        Fitted data. No longer used; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(sigma_kd, mean_kd)``: standard error and value of ``pars[0]``.

    Raises
    ------
    ValueError
        If ``pars`` or the diagonal of ``pcov`` is empty.
    """
    # The diagonal of the covariance matrix holds the parameter variances
    variances = np.diag(pcov)
    if len(pars) == 0 or len(variances) == 0:
        raise ValueError("pars and pcov must contain at least one fit parameter")

    mean_kd = pars[0]
    sigma_kd = variances[0]**0.5

    return (sigma_kd, mean_kd)

In [12]:
def ttest_welch(sd1,sd2,mean1,mean2,n):
    """Two-tailed Welch's t-test from summary statistics of two equal-sized groups.

    Parameters
    ----------
    sd1, sd2 : float
        Standard deviations of the two samples.
    mean1, mean2 : float
        Means of the two samples.
    n : int
        Number of observations in each sample (must be at least 2).

    Returns
    -------
    tuple
        ``(p, t_stat, cv, df)``: two-tailed p-value, t statistic, critical
        value at alpha = 0.05 (two-tailed), and Welch-Satterthwaite degrees
        of freedom.

    Raises
    ------
    ValueError
        If ``n < 2``, because the degrees-of-freedom formula divides by ``n - 1``.

    Notes
    -----
    NOTE(review): this notebook passes curve_fit parameter standard errors as
    sd1/sd2; confirm that dividing their squares by n is the intended treatment.
    """
    if n < 2:
        raise ValueError("n must be at least 2 for Welch's t-test")

    alpha = 0.05 # significance level used for the critical value

    # Squared standard error of the mean for each sample
    se1_sq = sd1**2 / n
    se2_sq = sd2**2 / n

    # Standard error of the difference between the sample means.
    # np.sqrt is used explicitly instead of the name leaked by `from pylab import *`.
    sed = np.sqrt(se1_sq + se2_sq)

    t_stat = (mean1 - mean2) / sed # t-test statistic

    # Welch-Satterthwaite degrees of freedom
    df = (se1_sq + se2_sq)**2 / (se1_sq**2 / (n - 1) + se2_sq**2 / (n - 1))

    cv = t.ppf(1 - alpha/2, df) # critical value

    # Survival function avoids the cancellation in 1 - cdf, which rounds
    # very small p-values down to exactly 0.
    p = 2 * t.sf(abs(t_stat), df)

    return (p, t_stat, cv, df)

In [13]:
# Read in data
filename='RNA_stat_test'
T=pd.read_csv(filename+'.csv',header=None,index_col=0)
proteins=pd.unique(T.index) # not used in this cell; kept in case other cells read it

nrows=T.shape[0] # not used in this cell; kept in case other cells read it

n=3 # Number of replicates

def _fit_kd(table, first_row, n_reps, p0):
    """Fit modelfun to one sample block of the table.

    Row ``first_row`` supplies the x values (presumably protein
    concentrations - confirm against the CSV) and the following ``n_reps``
    rows are replicate readings that are averaged column-wise before fitting.

    Returns ``(x, replicates, y_mean, popt, pcov, sd, mean)`` where ``sd`` and
    ``mean`` are the standard error and value of the first fit parameter (Kd)
    as returned by nl_95_ci.
    """
    x = table.iloc[first_row,:].to_numpy()
    replicates = table.iloc[first_row+1:first_row+1+n_reps,:]
    y_mean = np.mean(replicates,axis=0)
    popt, pcov = curve_fit(modelfun,x,y_mean,p0)
    sd, mean = nl_95_ci(popt, pcov, y_mean)
    return x, replicates, y_mean, popt, pcov, sd, mean

# Initial guess for the fit parameters [Kd, b1, b2]
p0=[1000, 200, 400]

# Sample 1: x values in row 0, replicates in the n rows that follow
P1, data1, Y1, popt1, pcov1, sd1, mean1 = _fit_kd(T, 0, n, p0)
kd_1=round(popt1[0])

# Sample 2: x values in row 4, replicates in the n rows that follow
P2, data2, Y2, popt2, pcov2, sd2, mean2 = _fit_kd(T, 4, n, p0)
b0, b1, b2 = popt2 # same final values as before (parameters of the second fit)
kd_2=round(b0)

# Conduct two-tailed t-test
(p, t_stat, cv, df)=ttest_welch(sd1,sd2,mean1,mean2,n)

print(f'Kd1: {mean1} Kd2: {mean2}')
print(f'stdev1: {sd1} stdev2: {sd2}')
print(f'Welch t-test p value: {p}')

Kd1: 809.6745242332491 Kd2: 2075.069801326464
stdev1: 176.9209694395705 stdev2: 166.6790943788031
Welch t-test p value: 0.0008516249981116442
