In [8]:
import pandas as pd
from scipy import stats


In [6]:
# Read adam.csv (path is relative to the working directory) into a DataFrame.
data = pd.read_csv('adam.csv')

In [15]:
# Pearson correlation for every (EOS | IgE) x (Age | ONSET) column pair,
# visited in the same order as a nested loop over the two lists.
for vara, varb in [(a, b) for a in ['EOS', 'IgE'] for b in ['Age', 'ONSET']]:
    coef, pval = stats.pearsonr(data[vara], data[varb])
    print(f'{vara} vs. {varb}: pearson coef={coef.round(4)} pval={pval.round(4)}')

EOS vs. Age: pearson coef=0.2951 pval=0.22
EOS vs. ONSET: pearson coef=-0.4061 pval=0.0845
IgE vs. Age: pearson coef=0.1638 pval=0.5028
IgE vs. ONSET: pearson coef=-0.1523 pval=0.5337


In [16]:
import numpy as np
from scipy import stats

def pearsonr_ci(x,y,alpha=0.05):
    ''' Calculate Pearson correlation along with a Fisher z-transform confidence interval.

    Parameters
    ----------
    x, y : array-like
      Paired samples for the correlation calculation. Any input accepted by
      ``np.asarray`` works (list, tuple, np.array, pandas Series).
    alpha : float
      Significance level. 0.05 by default, i.e. a 95% confidence interval.
    Returns
    -------
    r : float
      Pearson's correlation coefficient
    pval : float
      The corresponding p value
    lo, hi : float
      The lower and upper bound of the (1 - alpha) confidence interval.
      Both are NaN when there are fewer than four observations, because the
      standard error 1/sqrt(n - 3) is undefined there.
    '''

    # Convert up front so that plain lists/tuples work; they have no `.size`.
    x = np.asarray(x)
    y = np.asarray(y)

    r, p = stats.pearsonr(x, y)

    n = x.size
    if n <= 3:
        # np.float64 (not the builtin float np.nan) so callers can still use
        # numpy scalar methods such as .round() on the bounds.
        undefined = np.float64(np.nan)
        return r, p, undefined, undefined

    # Fisher transform: arctanh(r) is treated as normal with SE 1/sqrt(n-3).
    # r == +/-1 maps to +/-inf, which is the intended limit, so the
    # divide-by-zero warning numpy would emit there is suppressed.
    with np.errstate(divide='ignore'):
        r_z = np.arctanh(r)
    se = 1/np.sqrt(n-3)
    z = stats.norm.ppf(1-alpha/2)
    lo_z, hi_z = r_z-z*se, r_z+z*se
    # Map the interval back from z-space to the correlation scale.
    lo, hi = np.tanh((lo_z, hi_z))
    return r, p, lo, hi

In [20]:
# Same column pairs as the plain Pearson cell, now also printing the interval
# bounds returned by pearsonr_ci (called with its default alpha).
for vara, varb in [(a, b) for a in ['EOS', 'IgE'] for b in ['Age', 'ONSET']]:
    coef, pval, low, high = pearsonr_ci(data[vara], data[varb])
    print(f'{vara} vs. {varb}: pearson coef={coef.round(4)} pval={pval.round(4)} CI: [{low.round(4)}-{high.round(4)}]')

EOS vs. Age: pearson coef=0.2951 pval=0.22 CI: [-0.1837-0.6607]
EOS vs. ONSET: pearson coef=-0.4061 pval=0.0845 CI: [-0.7263-0.059]
IgE vs. Age: pearson coef=0.1638 pval=0.5028 CI: [-0.3137-0.5752]
IgE vs. ONSET: pearson coef=-0.1523 pval=0.5337 CI: [-0.5673-0.3244]
