In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api
from scipy.stats import kendalltau, pearsonr, spearmanr
from statsmodels.stats.multitest import fdrcorrection

def spearsonr_pval(x,y):
    """Return only the p-value of the Spearman rank correlation of x and y.

    The two-argument signature lets this be passed as the ``method``
    callable of ``Series.corr``, as the correlation loop in this cell does.
    """
    _, p_value = spearmanr(x, y)
    return p_value

# Load the combined table (metadata columns and metabolite feature columns).
data_a = pd.read_csv("06232022-pos-M-F-filtered.csv")

# Column positions are 0-based and range ends are exclusive
# (a:b covers a <= i < b).

# Metadata: column 0 ('filename', used as the index) plus column 8,
# the CD4 indicator.
data_M = data_a.iloc[:, [0, 8]].set_index('filename')

# Features: column 0 ('filename', used as the index) plus the metabolite
# columns at positions 25 through 2081.
data_F = data_a.iloc[:, [0, *range(25, 2082)]].set_index('filename')

# Zero-row views of each frame: printing them shows just the column names,
# and iterating over them yields the column labels.
M_column_list = data_M.head(0)
F_column_list = data_F.head(0)
print(M_column_list)
print(F_column_list)

# Spearman correlation (coefficient and p-value) of every metadata column
# against every feature column, gathered as (label, rho, p-value) tuples.
pair_stats = []
for m_col in M_column_list:
    meta_values = data_M[m_col]  # same series for the whole inner loop
    for f_col in F_column_list:
        feature_values = data_F[f_col]
        rho = meta_values.corr(feature_values, method='spearman')
        p_value = meta_values.corr(feature_values, method=spearsonr_pval)
        pair_stats.append((m_col + "-" + f_col, rho, p_value))

# Split the tuples into the three parallel lists used further down.
df_col_list = [label for label, _, _ in pair_stats]
df_corr_list = [rho for _, rho, _ in pair_stats]
df_pval_list = [p_value for _, _, p_value in pair_stats]

print(df_col_list)
print(df_corr_list)
print(df_pval_list)

# Benjamini-Hochberg FDR correction of the per-pair p-values, then export.
#
# A pair can have a NaN p-value (e.g. a constant feature column, or no
# overlapping non-missing samples). NaN is not a valid input to the
# correction: it sorts last and then propagates through the cumulative
# minimum, turning every adjusted p-value into NaN, and it also inflates the
# number of tests. So only finite p-values are corrected; untestable pairs
# keep FDR_pval = NaN and FDR_rejected = False.
raw_pvals = np.asarray(df_pval_list, dtype=float)
testable = np.isfinite(raw_pvals)

fdr_rejected = np.zeros(raw_pvals.shape, dtype=bool)
fdr_pvals = np.full(raw_pvals.shape, np.nan)
if testable.any():
    fdr_rejected[testable], fdr_pvals[testable] = fdrcorrection(
        raw_pvals[testable], method='indep'
    )

# Same (rejected, corrected p-values) pair layout that fdrcorrection returns.
dr_fdr_pval = (fdr_rejected, fdr_pvals)

df_final = pd.DataFrame({
    'col_name': df_col_list,
    'corr': df_corr_list,
    'corr_pval': df_pval_list,
    'FDR_rejected': dr_fdr_pval[0],
    'FDR_pval': dr_fdr_pval[1],
})

df_final.to_csv('pos-filtered-CD4-correlation-p-values.csv', index=False)

Empty DataFrame
Columns: [CD4]
Index: []
Empty DataFrame
Columns: [aq_pos_mass_297.133_3.694_435, aq_pos_mass_133.065_3.695_779, aq_pos_mass_436.308_4.439_781, aq_pos_mass_447.205_4.267_2450, aq_pos_mass_818.423_4.2_342, aq_pos_mass_358.295_4.2_2238, aq_pos_mass_810.555_4.446_445, aq_pos_mass_766.529_4.452_358, aq_pos_mass_304.211_3.386_2120, aq_pos_mass_898.609_4.434_2367, aq_pos_mass_230.247_4.163_793, aq_pos_mass_854.583_4.439_832, aq_pos_mass_458.321_4.433_771, aq_pos_mass_342.263_4.076_3218, org_pos_mass_372.311_4.296_1457, aq_pos_mass_722.504_4.459_310, org_pos_mass_398.326_4.356_1442, aq_pos_mass_221.042_0.285_187, aq_pos_mass_480.335_4.429_798, aq_pos_mass_828.675_3.627_1190, aq_pos_mass_386.326_4.37_2420, aq_pos_mass_304.212_3.447_2134, aq_pos_mass_710.366_3.627_2224, aq_pos_mass_369.182_4.33_2742, aq_pos_mass_226.18_3.918_256, aq_pos_mass_678.478_4.466_401, aq_pos_mass_200.145_0.645_2717, org_pos_mass_400.342_4.46_1230, org_pos_mass_444.369_4.426_1705, aq_pos_mass_156.077_0.3