In [1]:
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import kendalltau, pearsonr, spearmanr

def spearsonr_pval(x,y):
    return spearmanr(x,y)[1]

# Load the combined metadata/feature table and keep only the rows whose
# treatment group is one of the two groups being compared.
data_full = pd.read_csv("LA-142DPI-metadata-feature-table.csv")
data_a = data_full.loc[data_full["Treatmentgroup"].isin(["Group1C", "Group3E"])]

# The feature columns to analyse are named in the `filename` column of a
# second CSV; "MouseID" is placed first so it can become the index below.
data_not_restore = pd.read_csv("LA-mass-not-restored.csv")
data_not_restore_list = ["MouseID"] + data_not_restore.filename.to_list()

# Metadata part: positional column 0 plus columns 2 through 8.
# np.r_ slices are half-open (a:b covers a <= i < b) and positions are 0-based.
# NOTE(review): presumably column 0 is MouseID -- confirm against the CSV.
data_M = data_a.iloc[:, np.r_[0, 2:9]].set_index('MouseID')

# Feature part: the selected feature columns, also indexed by MouseID.
data_F = data_a[data_not_restore_list].set_index('MouseID')

# head(0) yields an empty frame that still carries the column labels;
# iterating over it gives the column names, and printing it lists them.
M_column_list = data_M.head(0)
F_column_list = data_F.head(0)
print(M_column_list)
print(F_column_list)

def _spearman_rho_and_pval(series_a, series_b):
    """Return (rho, p-value) of the Spearman correlation from one scipy call.

    Applies the same preprocessing ``Series.corr`` does before delegating to
    scipy: inner-align the two series on their index, then drop every pair in
    which either value is missing. Returns (nan, nan) when no pair is left.
    """
    aligned_a, aligned_b = series_a.align(series_b, join="inner")
    values_a = aligned_a.to_numpy(dtype=float, na_value=np.nan)
    values_b = aligned_b.to_numpy(dtype=float, na_value=np.nan)
    complete = ~(np.isnan(values_a) | np.isnan(values_b))
    if not complete.any():
        return np.nan, np.nan
    rho, pval = spearmanr(values_a[complete], values_b[complete])
    return rho, pval


# Parallel result lists: one entry per (metadata column, feature column) pair.
df_col_list = []
df_corr_list = []
df_pval_list = []

for m_col in M_column_list:
    # The metadata column does not change across the inner loop; fetch it once.
    col_1 = data_M[m_col]
    for f_col in F_column_list:
        # One spearmanr call gives both the coefficient and its p-value,
        # instead of running the correlation twice per pair.
        corr_col_1_2, corr_pval_col_1_2 = _spearman_rho_and_pval(col_1, data_F[f_col])
        df_col_list.append(f"{m_col}-{f_col}")
        df_corr_list.append(corr_col_1_2)
        df_pval_list.append(corr_pval_col_1_2)

print(df_col_list)
print(df_corr_list)
print(df_pval_list)

# Benjamini-Hochberg FDR correction of the pairwise p-values, then export.
import statsmodels.api  # kept so the `statsmodels` name stays bound for any later code
from statsmodels.stats.multitest import fdrcorrection  # explicit, not via import side effect

# Correct only the finite p-values. A NaN p-value (e.g. from a constant
# column) would otherwise be counted as a test and can turn every adjusted
# p-value into NaN. Untestable pairs stay in the table with
# FDR_rejected=False and FDR_pval=NaN.
pval_array = np.asarray(df_pval_list, dtype=float)
testable = np.isfinite(pval_array)
fdr_rejected = np.zeros(pval_array.shape, dtype=bool)
fdr_pvals = np.full(pval_array.shape, np.nan)
if testable.any():
    fdr_rejected[testable], fdr_pvals[testable] = fdrcorrection(
        pval_array[testable], method='indep'
    )
# Same (rejected, corrected p-values) pair the original name held.
dr_fdr_pval = (fdr_rejected, fdr_pvals)

df_final = pd.DataFrame({
    'col_name': df_col_list,
    'corr': df_corr_list,
    'corr_pval': df_pval_list,
    'FDR_rejected': dr_fdr_pval[0],
    'FDR_pval': dr_fdr_pval[1],
})

df_final.to_csv('LA-142DPI-not-restored-corr-pval.csv', index=False)

Empty DataFrame
Columns: [Heart.Weight..Body.Weight, Liver.Weight..Body.Weight, Ejection.Fraction, PR.Interval..s., QTc..s., P.Amplitude..V., CD3.CD8.IFNg.]
Index: []
Empty DataFrame
Columns: [X137.046_0.35, X166.053_0.305, X428.205_2.248, X444.332_3.016, X472.363_3.023, X488.359_3.01, X516.39_3.018, X532.385_3.004, X560.416_3.006, X576.411_2.999, X604.442_3.001, X620.437_2.993, X648.468_2.995, X650.403_3.014, X664.463_2.992, X666.433_3.092, X708.489_2.982, X752.515_2.977, X898.581_3.224]
Index: []
['Heart.Weight..Body.Weight-X137.046_0.35', 'Heart.Weight..Body.Weight-X166.053_0.305', 'Heart.Weight..Body.Weight-X428.205_2.248', 'Heart.Weight..Body.Weight-X444.332_3.016', 'Heart.Weight..Body.Weight-X472.363_3.023', 'Heart.Weight..Body.Weight-X488.359_3.01', 'Heart.Weight..Body.Weight-X516.39_3.018', 'Heart.Weight..Body.Weight-X532.385_3.004', 'Heart.Weight..Body.Weight-X560.416_3.006', 'Heart.Weight..Body.Weight-X576.411_2.999', 'Heart.Weight..Body.Weight-X604.442_3.001', 'Heart.Weight.