In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api
from scipy.stats import kendalltau, pearsonr, spearmanr
from statsmodels.stats.multitest import fdrcorrection

def spearsonr_pval(x,y):
    """Return only the p-value of the Spearman rank correlation of ``x`` and ``y``.

    The statistic itself is discarded. The two-argument shape is the callable
    form accepted by ``Series.corr(..., method=<callable>)``.
    """
    _statistic, p_value = spearmanr(x, y)
    return p_value

# Combined table: metadata columns and metabolite feature columns side by side.
data_a = pd.read_csv("RVB-75DPI-metadata-feature-table.csv")

# The 'filename' column of this table names the feature columns to keep;
# "MouseID" is put first so it is selected too and can serve as the index.
data_restore = pd.read_csv("RVB-mass-restored.csv")
data_restore_list = ["MouseID"] + data_restore["filename"].to_list()

# Metadata block: the first eight columns by position (0..7), indexed by MouseID.
# NOTE: positions are 0-based and a Python range a:b is half-open (a <= X < b).
data_M = data_a.iloc[:, list(range(8))].set_index('MouseID')

# Feature (metabolite) block: only the columns listed above, indexed by MouseID.
data_F = data_a[data_restore_list].set_index('MouseID')

# Zero-row copies of each block: they carry only the column names, and
# iterating over them yields those names one by one.
M_column_list = data_M.head(0)
F_column_list = data_F.head(0)
print(M_column_list)
print(F_column_list)

def _spearman_corr_and_pval(x, y):
    """Return ``(rho, p_value)`` from a single ``spearmanr`` call on two Series.

    The Series are inner-aligned on their index and rows with a missing value
    on either side are dropped, which is the same preparation ``Series.corr``
    performs before it evaluates the correlation. ``(nan, nan)`` is returned
    when fewer than two complete rows remain, since no rank correlation is
    defined there.
    """
    x, y = x.align(y, join="inner")
    complete = x.notna() & y.notna()
    if complete.sum() < 2:
        return float("nan"), float("nan")
    result = spearmanr(x[complete].astype(float), y[complete].astype(float))
    return result[0], result[1]


# Parallel lists, one entry per (metadata column, feature column) pair.
df_col_list = []
df_corr_list = []
df_pval_list = []

# Iterating M_column_list / F_column_list yields column names.
for m_col in M_column_list:
    # The metadata column does not change in the inner loop; look it up once.
    col_1 = data_M[m_col]
    for f_col in F_column_list:
        # One spearmanr call supplies both numbers, so rho and its p-value are
        # guaranteed to come from exactly the same set of rows.
        corr_col_1_2, corr_pval_col_1_2 = _spearman_corr_and_pval(col_1, data_F[f_col])
        df_col_list.append(f"{m_col}-{f_col}")
        df_corr_list.append(corr_col_1_2)
        df_pval_list.append(corr_pval_col_1_2)

print(df_col_list)
print(df_corr_list)
print(df_pval_list)

# One row per metadata/feature pair. Building from a dict makes pandas raise
# if the three lists ever differ in length instead of silently truncating.
df_final = pd.DataFrame(
    {'col_name': df_col_list, 'corr': df_corr_list, 'corr_pval': df_pval_list}
)

# Pairs whose correlation or p-value is undefined (NaN) must not reach the
# FDR step: a NaN p-value would corrupt the ranking used by the correction.
df_final = df_final.dropna(subset=['corr', 'corr_pval'])

# Benjamini-Hochberg correction over all remaining pairs.
# dr_fdr_pval[0] is the reject flag per test, dr_fdr_pval[1] the adjusted p-value.
dr_fdr_pval = fdrcorrection(df_final['corr_pval'].to_numpy(), method='indep')
df_final = df_final.assign(
    FDR_rejected=dr_fdr_pval[0],
    FDR_pval=dr_fdr_pval[1],
)

df_final.to_csv('RVB-75DPI-restored-corr-pval.csv', index=False)

Empty DataFrame
Columns: [Heart.weight..Body.weight.ratio, Liver.weight..Body.weight.ratio, Ejection.Fraction, PR.Interval..s., QTc..s., P.Amplitude..V., CD3.CD8.IFNg.]
Index: []
Empty DataFrame
Columns: [X277.216_3.018, X295.227_3.017, X327.232_3.01, X522.357_2.957, X544.34_2.89, X568.341_2.887, X590.322_2.887, X604.362_2.948, X142.035_0.315, X214.181_2.634, X236.163_2.635, X242.212_2.78, X242.586_2.766, X281.102_2.768, X300.199_2.264, X308.295_3.045, X324.29_3.082, X409.162_2.768, X506.255_3.172, X795.335_2.768, X801.313_2.768, X801.815_2.767, X132.102_0.307, X226.181_2.61, X256.227_2.847, X278.186_2.23, X298.2_2.676, X348.288_3.044, X502.294_2.879]
Index: []

[0 rows x 29 columns]
['Heart.weight..Body.weight.ratio-X277.216_3.018', 'Heart.weight..Body.weight.ratio-X295.227_3.017', 'Heart.weight..Body.weight.ratio-X327.232_3.01', 'Heart.weight..Body.weight.ratio-X522.357_2.957', 'Heart.weight..Body.weight.ratio-X544.34_2.89', 'Heart.weight..Body.weight.ratio-X568.341_2.887', 'Heart.we

