In [None]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
from statsmodels.stats.multitest import multipletests

def find_significant_edges_continuous(
    data_df: pd.DataFrame,
    clinical_var: str = 'sara',
    feature_prefix: str = 'f',
    alpha: float = 0.05,
    method: str = 'fdr_bh',
    top_n: int = 20
) -> pd.DataFrame:
    """Rank feature columns by Pearson correlation with a clinical variable.

    Every column whose name starts with ``feature_prefix`` (other than
    ``clinical_var`` itself) is correlated with ``clinical_var`` using the
    rows where both values are non-NaN. The resulting p-values are adjusted
    with ``statsmodels.stats.multitest.multipletests``.

    Args:
        data_df: Table holding the clinical variable and the feature columns.
        clinical_var: Name of the column to correlate the features against.
        feature_prefix: Prefix that selects the feature columns by name.
        alpha: Forwarded to ``multipletests``. Rows are NOT filtered by it;
            the caller decides which q-values count as significant.
        method: Correction method name forwarded to ``multipletests``.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``edge``, ``corr``, ``pvalue`` and ``qvalue``,
        sorted by ascending q-value (ties broken by ascending p-value) and
        truncated to the first ``top_n`` rows.

    Raises:
        ValueError: If no feature column matches ``feature_prefix``.
        KeyError: If ``clinical_var`` is not a column of ``data_df``.
        RuntimeError: If no feature yields a usable correlation.
    """
    # The clinical variable is excluded so that a name sharing the prefix
    # (e.g. 'fars' with prefix 'f') is not correlated with itself (r = 1,
    # p = 0), which would otherwise occupy the top of the ranking.
    # Non-string labels cannot match a string prefix and are ignored.
    feature_cols = [
        c for c in data_df.columns
        if isinstance(c, str) and c.startswith(feature_prefix) and c != clinical_var
    ]
    if not feature_cols:
        raise ValueError(f"No columns start with '{feature_prefix}'")

    y = data_df[clinical_var].values
    y_present = ~np.isnan(y)  # loop-invariant, computed once

    results = []
    for edge in feature_cols:
        x = data_df[edge].values

        mask = ~np.isnan(x) & y_present
        # Fewer than 5 paired observations: skip the feature.
        if mask.sum() < 5:
            continue

        x_obs, y_obs = x[mask], y[mask]
        # Pearson's r is undefined when either side is constant; pearsonr
        # would return NaN, and a NaN p-value can turn the corrected
        # q-values of the other features into NaN as well.
        if x_obs.min() == x_obs.max() or y_obs.min() == y_obs.max():
            continue

        corr, pval = pearsonr(x_obs, y_obs)
        if not np.isfinite(pval):
            continue
        results.append({'edge': edge, 'corr': corr, 'pvalue': pval})

    res_df = pd.DataFrame(results)
    if res_df.empty:
        raise RuntimeError("No valid feature‐clinical pairs to test.")

    # Element [1] of the multipletests result holds the corrected p-values.
    res_df['qvalue'] = multipletests(
        res_df['pvalue'].values, alpha=alpha, method=method
    )[1]

    # Corrected p-values are frequently tied; the raw p-value tie-break makes
    # the order, and therefore the top_n cut-off, deterministic.
    res_df = res_df.sort_values(['qvalue', 'pvalue']).reset_index(drop=True)
    return res_df.head(top_n)


if __name__ == "__main__":
    # Clinical score column to correlate against; also used in the output
    # file name and the console header below.
    clinical_score = 'sara'
    df = pd.read_csv('clinical_modeling_dataset.csv')

    # Analysis settings forwarded unchanged to the ranking function.
    run_options = dict(
        feature_prefix='f',
        alpha=0.05,
        method='fdr_bh',
        top_n=30,
    )
    sig_edges = find_significant_edges_continuous(
        df, clinical_var=clinical_score, **run_options
    )

    # Write the ranked table to disk, then echo it to the console.
    output_csv = f'significant_edges_by_{clinical_score}.csv'
    sig_edges.to_csv(output_csv, index=False)

    print(f"Top edges correlated with {clinical_score}:")
    print(sig_edges)


Top edges correlated with sara:
      edge      corr    pvalue    qvalue
0    f6556 -0.562527  0.000032  0.174209
1    f9548 -0.570355  0.000023  0.174209
2    f6435 -0.557735  0.000038  0.174209
3    f6557 -0.536686  0.000084  0.288051
4    f6604 -0.505799  0.000245  0.325323
5    f6460 -0.495444  0.000343  0.325323
6    f8586  0.483512  0.000499  0.325323
7    f7561  0.494256  0.000356  0.325323
8    f7085 -0.494352  0.000355  0.325323
9    f3738 -0.484279  0.000487  0.325323
10   f7183 -0.510594  0.000209  0.325323
11   f7780  0.487625  0.000439  0.325323
12   f1156  0.495804  0.000339  0.325323
13   f6340 -0.487835  0.000436  0.325323
14   f3605 -0.488086  0.000433  0.325323
15   f6924  0.489184  0.000418  0.325323
16   f6315 -0.497861  0.000317  0.325323
17    f710  0.507941  0.000228  0.325323
18    f709  0.492646  0.000375  0.325323
19   f6618 -0.484319  0.000487  0.325323
20   f6605 -0.521947  0.000142  0.325323
21   f6930 -0.481635  0.000528  0.328978
22   f8977 -0.479381  0.0