In [None]:
import pandas as pd
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load the immunoassay table; rows that are entirely empty are dropped.
df = pd.read_csv('LSDS-112_Immunoassay_Roy_ECLIA_AllTissues_SUBMITTED.csv').dropna(how='all')
# Every column other than the sample identifier and the group label is
# treated as a cytokine measurement.
cytokines = [col for col in df.columns if col not in ['Sample ID', 'Group']]

# Compare Flight vs Ground groups
# The group subsets do not depend on the cytokine, so select them once
# instead of re-filtering the whole frame on every loop iteration.
flight = df[df['Group'] == 'Flight']
ground = df[df['Group'] == 'Ground']

results = []
for cyto in cytokines:
    g1 = flight[cyto].dropna()
    g2 = ground[cyto].dropna()
    # Skip cytokines with fewer than two measurements in either group.
    if len(g1) < 2 or len(g2) < 2:
        continue
    stat, p = mannwhitneyu(g1, g2, alternative='two-sided')
    # Rank-biserial correlation, oriented like Mean_Diff (Flight - Ground):
    # positive when Flight values tend to exceed Ground values.
    # NOTE(review): assumes SciPy >= 1.7, where the returned statistic is the
    # U of the first sample (g1) -- confirm against the installed version.
    effect = (2 * stat) / (len(g1) * len(g2)) - 1
    flight_mean, ground_mean = g1.mean(), g2.mean()
    results.append([cyto, flight_mean, ground_mean,
                    flight_mean - ground_mean, effect, p])

# results DataFrame
res_df = pd.DataFrame(results, columns=['Cytokine', 'Flight_Mean', 'Ground_Mean', 
                                        'Mean_Diff', 'Effect_Size', 'p_value'])
# Second element returned by fdrcorrection is the array of adjusted p-values.
res_df['p_adj'] = fdrcorrection(res_df['p_value'])[1]
print("\nSimplified Cytokine Comparison:")
print(res_df.sort_values('p_adj').round(4))

# Simple ML Accuracy (Logistic Regression)
# Features: the cytokine columns, keeping only rows with no missing values.
X = df[cytokines].dropna()
# Labels: the Group value of each retained row, encoded as integers.
group_labels = df.loc[X.index, 'Group']
y = LabelEncoder().fit_transform(group_labels)

# Stratified hold-out split with a fixed seed so the result is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Score the fitted model on the held-out samples only.
predictions = model.predict(X_test)
acc = accuracy_score(y_test, predictions)
print(f"\nClassification Accuracy (Logistic Regression): {acc*100:.2f}%")



Simplified Cytokine Comparison:
   Cytokine  Flight_Mean  Ground_Mean  Mean_Diff  Effect_Size  p_value  p_adj
0     TNF-α       0.0500       0.0747    -0.0247         0.28   0.5476    1.0
1      IL-2       0.2362       0.2236     0.0125        -0.04   1.0000    1.0
2     IFN-γ       0.0340       0.0324     0.0016        -0.04   1.0000    1.0
3      IL-5       0.1868       0.2000    -0.0132         0.20   0.6905    1.0
4      IL-6       2.1740       2.0819     0.0921        -0.04   1.0000    1.0
5  IL-12p70       3.8911       4.8232    -0.9320         0.44   0.3095    1.0
6     IL-10       0.3846       0.4272    -0.0426         0.20   0.6905    1.0

Classification Accuracy (Logistic Regression): 40.00%
