In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings
import seaborn as sns
import networkx as nx
import matplotlib.colors as mcolors
from scipy import stats
from scipy.stats import f_oneway, kruskal
from statsmodels.stats.multicomp import MultiComparison
from scikit_posthocs import posthoc_dunn
# pd.options.display.max_rows = None
# pd.options.display.max_columns = None
import glob as glob
import math
pd.set_option('display.max_rows', None)  # To display all rows
pd.set_option('display.max_columns', None)  # To display all columns
from scipy.stats import shapiro

In [11]:
# Load the master graph-metrics CSV and list the distinct 'Centiloid Quantile' labels it contains.
gm_3q = pd.read_csv('graph_metrics_adni_a4_bootstrapped_3quant.csv')
print(gm_3q['Centiloid Quantile'].unique())

[0 1 2]


In [12]:
# Split the master table into one frame per cohort, selecting rows on the 'Dataset' label.
adni_gm = gm_3q.loc[gm_3q['Dataset'] == 'ADNI']
a4_gm = gm_3q.loc[gm_3q['Dataset'] == 'A4']
# Sanity check: each subset should hold exactly one distinct 'Dataset' value.
print(adni_gm['Dataset'].nunique())
print(a4_gm['Dataset'].nunique())

1
1


In [13]:
# Preview the first rows of the ADNI subset to confirm the available metric columns.
print(adni_gm.head())

  Dataset  Centiloid Quantile  Weighted Clustering Coefficient  \
0    ADNI                   0                         0.068934   
1    ADNI                   0                         0.083235   
2    ADNI                   0                         0.058079   
3    ADNI                   0                         0.070969   
4    ADNI                   0                         0.067504   

   Weighted Avg. Shortest Path Length  Weighted Small World  
0                            1.842079              0.325390  
1                            2.203289              0.329465  
2                            2.307050              0.207186  
3                            2.092218              0.308219  
4                            2.123366              0.281602  


TODO: Determine why the effect size is NaN.

In [15]:
# Omnibus tests with effect sizes: eta squared for one-way ANOVA, epsilon squared for Kruskal-Wallis.
def perform_anova_kruskal_bonferroni(df1, df2, metrics, group_column,
                                     alpha=0.05, n_comparisons=None,
                                     nonparametric_metrics=('Weighted Small World',),
                                     dataset_names=('ADNI', 'A4')):
    """Run one omnibus group test per metric on two data frames.

    For every metric, the rows of each frame are split by the distinct
    non-missing values of ``group_column``. Metrics listed in
    ``nonparametric_metrics`` are tested with ``scipy.stats.kruskal`` and
    reported with epsilon squared (``H / (n - 1)``); all other metrics are
    tested with ``scipy.stats.f_oneway`` and reported with eta squared
    (``SS_between / SS_total``).

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames containing ``group_column`` and every column named in ``metrics``.
    metrics : iterable of str
        Metric column names to test.
    group_column : str
        Column whose distinct values define the groups.
    alpha : float, default 0.05
        Threshold applied to the Bonferroni-adjusted p-value.
    n_comparisons : int or None, default None
        Factor the raw p-value is multiplied by. ``None`` keeps the original
        behaviour of using the number of groups.
        NOTE(review): a Bonferroni factor is normally the number of hypothesis
        tests performed, not the number of groups - confirm the intended value.
    nonparametric_metrics : collection of str, default ('Weighted Small World',)
        Metrics tested with Kruskal-Wallis instead of ANOVA.
    dataset_names : pair of str, default ('ADNI', 'A4')
        Labels written to the 'Dataset' column for ``df1`` and ``df2``.

    Returns
    -------
    pandas.DataFrame
        One row per (dataset, metric) with columns 'Dataset', 'Metric', 'Test',
        'Statistic', 'p-value', 'Bonferroni_p-value', 'Significant' and
        'Effect Size'.
    """
    # A bare string would otherwise be matched by substring in the `in` test below.
    if isinstance(nonparametric_metrics, str):
        nonparametric_metrics = (nonparametric_metrics,)

    first_name, second_name = dataset_names
    records = []

    for df, name in [(df1, first_name), (df2, second_name)]:
        # NaN labels never compare equal, so keeping them would create an empty
        # group and turn every statistic into NaN.
        group_labels = df[group_column].dropna().unique()

        for metric in metrics:
            group_data = [df.loc[df[group_column] == label, metric].dropna()
                          for label in group_labels]
            correction_factor = len(group_data) if n_comparisons is None else n_comparisons
            total_n = sum(len(values) for values in group_data)

            if metric not in nonparametric_metrics:
                # One-way ANOVA with eta squared.
                result = f_oneway(*group_data)

                # Use exactly the observations that entered the test, and compute
                # the grand mean once instead of once per observation.
                pooled = pd.concat(group_data)
                grand_mean = pooled.mean()
                ss_between = sum(len(values) * (values.mean() - grand_mean) ** 2
                                 for values in group_data)
                ss_total = ((pooled - grand_mean) ** 2).sum()
                effect_size = ss_between / ss_total if ss_total != 0 else np.nan
            else:
                # Kruskal-Wallis with epsilon squared.
                result = kruskal(*group_data)
                effect_size = result.statistic / (total_n - 1) if total_n > 1 else np.nan

            # A probability cannot exceed 1; min() leaves a NaN p-value as NaN.
            p_adjusted = min(result.pvalue * correction_factor, 1.0)
            # Decide on the unrounded value: rounding first can lift a p-value
            # just below alpha up to alpha and flip the verdict.
            significant = 'Yes' if p_adjusted < alpha else 'No'

            records.append({
                'Dataset': name,
                'Metric': metric,
                'Test': result.__class__.__name__,
                'Statistic': round(result.statistic, 3),
                'p-value': result.pvalue,
                'Bonferroni_p-value': round(p_adjusted, 3),
                'Significant': significant,
                'Effect Size': round(effect_size, 3) if not np.isnan(effect_size) else np.nan
            })

    return pd.DataFrame(records)




In [16]:
#testing functions 
metrics = ['Weighted Clustering Coefficient', 'Weighted Avg. Shortest Path Length', 'Weighted Small World']
anova_results = perform_anova_kruskal_bonferroni(adni_gm, a4_gm, metrics, 'Centiloid Quantile')
print(anova_results)



  Dataset                              Metric            Test  Statistic  \
0    ADNI     Weighted Clustering Coefficient  F_onewayResult    567.442   
1    ADNI  Weighted Avg. Shortest Path Length  F_onewayResult    598.081   
2    ADNI                Weighted Small World   KruskalResult   1528.090   
3      A4     Weighted Clustering Coefficient  F_onewayResult    570.450   
4      A4  Weighted Avg. Shortest Path Length  F_onewayResult   2085.018   
5      A4                Weighted Small World   KruskalResult   1954.486   

         p-value  Bonferroni_p-value Significant  Effect Size  
0  1.040341e-209                 0.0         Yes        0.275  
1  2.732263e-219                 0.0         Yes        0.285  
2   0.000000e+00                 0.0         Yes        0.510  
3  1.175259e-210                 0.0         Yes        0.276  
4   0.000000e+00                 0.0         Yes        0.582  
5   0.000000e+00                 0.0         Yes        0.652  


Run a one-way ANOVA across the centiloid quantile groups for the weighted clustering coefficient and weighted average shortest path length, and a Kruskal–Wallis test for weighted small world.
- Only a Bonferroni correction is applied because it is the strictest, so Tukey and Dunn post-hoc tests are not needed.

In [17]:
def split_results_by_dataset(df):
    """Return the 'ADNI' rows and the 'A4' rows of ``df`` as a two-element tuple.

    Rows are selected on the 'Dataset' column; the original index is preserved.
    """
    dataset_labels = df['Dataset']
    is_adni = dataset_labels == 'ADNI'
    is_a4 = dataset_labels == 'A4'
    return df[is_adni], df[is_a4]

# Separate the combined results table into one table per cohort.
adni_anova_results, a4_anova_results = split_results_by_dataset(anova_results)



In [18]:
# Show the test results for the ADNI cohort.
print(adni_anova_results)

  Dataset                              Metric            Test  Statistic  \
0    ADNI     Weighted Clustering Coefficient  F_onewayResult    567.442   
1    ADNI  Weighted Avg. Shortest Path Length  F_onewayResult    598.081   
2    ADNI                Weighted Small World   KruskalResult   1528.090   

         p-value  Bonferroni_p-value Significant  Effect Size  
0  1.040341e-209                 0.0         Yes        0.275  
1  2.732263e-219                 0.0         Yes        0.285  
2   0.000000e+00                 0.0         Yes        0.510  


In [19]:
# Show the test results for the A4 cohort.
print(a4_anova_results)

  Dataset                              Metric            Test  Statistic  \
3      A4     Weighted Clustering Coefficient  F_onewayResult    570.450   
4      A4  Weighted Avg. Shortest Path Length  F_onewayResult   2085.018   
5      A4                Weighted Small World   KruskalResult   1954.486   

         p-value  Bonferroni_p-value Significant  Effect Size  
3  1.175259e-210                 0.0         Yes        0.276  
4   0.000000e+00                 0.0         Yes        0.582  
5   0.000000e+00                 0.0         Yes        0.652  
