In [306]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings
import seaborn as sns
import networkx as nx
import matplotlib.colors as mcolors
from scipy import stats
from scipy.stats import f_oneway, kruskal
from statsmodels.stats.multicomp import MultiComparison
from scikit_posthocs import posthoc_dunn
# pd.options.display.max_rows = None
# pd.options.display.max_columns = None
import glob as glob
import math
pd.set_option('display.max_rows', None)  # To display all rows
pd.set_option('display.max_columns', None)  # To display all columns
from scipy.stats import shapiro

In [307]:
# Read the master graph-metrics table and list the distinct Centiloid
# quantile labels it contains.
gm_3q = pd.read_csv(filepath_or_buffer='graph_metrics_adni_a4_bootstrapped_3quant.csv')
quantile_labels = gm_3q['Centiloid Quantile'].unique()
print(quantile_labels)

[0 1 2]


In [308]:
# Partition the master table by cohort. Each subset should report exactly
# one distinct 'Dataset' label (printed as a sanity check).
dataset_labels = gm_3q['Dataset']
adni_gm = gm_3q.loc[dataset_labels.eq('ADNI')]
a4_gm = gm_3q.loc[dataset_labels.eq('A4')]
for cohort_frame in (adni_gm, a4_gm):
    print(cohort_frame['Dataset'].nunique())  # expect 1

1
1


In [309]:
# Peek at the first five ADNI rows to check the available columns.
adni_preview = adni_gm.head(n=5)
print(adni_preview)

  Dataset  Centiloid Quantile  Weighted Clustering Coefficient  \
0    ADNI                   0                         0.068934   
1    ADNI                   0                         0.083235   
2    ADNI                   0                         0.058079   
3    ADNI                   0                         0.070969   
4    ADNI                   0                         0.067504   

   Weighted Avg. Shortest Path Length  Weighted Small World  
0                            1.842079              0.325390  
1                            2.203289              0.329465  
2                            2.307050              0.207186  
3                            2.092218              0.308219  
4                            2.123366              0.281602  


In [325]:
def perform_anova_kruskal_bonferroni(df1, df2, metrics, group_column,
                                     nonparametric_metrics=('Weighted Small World',),
                                     alpha=0.05, n_tests=None):
    """Run one omnibus group-difference test per metric on two datasets.

    For each frame (labelled 'ADNI' for ``df1`` and 'A4' for ``df2``) and each
    metric, rows are split by the values of ``group_column`` and the groups are
    compared with a one-way ANOVA (``scipy.stats.f_oneway``), or with a
    Kruskal-Wallis H-test (``scipy.stats.kruskal``) when the metric is listed
    in ``nonparametric_metrics``. Every p-value is then Bonferroni-adjusted.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames containing ``group_column`` and every column named in ``metrics``.
    metrics : iterable of str
        Metric columns to test.
    group_column : str
        Column whose distinct (non-missing) values define the groups.
    nonparametric_metrics : str or collection of str, optional
        Metrics tested with Kruskal-Wallis instead of ANOVA. Defaults to
        ``('Weighted Small World',)``.
    alpha : float, optional
        Significance threshold applied to the adjusted p-value (default 0.05).
    n_tests : int, optional
        Size of the Bonferroni family, i.e. the number of hypothesis tests the
        correction accounts for. Defaults to ``len(metrics)`` (the tests run
        on one dataset). Pass e.g. ``2 * len(metrics)`` to correct across both
        datasets at once.

    Returns
    -------
    pandas.DataFrame
        One row per (dataset, metric) with columns 'Dataset', 'Metric', 'Test',
        'Statistic' (rounded to 3 d.p.), 'p-value' (raw), 'Bonferroni_p-value'
        (capped at 1.0, rounded to 3 d.p.) and 'Significant' ('Yes'/'No').
    """
    metrics = list(metrics)
    # A bare string would otherwise be matched by substring in the `in` test.
    if isinstance(nonparametric_metrics, str):
        nonparametric_metrics = (nonparametric_metrics,)
    # Bonferroni multiplies by the number of hypotheses tested, not by the
    # number of groups being compared inside each test.
    if n_tests is None:
        n_tests = len(metrics)

    rows = []
    for df, name in [(df1, 'ADNI'), (df2, 'A4')]:
        # Missing group labels cannot form a group; skip them.
        group_labels = df[group_column].dropna().unique()
        for metric in metrics:
            # Drop missing metric values so one NaN does not turn the whole
            # test statistic into NaN.
            group_data = [
                df.loc[df[group_column] == label, metric].dropna()
                for label in group_labels
            ]

            if metric in nonparametric_metrics:
                test_name, result = 'Kruskal-Wallis', kruskal(*group_data)
            else:
                test_name, result = 'ANOVA', f_oneway(*group_data)

            # Cap at 1.0 (a probability). Argument order matters: with the
            # p-value first, a NaN p-value propagates instead of becoming 1.0.
            p_adjusted = min(result.pvalue * n_tests, 1.0)

            rows.append({
                'Dataset': name,
                'Metric': metric,
                'Test': test_name,
                'Statistic': round(result.statistic, 3),
                'p-value': result.pvalue,
                'Bonferroni_p-value': round(p_adjusted, 3),
                # Decide on the unrounded value; rounding first would turn
                # e.g. 0.0496 into 0.05 and flip the outcome.
                'Significant': 'Yes' if p_adjusted < alpha else 'No',
            })

    return pd.DataFrame(rows)



In [326]:
# Run the omnibus tests for both cohorts across the three graph metrics.
metrics = [
    'Weighted Clustering Coefficient',
    'Weighted Avg. Shortest Path Length',
    'Weighted Small World',
]
anova_results = perform_anova_kruskal_bonferroni(
    df1=adni_gm,
    df2=a4_gm,
    metrics=metrics,
    group_column='Centiloid Quantile',
)
print(anova_results)



  Dataset                              Metric            Test  Statistic  \
0    ADNI     Weighted Clustering Coefficient           ANOVA    567.442   
1    ADNI  Weighted Avg. Shortest Path Length           ANOVA    598.081   
2    ADNI                Weighted Small World  Kruskal-Wallis   1528.090   
3      A4     Weighted Clustering Coefficient           ANOVA    570.450   
4      A4  Weighted Avg. Shortest Path Length           ANOVA   2085.018   
5      A4                Weighted Small World  Kruskal-Wallis   1954.486   

         p-value  Bonferroni_p-value Significant  
0  1.040341e-209                 0.0         Yes  
1  2.732263e-219                 0.0         Yes  
2   0.000000e+00                 0.0         Yes  
3  1.175259e-210                 0.0         Yes  
4   0.000000e+00                 0.0         Yes  
5   0.000000e+00                 0.0         Yes  


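Run a one-way ANOVA across the Centiloid quantile groups for weighted clustering coefficient and weighted average shortest path length, and a Kruskal-Wallis test for weighted small-world.
- Only a Bonferroni correction is applied because it is the most conservative adjustment, so Tukey or Dunn post-hoc tests are not needed.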

In [327]:
def split_results_by_dataset(df):
    """Return ``(adni_rows, a4_rows)``: the rows of ``df`` whose 'Dataset'
    value is 'ADNI' and 'A4' respectively, with the original index kept."""
    dataset_labels = df['Dataset']
    is_adni = dataset_labels.eq('ADNI')
    is_a4 = dataset_labels.eq('A4')
    return df.loc[is_adni], df.loc[is_a4]

# Separate the combined test summary into one table per cohort.
(adni_anova_results, a4_anova_results) = split_results_by_dataset(df=anova_results)



In [328]:
# Show the ADNI rows of the omnibus-test summary.
print(adni_anova_results)

  Dataset                              Metric            Test  Statistic  \
0    ADNI     Weighted Clustering Coefficient           ANOVA    567.442   
1    ADNI  Weighted Avg. Shortest Path Length           ANOVA    598.081   
2    ADNI                Weighted Small World  Kruskal-Wallis   1528.090   

         p-value  Bonferroni_p-value Significant  
0  1.040341e-209                 0.0         Yes  
1  2.732263e-219                 0.0         Yes  
2   0.000000e+00                 0.0         Yes  


In [329]:
# Show the A4 rows of the omnibus-test summary.
print(a4_anova_results)

  Dataset                              Metric            Test  Statistic  \
3      A4     Weighted Clustering Coefficient           ANOVA    570.450   
4      A4  Weighted Avg. Shortest Path Length           ANOVA   2085.018   
5      A4                Weighted Small World  Kruskal-Wallis   1954.486   

         p-value  Bonferroni_p-value Significant  
3  1.175259e-210                 0.0         Yes  
4   0.000000e+00                 0.0         Yes  
5   0.000000e+00                 0.0         Yes  


Interpretation of A4 Tukey: 