# Statistical comparison of classifiers

In [2]:
import pandas as pd
import scipy.stats
import scikit_posthocs as sp

In [3]:
# Re-calculate the p-value according to the Iman-Davenport formula descibed by Demsar
def Iman(ChiSquare, k, N, p):
    import scipy.stats
    Ff = ((N-1)*ChiSquare) / ((N*(k-1))-ChiSquare)
    test_stat = scipy.stats.f.ppf(1-p, k-1, (k-1)*(N-1))
    return [Ff, test_stat]

## ROC-AUC dataset level

First we look at the ROC AUCs on the dataset level (so for every reference set a single entry)

In [4]:
# Load the data
out4 = pd.read_excel("~/Desktop/AUCs per refset p-values as in manuscript (also for DEPICT and genetic distance).xlsx")

In [5]:
out4

Unnamed: 0.1,Unnamed: 0,Ensemble ranks,Ensemble scores,Ensemble combo,Genetic distance AUC,DEPICT For SNP AUC,network distance AUC,EVOKE AUC,node2vec normal AUC,node2vec graphlet AUC,node2vec autoencode AUC,node2vec combi AUC,predicates AUC,rdf2vec AUC
0,Farashi,0.820062,0.807431,0.81825,0.792803,0.767,0.729054,0.74011,0.795185,0.758276,0.766899,0.790697,0.788767,0.770638
1,Farashi p-value cutoff,0.884146,0.831301,0.879065,0.720471,0.759,0.79878,0.729675,0.718496,0.71748,0.737805,0.775407,0.786585,0.807927
2,DeRycke,0.803143,0.813558,0.794343,0.735147,0.795,0.752676,0.76085,0.749031,0.757152,0.723883,0.720357,0.676884,0.771588
3,Teslovich,0.886757,0.882255,0.888415,0.778547,0.862,0.811157,0.793154,0.739616,0.741394,0.780768,0.79185,0.902737,0.833387


In [6]:
ranks_hist = out4.assign(**out4.iloc[:, 1:].rank(axis = 1, ascending = True).astype(int))

In [7]:
ranks_hist

Unnamed: 0.1,Unnamed: 0,Ensemble ranks,Ensemble scores,Ensemble combo,Genetic distance AUC,DEPICT For SNP AUC,network distance AUC,EVOKE AUC,node2vec normal AUC,node2vec graphlet AUC,node2vec autoencode AUC,node2vec combi AUC,predicates AUC,rdf2vec AUC
0,Farashi,13,11,12,9,5,1,2,10,3,4,8,7,6
1,Farashi p-value cutoff,13,11,12,3,6,9,4,2,1,5,7,8,10
2,DeRycke,12,13,10,4,11,6,8,5,7,3,2,1,9
3,Teslovich,11,10,12,3,9,7,6,1,2,4,5,13,8


In [8]:
ranks_hist.mean(numeric_only = True)

Ensemble ranks             12.25
Ensemble scores            11.25
Ensemble combo             11.50
Genetic distance AUC        4.75
DEPICT For SNP AUC          7.75
network distance AUC        5.75
EVOKE AUC                   5.00
node2vec normal AUC         4.50
node2vec graphlet AUC       3.25
node2vec autoencode AUC     4.00
node2vec combi AUC          5.50
predicates AUC              7.25
rdf2vec AUC                 8.25
dtype: float64

### All Friedman (also with combined methods)

In [10]:
all_together = scipy.stats.friedmanchisquare(out4["Ensemble ranks"],
                                               out4["Ensemble scores"],
                                               out4["Ensemble combo"],
                                               out4["Genetic distance AUC"], 
                                               out4["DEPICT For SNP AUC"], 
                                               out4["EVOKE AUC"], 
                                               out4["network distance AUC"], 
                                               out4["node2vec normal AUC"], 
                                               out4["node2vec graphlet AUC"], 
                                               out4["node2vec autoencode AUC"], 
                                               out4["node2vec combi AUC"], 
                                               out4["predicates AUC"], 
                                               out4["rdf2vec AUC"])

In [11]:
print(round(all_together[1],4))

0.0038


In [22]:
# Iman-Davenport for manuscript numbers
Iman(all_together[0], 13, 4, 0.00018)

[4.609756097560982, 4.5915961537520165]

### Friedman test individual methods

In [8]:
all_individual = scipy.stats.friedmanchisquare(out4["Genetic distance AUC"], 
                                               out4["DEPICT For SNP AUC"], 
                                               out4["EVOKE AUC"], 
                                               out4["network distance AUC"], 
                                               out4["node2vec normal AUC"], 
                                               out4["node2vec graphlet AUC"], 
                                               out4["node2vec autoencode AUC"], 
                                               out4["node2vec combi AUC"], 
                                               out4["predicates AUC"], 
                                               out4["rdf2vec AUC"])

Original p-value:

In [9]:
print(round(all_individual[1],4))

0.3932


In [10]:
# Iman-Davenport for manuscript numbers
Iman(all_individual[0], 10, 4, 0.42)

[1.074074074074071, 1.0626035808540077]

In [11]:
# Iman-Davenport for DEPICT & Genetic distance aligned numbers
Iman(all_individual[0], 10, 4, 0.02)

[1.074074074074071, 2.7552846946535015]

### Nemeyi post-hoc analysis individual methods

In [12]:
# Nemenyi for an all-vs-all comparison
nemeyi = sp.posthoc_nemenyi_friedman(out4[["Genetic distance AUC", "DEPICT For SNP AUC", "EVOKE AUC", "network distance AUC", "node2vec normal AUC", "node2vec graphlet AUC", "node2vec autoencode AUC", "node2vec combi AUC", "predicates AUC","rdf2vec AUC"]])
nemeyi[nemeyi < 0.05]

Unnamed: 0,Genetic distance AUC,DEPICT For SNP AUC,EVOKE AUC,network distance AUC,node2vec normal AUC,node2vec graphlet AUC,node2vec autoencode AUC,node2vec combi AUC,predicates AUC,rdf2vec AUC
Genetic distance AUC,,,,,,,,,,
DEPICT For SNP AUC,,,,,,,,,,
EVOKE AUC,,,,,,,,,,
network distance AUC,,,,,,,,,,
node2vec normal AUC,,,,,,,,,,
node2vec graphlet AUC,,,,,,,,,,
node2vec autoencode AUC,,,,,,,,,,
node2vec combi AUC,,,,,,,,,,
predicates AUC,,,,,,,,,,
rdf2vec AUC,,,,,,,,,,


What happens if we remove DEPICT?

In [13]:
all_individual = scipy.stats.friedmanchisquare(out4["Genetic distance AUC"], 
                                               out4["EVOKE AUC"], 
                                               out4["network distance AUC"], 
                                               out4["node2vec normal AUC"], 
                                               out4["node2vec graphlet AUC"], 
                                               out4["node2vec autoencode AUC"], 
                                               out4["node2vec combi AUC"], 
                                               out4["predicates AUC"], 
                                               out4["rdf2vec AUC"])

In [14]:
print(round(all_individual[1],4))

0.5295


In [15]:
# DEPICT & Genetic distance aligned numbers
Iman(all_individual[0], 9, 4, 0.05)

[0.8502673796791438, 2.355081494846207]

No significant results, so no need to further examine

## Combination methods & individual methods that are part of it

### Ranks

In [16]:
combi_and_parts = scipy.stats.friedmanchisquare(out4["Ensemble ranks"],
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["network distance AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["predicates AUC"], 
                                                out4["rdf2vec AUC"])

In [17]:
print(round(combi_and_parts[1],4))

0.0669


In [18]:
Iman(combi_and_parts[0], 7, 4, 0.04)

[2.8947368421052593, 2.838056021222002]

In [19]:
Dunn = sp.posthoc_dunn([
                 out4["Ensemble ranks"],
                 out4["DEPICT For SNP AUC"], 
                 out4["EVOKE AUC"], 
                 out4["network distance AUC"], 
                 out4["node2vec normal AUC"], 
                 out4["predicates AUC"], 
                 out4["rdf2vec AUC"]])

In [20]:
Dunn

Unnamed: 0,1,2,3,4,5,6,7
1,1.0,0.143927,0.009914,0.058608,0.005946,0.102418,0.263788
2,0.143927,1.0,0.263788,0.66734,0.197258,0.8635,0.730966
3,0.009914,0.263788,1.0,0.491653,0.8635,0.344372,0.143927
4,0.058608,0.66734,0.491653,1.0,0.390008,0.796499,0.439142
5,0.005946,0.197258,0.8635,0.390008,1.0,0.263788,0.102418
6,0.102418,0.8635,0.344372,0.796499,0.263788,1.0,0.606021
7,0.263788,0.730966,0.143927,0.439142,0.102418,0.606021,1.0


In [21]:
# Holm alpha correction as in Demsar
p_values = pd.DataFrame({"original" : Dunn[1].sort_values()})
p_values["Sequence"] = range(1, len(Dunn) + 1)
p_values["Holm"] = 0.05 / (len(Dunn) - p_values["Sequence"])
p_values["significant"] = p_values["original"] < p_values["Holm"]
print(p_values)

   original  Sequence      Holm  significant
5  0.005946         1  0.008333         True
3  0.009914         2  0.010000         True
4  0.058608         3  0.012500        False
6  0.102418         4  0.016667        False
2  0.143927         5  0.025000        False
7  0.263788         6  0.050000        False
1  1.000000         7       inf         True


### Scores

In [22]:
combi_and_parts = scipy.stats.friedmanchisquare(out4["Ensemble scores"],
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["node2vec graphlet AUC"], 
                                                out4["predicates AUC"], )

In [23]:
print(round(combi_and_parts[1],4))

0.0573


In [24]:
Iman(combi_and_parts[0], 6, 4, 0.03)

[3.4615384615384572, 3.3937762400724214]

In [25]:
Dunn = sp.posthoc_dunn([
                 out4["Ensemble scores"],
                 out4["DEPICT For SNP AUC"], 
                 out4["EVOKE AUC"], 
                 out4["node2vec normal AUC"], 
                 out4["node2vec graphlet AUC"], 
                 out4["predicates AUC"]])

In [26]:
Dunn

Unnamed: 0,1,2,3,4,5,6
1,1.0,0.317311,0.024449,0.014286,0.00596,0.147059
2,0.317311,1.0,0.2113,0.147059,0.080118,0.65271
3,0.024449,0.2113,1.0,0.841481,0.617075,0.423711
4,0.014286,0.147059,0.841481,1.0,0.764177,0.317311
5,0.00596,0.080118,0.617075,0.764177,1.0,0.193601
6,0.147059,0.65271,0.423711,0.317311,0.193601,1.0


In [27]:
# Holm alpha correction as in Demsar
p_values = pd.DataFrame({"original" : Dunn[1].sort_values()})
p_values["Sequence"] = range(1, len(Dunn) + 1)
p_values["Holm"] = 0.05 / (len(Dunn) - p_values["Sequence"])
p_values["significant"] = p_values["original"] < p_values["Holm"]
print(p_values)

   original  Sequence      Holm  significant
5  0.005960         1  0.010000         True
4  0.014286         2  0.012500        False
3  0.024449         3  0.016667        False
6  0.147059         4  0.025000        False
2  0.317311         5  0.050000        False
1  1.000000         6       inf         True


### Combo

In [28]:
combi_and_parts = scipy.stats.friedmanchisquare(
                                                 out4["Ensemble combo"],
                                                 out4["DEPICT For SNP AUC"], 
                                                 out4["EVOKE AUC"], 
                                                 out4["node2vec normal AUC"], 
                                                 out4["predicates AUC"], 
                                                 out4["rdf2vec AUC"])

In [29]:
print(round(combi_and_parts[1],4))

0.1035


In [30]:
Iman(combi_and_parts[0], 6, 4, 0.076)

[2.5263157894736823, 2.5168107817378083]

In [31]:
Dunn = sp.posthoc_dunn([
                 out4["Ensemble combo"],
                 out4["DEPICT For SNP AUC"], 
                 out4["EVOKE AUC"], 
                 out4["node2vec normal AUC"], 
                 out4["predicates AUC"], 
                 out4["rdf2vec AUC"]])

In [32]:
Dunn

Unnamed: 0,1,2,3,4,5,6
1,1.0,0.193601,0.014286,0.012419,0.147059,0.317311
2,0.193601,1.0,0.250144,0.230139,0.880765,0.764177
3,0.014286,0.250144,1.0,0.960122,0.317311,0.147059
4,0.012419,0.230139,0.960122,1.0,0.293718,0.133614
5,0.147059,0.880765,0.317311,0.293718,1.0,0.65271
6,0.317311,0.764177,0.147059,0.133614,0.65271,1.0


In [33]:
# Holm alpha correction as in Demsar
p_values = pd.DataFrame({"original" : Dunn[1].sort_values()})
p_values["Sequence"] = range(1, len(Dunn) + 1)
p_values["Holm"] = 0.05 / (len(Dunn) - p_values["Sequence"])
p_values["significant"] = p_values["original"] < p_values["Holm"]
print(p_values)

   original  Sequence      Holm  significant
4  0.012419         1  0.010000        False
3  0.014286         2  0.012500        False
5  0.147059         3  0.016667        False
2  0.193601         4  0.025000        False
6  0.317311         5  0.050000        False
1  1.000000         6       inf         True


## Combined methods vs. all individual methods

In [34]:
combi_and_parts = scipy.stats.friedmanchisquare(out4["Ensemble ranks"],
                                                out4["Genetic distance AUC"], 
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["network distance AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["node2vec graphlet AUC"], 
                                                out4["node2vec autoencode AUC"], 
                                                out4["node2vec combi AUC"], 
                                                out4["predicates AUC"], 
                                                out4["rdf2vec AUC"])

In [35]:
print(round(combi_and_parts[1],4))

0.0714


In [36]:
Iman(combi_and_parts[0], 11, 4, 0.05)

[2.2485089463220715, 2.164579917125473]

In [37]:
Dunn = sp.posthoc_dunn([
                        out4["Ensemble ranks"],
                        out4["Genetic distance AUC"], 
                        out4["DEPICT For SNP AUC"], 
                        out4["EVOKE AUC"], 
                        out4["network distance AUC"], 
                        out4["node2vec normal AUC"], 
                        out4["node2vec graphlet AUC"], 
                        out4["node2vec autoencode AUC"], 
                        out4["node2vec combi AUC"], 
                        out4["predicates AUC"], 
                        out4["rdf2vec AUC"]])

In [38]:
Dunn

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11
1,1.0,0.013243,0.195792,0.0143,0.073604,0.006989,0.002051,0.006989,0.04451,0.098648,0.2956
2,0.013243,1.0,0.236597,0.978042,0.491388,0.825721,0.544827,0.825721,0.639849,0.408961,0.152358
3,0.195792,0.236597,1.0,0.247676,0.620294,0.1604,0.073604,0.1604,0.474222,0.720483,0.804353
4,0.0143,0.978042,0.247676,1.0,0.508883,0.804353,0.526698,0.804353,0.659658,0.424755,0.1604
5,0.073604,0.491388,0.620294,0.508883,1.0,0.363722,0.195792,0.363722,0.825721,0.89054,0.457391
6,0.006989,0.825721,0.1604,0.804353,0.363722,1.0,0.699988,1.0,0.491388,0.2956,0.098648
7,0.002051,0.544827,0.073604,0.526698,0.195792,0.699988,1.0,0.699988,0.283074,0.152358,0.041672
8,0.006989,0.825721,0.1604,0.804353,0.363722,1.0,0.699988,1.0,0.491388,0.2956,0.098648
9,0.04451,0.639849,0.474222,0.659658,0.825721,0.491388,0.283074,0.491388,1.0,0.720483,0.335375
10,0.098648,0.408961,0.720483,0.424755,0.89054,0.2956,0.152358,0.2956,0.720483,1.0,0.544827


In [39]:
# Holm alpha correction as in Demsar
p_values = pd.DataFrame({"original" : Dunn[1].sort_values()})
p_values["Sequence"] = range(1, len(Dunn) + 1)
p_values["Holm"] = 0.05 / (len(Dunn) - p_values["Sequence"])
p_values["significant"] = p_values["original"] < p_values["Holm"]
print(p_values)

    original  Sequence      Holm  significant
7   0.002051         1  0.005000         True
6   0.006989         2  0.005556        False
8   0.006989         3  0.006250        False
2   0.013243         4  0.007143        False
4   0.014300         5  0.008333        False
9   0.044510         6  0.010000        False
5   0.073604         7  0.012500        False
10  0.098648         8  0.016667        False
3   0.195792         9  0.025000        False
11  0.295600        10  0.050000        False
1   1.000000        11       inf         True


### Scores

In [40]:
combi_and_parts = scipy.stats.friedmanchisquare(out4["Ensemble scores"],
                                                out4["Genetic distance AUC"], 
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["network distance AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["node2vec graphlet AUC"], 
                                                out4["node2vec autoencode AUC"], 
                                                out4["node2vec combi AUC"], 
                                                out4["predicates AUC"], 
                                                out4["rdf2vec AUC"])

In [41]:
print(round(combi_and_parts[1],4))

0.0714


In [42]:
Iman(combi_and_parts[0], 11, 4, 0.05)

[2.2485089463220715, 2.164579917125473]

In [43]:
Dunn = sp.posthoc_dunn([
                 out4["Ensemble scores"],
                 out4["Genetic distance AUC"], 
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["network distance AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["node2vec graphlet AUC"], 
                                                out4["node2vec autoencode AUC"], 
                                                out4["node2vec combi AUC"], 
                                                out4["predicates AUC"], 
                                                out4["rdf2vec AUC"]])

In [44]:
Dunn

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11
1,1.0,0.01543,0.225872,0.016639,0.082915,0.008234,0.002465,0.008234,0.050676,0.1104,0.335375
2,0.01543,1.0,0.225872,0.978042,0.491388,0.825721,0.544827,0.825721,0.639849,0.408961,0.144626
3,0.225872,0.225872,1.0,0.236597,0.601004,0.152358,0.06928,0.152358,0.457391,0.699988,0.804353
4,0.016639,0.978042,0.236597,1.0,0.508883,0.804353,0.526698,0.804353,0.659658,0.424755,0.152358
5,0.082915,0.491388,0.601004,0.508883,1.0,0.363722,0.195792,0.363722,0.825721,0.89054,0.4409
6,0.008234,0.825721,0.152358,0.804353,0.363722,1.0,0.699988,1.0,0.491388,0.2956,0.093158
7,0.002465,0.544827,0.06928,0.526698,0.195792,0.699988,1.0,0.699988,0.283074,0.152358,0.038989
8,0.008234,0.825721,0.152358,0.804353,0.363722,1.0,0.699988,1.0,0.491388,0.2956,0.093158
9,0.050676,0.639849,0.457391,0.659658,0.825721,0.491388,0.283074,0.491388,1.0,0.720483,0.32175
10,0.1104,0.408961,0.699988,0.424755,0.89054,0.2956,0.152358,0.2956,0.720483,1.0,0.526698


In [45]:
# Holm alpha correction as in Demsar
p_values = pd.DataFrame({"original" : Dunn[1].sort_values()})
p_values["Sequence"] = range(1, len(Dunn) + 1)
p_values["Holm"] = 0.05 / (len(Dunn) - p_values["Sequence"])
p_values["significant"] = p_values["original"] < p_values["Holm"]
print(p_values)

    original  Sequence      Holm  significant
7   0.002465         1  0.005000         True
6   0.008234         2  0.005556        False
8   0.008234         3  0.006250        False
2   0.015430         4  0.007143        False
4   0.016639         5  0.008333        False
9   0.050676         6  0.010000        False
5   0.082915         7  0.012500        False
10  0.110400         8  0.016667        False
3   0.225872         9  0.025000        False
11  0.335375        10  0.050000        False
1   1.000000        11       inf         True


### Combo

In [46]:
combi_and_parts = scipy.stats.friedmanchisquare(
                                                out4["Ensemble combo"],
                                                out4["Genetic distance AUC"], 
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["network distance AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["node2vec graphlet AUC"], 
                                                out4["node2vec autoencode AUC"], 
                                                out4["node2vec combi AUC"], 
                                                out4["predicates AUC"], 
                                                out4["rdf2vec AUC"])

In [47]:
print(round(combi_and_parts[1],4))

0.0839


In [48]:
Iman(combi_and_parts[0], 11, 4, 0.05)

[2.12621359223301, 2.164579917125473]

In [49]:
Dunn = sp.posthoc_dunn([
                 out4["Ensemble combo"],
                 out4["Genetic distance AUC"], 
                                                out4["DEPICT For SNP AUC"], 
                                                out4["EVOKE AUC"], 
                                                out4["network distance AUC"], 
                                                out4["node2vec normal AUC"], 
                                                out4["node2vec graphlet AUC"], 
                                                out4["node2vec autoencode AUC"], 
                                                out4["node2vec combi AUC"], 
                                                out4["predicates AUC"], 
                                                out4["rdf2vec AUC"]])

In [50]:
Dunn

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11
1,1.0,0.016639,0.236597,0.017929,0.093158,0.009674,0.002699,0.008928,0.054019,0.116677,0.335375
2,0.016639,1.0,0.225872,0.978042,0.474222,0.847218,0.544827,0.825721,0.639849,0.408961,0.152358
3,0.236597,0.225872,1.0,0.236597,0.620294,0.1604,0.06928,0.152358,0.457391,0.699988,0.825721
4,0.017929,0.978042,0.236597,1.0,0.491388,0.825721,0.526698,0.804353,0.659658,0.424755,0.1604
5,0.093158,0.474222,0.620294,0.491388,1.0,0.363722,0.186449,0.349366,0.804353,0.912333,0.474222
6,0.009674,0.847218,0.1604,0.825721,0.363722,1.0,0.679708,0.978042,0.508883,0.308492,0.104393
7,0.002699,0.544827,0.06928,0.526698,0.186449,0.679708,1.0,0.699988,0.283074,0.152358,0.041672
8,0.008928,0.825721,0.152358,0.804353,0.349366,0.978042,0.699988,1.0,0.491388,0.2956,0.098648
9,0.054019,0.639849,0.457391,0.659658,0.804353,0.508883,0.283074,0.491388,1.0,0.720483,0.335375
10,0.116677,0.408961,0.699988,0.424755,0.912333,0.308492,0.152358,0.2956,0.720483,1.0,0.544827


In [51]:
# Holm alpha correction as in Demsar
p_values = pd.DataFrame({"original" : Dunn[1].sort_values()})
p_values["Sequence"] = range(1, len(Dunn) + 1)
p_values["Holm"] = 0.05 / (len(Dunn) - p_values["Sequence"])
p_values["significant"] = p_values["original"] < p_values["Holm"]
print(p_values)

    original  Sequence      Holm  significant
7   0.002699         1  0.005000         True
8   0.008928         2  0.005556        False
6   0.009674         3  0.006250        False
2   0.016639         4  0.007143        False
4   0.017929         5  0.008333        False
9   0.054019         6  0.010000        False
5   0.093158         7  0.012500        False
10  0.116677         8  0.016667        False
3   0.236597         9  0.025000        False
11  0.335375        10  0.050000        False
1   1.000000        11       inf         True
