In [48]:
from scipy.stats import chisquare
from itertools import groupby
import collections
import numpy as np
import pandas as pd

In [24]:
def read_results(exp_id: str) -> (list, list):
    file_name = 'results/' + exp_id + '.txt'

    result_file = open(file_name, 'r')

    eq_node_results_line = result_file.readline()
    eq_node_results = list(
        map(int,
            eq_node_results_line[len('eq_node_results: ['):len(eq_node_results_line) - 2].split(', '))
    )

    different_node_results_line = result_file.readline()
    different_node_results = list(
        map(int,
            different_node_results_line[len('diff_node_results: ['):len(different_node_results_line) - 2].split(', '))
    )
    return eq_node_results, different_node_results



In [25]:
eq_node_results, different_node_results = read_results('6cbd343d-d8c8-4cd6-aa83-28547e55474b')


In [26]:
len(eq_node_results)

100

In [27]:
len(different_node_results)

100

In [28]:
chisquare(different_node_results, f_exp=eq_node_results)

Power_divergenceResult(statistic=2116.0278470272747, pvalue=0.0)

In [29]:
groupby(eq_node_results)

<itertools.groupby at 0x7f6f3620b8f0>

In [32]:
eq_counter = dict(collections.Counter(eq_node_results))
diff_counter = dict(collections.Counter(different_node_results))

In [34]:
diff_counter

{951: 1,
 985: 1,
 828: 1,
 811: 1,
 834: 1,
 841: 1,
 783: 1,
 814: 2,
 881: 1,
 1067: 1,
 829: 1,
 807: 2,
 840: 2,
 940: 2,
 918: 1,
 1026: 1,
 875: 1,
 728: 1,
 732: 1,
 914: 1,
 772: 1,
 848: 1,
 870: 1,
 752: 1,
 761: 1,
 894: 1,
 849: 2,
 935: 2,
 737: 1,
 910: 2,
 945: 1,
 855: 1,
 878: 1,
 792: 1,
 861: 1,
 884: 2,
 999: 1,
 876: 1,
 898: 1,
 771: 1,
 856: 1,
 671: 1,
 888: 3,
 730: 1,
 872: 1,
 936: 1,
 941: 1,
 948: 1,
 883: 1,
 931: 1,
 844: 1,
 925: 2,
 721: 1,
 785: 1,
 779: 1,
 791: 1,
 912: 1,
 995: 1,
 923: 1,
 879: 1,
 930: 2,
 662: 1,
 817: 1,
 873: 1,
 991: 1,
 808: 1,
 909: 2,
 854: 1,
 810: 1,
 748: 1,
 802: 1,
 703: 1,
 803: 1,
 902: 1,
 809: 1,
 824: 1,
 911: 2,
 723: 1,
 866: 1,
 885: 1,
 882: 1,
 899: 1,
 1097: 1,
 696: 1,
 954: 1,
 972: 1}

In [35]:
eq_counter

{794: 1,
 980: 1,
 1009: 1,
 905: 1,
 968: 1,
 892: 2,
 741: 1,
 969: 2,
 926: 1,
 1066: 1,
 846: 1,
 906: 1,
 841: 1,
 782: 3,
 1084: 1,
 800: 1,
 981: 1,
 935: 1,
 1003: 1,
 872: 1,
 839: 1,
 878: 1,
 1068: 1,
 1045: 1,
 932: 1,
 852: 1,
 874: 1,
 840: 1,
 771: 1,
 1044: 1,
 830: 1,
 864: 1,
 1005: 1,
 845: 2,
 893: 1,
 702: 1,
 870: 1,
 836: 1,
 860: 1,
 843: 1,
 923: 1,
 916: 1,
 1065: 1,
 788: 1,
 756: 1,
 847: 1,
 1010: 1,
 904: 1,
 809: 1,
 913: 1,
 898: 1,
 1022: 1,
 976: 1,
 959: 2,
 857: 1,
 789: 1,
 954: 2,
 949: 2,
 1034: 1,
 763: 1,
 881: 1,
 837: 1,
 803: 1,
 924: 1,
 922: 1,
 723: 1,
 666: 1,
 779: 2,
 810: 1,
 693: 1,
 879: 1,
 848: 1,
 983: 1,
 1019: 1,
 929: 1,
 818: 1,
 971: 1,
 942: 1,
 991: 1,
 945: 1,
 911: 1,
 921: 1,
 903: 1,
 805: 1,
 953: 1,
 944: 1,
 797: 1,
 947: 1,
 1049: 1,
 831: 1,
 844: 1}

In [99]:
min_bin = min(min(different_node_results), min(eq_node_results))
max_bin = max(max(different_node_results), max(eq_node_results))
bins = list(map(lambda x: int(x), list(
        np.arange(
            min_bin,
            max_bin,
            (max_bin - min_bin) / 20
        )
    )))

In [100]:
bins.append(max_bin)
bins

[662,
 683,
 705,
 727,
 749,
 770,
 792,
 814,
 836,
 857,
 879,
 901,
 923,
 944,
 966,
 988,
 1010,
 1031,
 1053,
 1075,
 1097]

In [101]:
max_bin

1097

In [102]:
eq_node_bins_results = list(pd.cut(eq_node_results, bins=bins, include_lowest=True, labels=False))
print(eq_node_bins_results)

[6, 14, 15, 11, 14, 10, 3, 14, 12, 18, 8, 11, 8, 5, 19, 6, 14, 12, 15, 9, 8, 9, 18, 17, 12, 8, 9, 8, 5, 17, 7, 9, 15, 8, 10, 1, 9, 7, 9, 8, 11, 11, 18, 5, 4, 8, 15, 11, 6, 8, 11, 10, 16, 14, 13, 8, 5, 14, 13, 13, 17, 4, 5, 10, 8, 6, 13, 12, 11, 2, 0, 5, 5, 6, 1, 9, 8, 14, 13, 16, 12, 7, 14, 12, 15, 13, 13, 11, 11, 11, 6, 13, 12, 10, 6, 13, 17, 7, 8, 5]


In [103]:
diff_node_bins_results = list(pd.cut(different_node_results, bins=bins, include_lowest=True, labels=False))
print(diff_node_bins_results)

[13, 14, 7, 6, 7, 8, 5, 6, 10, 18, 7, 6, 8, 12, 6, 11, 16, 9, 3, 3, 11, 5, 8, 9, 4, 4, 10, 8, 12, 3, 11, 13, 8, 9, 5, 9, 12, 10, 15, 9, 10, 12, 5, 8, 0, 10, 3, 9, 12, 8, 12, 13, 8, 10, 12, 8, 12, 2, 5, 5, 5, 11, 15, 11, 9, 12, 0, 7, 9, 15, 11, 6, 11, 8, 6, 3, 11, 6, 10, 1, 6, 11, 6, 10, 7, 11, 2, 9, 10, 10, 10, 10, 19, 12, 1, 12, 6, 11, 13, 14]


In [107]:
print(len(eq_node_bins_results))
print(len(diff_node_bins_results))
print(len(bins))

100
100
21


In [108]:
eq_counter = dict(collections.Counter(eq_node_bins_results))
for i in sorted (eq_counter) : 
    print ((i, eq_counter[i]), end =" ") 

(0, 1) (1, 2) (2, 1) (3, 1) (4, 2) (5, 8) (6, 7) (7, 4) (8, 13) (9, 7) (10, 5) (11, 10) (12, 7) (13, 9) (14, 8) (15, 5) (16, 2) (17, 4) (18, 3) (19, 1) 

In [109]:
diff_counter = dict(collections.Counter(diff_node_bins_results))
for i in sorted (diff_counter) : 
    print ((i, diff_counter[i]), end =" ") 

(0, 2) (1, 2) (2, 2) (3, 5) (4, 2) (5, 7) (6, 10) (7, 5) (8, 10) (9, 9) (10, 12) (11, 11) (12, 11) (13, 4) (14, 2) (15, 3) (16, 1) (18, 1) (19, 1) 

In [None]:
for k,v in diff_counter.items():
    if k not in eq_counter:
        diff_counter[k] = v
        
for k,v in eq_counter.items():
    if k not in diff_counter:
        eq_counter[k] = v        

In [None]:
for k,v in diff_counter.items():
    if k not in eq_counter:
        diff_counter[k] = 1
        
for k,v in eq_counter.items():
    if k not in diff_counter:
        eq_counter[k] = 1    

In [110]:
print(len(diff_counter))
print(len(eq_counter))
assert len(diff_counter) == len(eq_counter), 'statistics are not equal'

19
20


In [95]:
chisquare(eq_node_bins_results, f_exp=diff_node_bins_results)

Power_divergenceResult(statistic=inf, pvalue=0.0)

In [96]:
chisquare(eq_node_bins_results, f_exp=diff_node_bins_results)

Power_divergenceResult(statistic=inf, pvalue=0.0)