In [15]:
from collections import defaultdict
from itertools import product

import Orange
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import rankdata
from sklearn.cluster import AgglomerativeClustering

from helpers import Datasets_Data, SBM_Data, LFR_Data, calc_avranks, sbm_neighbour_score

In [16]:
_, results_datasets, results_modularity = Datasets_Data().load_precalculated()
_, results_sbm, _ = SBM_Data().load_precalculated()
_, results_lfr, _ = LFR_Data().load_precalculated()

In [17]:
allowed_datasets = set([x[12:] for x in results_lfr.keys()])
# allowed2 = set([
#     'dolphins',
#     'football',
#     'karate',
#     'polbooks',
#     'sp_school_day_1', 'sp_school_day_2',
#     'news_2cl1_0.1', 'news_2cl2_0.1', 'news_2cl3_0.1',
#     'news_3cl1_0.1', 'news_3cl2_0.1', 'news_3cl3_0.1',
#     'news_5cl1_0.1', 'news_5cl2_0.1'
# ])
# allowed_datasets = allowed_datasets.intersection(allowed2)
allowed_datasets = list(allowed_datasets)
allowed_datasets

['news_2cl1_0.1',
 'news_2cl3_0.1',
 'news_3cl1_0.1',
 'news_2cl2_0.1',
 'news_3cl2_0.1',
 'cora_IR',
 'news_5cl1_0.1',
 'karate',
 'news_5cl2_0.1',
 'polblogs',
 'news_3cl3_0.1',
 'news_5cl3_0.1',
 'cora_DB']

In [18]:
kernel_names = list(results_datasets[allowed_datasets[1]][0].keys())
kernel_names

['Katz',
 'logKatz',
 'For',
 'logFor',
 'Comm',
 'logComm',
 'Heat',
 'logHeat',
 'NHeat',
 'logNHeat',
 'SCT',
 'SCCT',
 'RSP',
 'FE',
 'PPR',
 'logPPR',
 'ModifPPR',
 'logModifPPR',
 'HeatPR',
 'logHeatPR',
 'DF',
 'logDF',
 'Abs',
 'logAbs',
 'SP-CT']

# Baseline (best measuse for all)

In [5]:
baseline = defaultdict(list)
for dataset in allowed_datasets:
    for i in range(7):
        for kernel_name in kernel_names:
            result = results_datasets[dataset][i][kernel_name]
            assert type(result) == float
            baseline[kernel_name].append(result)
baseline = {k: np.mean(v) for k, v in baseline.items()}
# baseline

In [6]:
baseline_measure = list(baseline.keys())[np.argmax(list(baseline.values()))]
baseline_meanari = baseline[baseline_measure]
print(f"{baseline_measure}: {baseline_meanari:.3f}")

SCT: 0.782


# Upper bound

In [7]:
def calc_upperbound(results_datasets, n=7, prefix=''):
    upperbound_measures = {}
    upperbound_meanaris = {}
    for dataset in allowed_datasets:
        dataset_results = defaultdict(list)
        for i in range(n):
            for kernel_name in kernel_names:
                result = results_datasets[prefix + dataset][i][kernel_name]
                assert type(result) == float, f"{dataset}, {i}, {kernel_name}"
                dataset_results[kernel_name].append(result)
        dataset_results = {k: np.mean(v) for k, v in dataset_results.items()}
        dataset_bestmeasure = list(dataset_results.keys())[np.argmax(list(dataset_results.values()))]
        dataset_bestmeanari = dataset_results[dataset_bestmeasure]
        upperbound_measures[dataset] = dataset_bestmeasure
        upperbound_meanaris[dataset] = dataset_bestmeanari
    return upperbound_measures, upperbound_meanaris

In [8]:
upperbound_measures, upperbound_meanaris = calc_upperbound(results_datasets)

upperbound_meanari = np.mean(list(upperbound_meanaris.values()))
print(f'upper bound {upperbound_meanari:.3f}')
upperbound_measures

upper bound 0.790


{'news_2cl1_0.1': 'SCT',
 'news_2cl3_0.1': 'logFor',
 'news_3cl1_0.1': 'HeatPR',
 'news_2cl2_0.1': 'SCT',
 'news_3cl2_0.1': 'logHeatPR',
 'news_5cl1_0.1': 'logFor',
 'karate': 'Katz',
 'news_5cl2_0.1': 'FE',
 'news_3cl3_0.1': 'NHeat'}

# Ours LFR

In [9]:
lfr_measures, lfr_meanaris = calc_upperbound(results_lfr, n=1, prefix='dataset2lfr_')
lfr_measures

{'news_2cl1_0.1': 'logKatz',
 'news_2cl3_0.1': 'logKatz',
 'news_3cl1_0.1': 'logKatz',
 'news_2cl2_0.1': 'SCT',
 'news_3cl2_0.1': 'SCT',
 'news_5cl1_0.1': 'logNHeat',
 'karate': 'RSP',
 'news_5cl2_0.1': 'logPPR',
 'news_3cl3_0.1': 'Heat'}

In [10]:
ours_meanaris = {}
for dataset in allowed_datasets:
    dataset_results = []
    for i in range(7):
        result = results_datasets[dataset][i][lfr_measures[dataset]]
        assert type(result) == float
        dataset_results.append(result)
    ours_meanaris[dataset] = np.mean(dataset_results)

In [11]:
ours_meanari = np.mean(list(ours_meanaris.values()))
print(f'ours lfr {ours_meanari:.3f}')

ours lfr 0.766


# Ours SBM

In [12]:
sbm_measures, sbm_meanaris = calc_upperbound(results_sbm, n=1, prefix='dataset2sbm_')
sbm_measures

{'news_2cl1_0.1': 'logKatz',
 'news_2cl3_0.1': 'Katz',
 'news_3cl1_0.1': 'logKatz',
 'news_2cl2_0.1': 'logKatz',
 'news_3cl2_0.1': 'logKatz',
 'news_5cl1_0.1': 'logFor',
 'karate': 'Katz',
 'news_5cl2_0.1': 'SCCT',
 'news_3cl3_0.1': 'logFor'}

In [13]:
ours_meanaris = {}
for dataset in allowed_datasets:
    dataset_results = []
    for i in range(7):
        result = results_datasets[dataset][i][sbm_measures[dataset]]
        assert type(result) == float
        dataset_results.append(result)
    ours_meanaris[dataset] = np.mean(dataset_results)

In [14]:
ours_meanari = np.mean(list(ours_meanaris.values()))
print(f'ours sbm {ours_meanari:.3f}')

ours sbm 0.724
