In [1]:
%load_ext autoreload


In [2]:
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.private.baseline as baseline
import experiments.founder_rank.data.private.random as random
import experiments.founder_rank.data.private.naive as naive
import experiments.founder_rank.data.private.graph_metrics as graph_metrics

In [5]:
# Load the three reference rankings into `datasets`; later cells index it by
# name (e.g. datasets['baseline']). Note that `random` here is the project
# data module imported above, not the standard-library `random`.
datasets = get_datasets([baseline, random, naive])
# Load the graph-metrics table on its own: it is sliced by column below rather
# than scored directly. Presumably a 2-D numpy array -- TODO confirm in helpers.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both the scorer and the learner are built from the same 'baseline' dataset,
# so 'baseline' is the reference every other ranking is measured/fitted against.
metrics = metric.Metrics(datasets['baseline'])
learner = learn.Learner(datasets['baseline'])

In [7]:
# Single-metric views of the graph-metrics table: column 0 plus exactly one
# metric column. Judging by the names, columns 1/2/3 are presumably PageRank,
# betweenness and closeness -- TODO confirm against the graph_metrics module.
# NOTE(review): no later cell visible here reads these three names; the
# VERSIONS loop rebuilds the same column subsets itself.
only_pr = with_graph_metrics.take([0, 1], axis=1)
only_betweenness = with_graph_metrics.take([0, 2], axis=1)
only_closeness = with_graph_metrics.take([0, 3], axis=1)
# Correlation matrix of the metric columns (everything except column 0).
# rowvar=False makes each column a variable and each row an observation.
# Kept as the last expression so the matrix is the cell's displayed output.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.91689477, 0.53587024],
       [0.91689477, 1.        , 0.51547319],
       [0.53587024, 0.51547319, 1.        ]])

In [8]:
# Feature-column subsets to evaluate. Each key is the name the resulting
# dataset is stored under in `datasets`; each value lists column indexes into
# `with_graph_metrics`. Judging by the key names, 1/2/3 are presumably
# PageRank / betweenness / closeness -- TODO confirm against graph_metrics.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

for name, indexes in VERSIONS.items():
  # Column 0 is kept in every subset (presumably the row identifier -- verify).
  features = with_graph_metrics.take([0, *indexes], axis=1)
  # linear_regression returns a 3-tuple, unpacked here as the resulting
  # dataset, the coefficients and the r^2 value reported in the print below.
  dataset, coeff, r2 = learner.linear_regression(features)
  # Register the result under its version name so the metric cells that
  # iterate over `datasets` score it alongside the reference rankings.
  datasets[name] = dataset
  print('{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff))

only_pr: r^2=0.2562086473264755, coeff=[1.13653772]
only_betweenness: r^2=0.19296682619626782, coeff=[1.22699022]
only_closeness: r^2=0.16811743607525442, coeff=[0.27269301]
pr_and_closeness: r^2=0.3123736375656647, coeff=[0.78040366 0.12554375]
weighted: r^2=0.3146415227573024, coeff=[0.59402649 0.21916311 0.12742923]


In [9]:
# Show the scorer's `idcg` attribute first (presumably the ideal DCG that NDCG
# is normalised by -- confirm in the metrics module), then apply metrics.ndcg
# to every entry in `datasets`.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

2.1435899981703272e+188
baseline: 1.0
random: 0.157310319642317
naive: 0.39869350465721554
only_pr: 0.5292826566116889
only_betweenness: 0.36431767374311913
only_closeness: 0.3470809348995768
pr_and_closeness: 0.5299415176535516
weighted: 0.44414680982870147


In [10]:
# Score every dataset with metrics.precision_at at several cut-offs `n`.
# The lambda closes over the loop variable; the recorded output lists a
# separate result set under each "n = ..." header, so it is evidently invoked
# inside the call, while `n` still holds the current cut-off.
for n in (5, 10, 20, 50):
  print('n = {n}'.format(n=n))
  for_each_dataset(
    datasets,
    lambda a: metrics.precision_at(n, a),
  )

n = 5
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.3
pr_and_closeness: 0.2
weighted: 0.2
n = 20
baseline: 1.0
random: 0.1
naive: 0.15
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.1
weighted: 0.1
n = 50
baseline: 1.0
random: 0.06
naive: 0.38
only_pr: 0.4
only_betweenness: 0.34
only_closeness: 0.24
pr_and_closeness: 0.4
weighted: 0.38


In [11]:
# Apply metrics.tau to every dataset. The recorded output shows
# KendalltauResult values, i.e. a Kendall tau correlation and p-value per
# dataset; the reference is presumably the 'baseline' dataset that `metrics`
# was constructed from -- confirm in the metrics module.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=4.663665584756559e-306)
random: KendalltauResult(correlation=-0.030410256410256412, pvalue=0.2554547652516961)
naive: KendalltauResult(correlation=0.5827897435897437, pvalue=2.6693803771270578e-105)
only_pr: KendalltauResult(correlation=0.49581538461538466, pvalue=9.614150060655254e-77)
only_betweenness: KendalltauResult(correlation=0.5315282051282052, pvalue=6.483046779084324e-88)
only_closeness: KendalltauResult(correlation=0.5774871794871795, pvalue=1.9889076662270004e-103)
pr_and_closeness: KendalltauResult(correlation=0.5489025641025642, pvalue=1.2520852284599117e-93)
weighted: KendalltauResult(correlation=0.5539692307692309, pvalue=2.493703397526784e-95)


In [12]:
# Apply metrics.rho to every dataset. The recorded output shows
# SpearmanrResult values, i.e. a Spearman rank correlation and p-value per
# dataset; the reference is presumably the 'baseline' dataset that `metrics`
# was constructed from -- confirm in the metrics module.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=-0.046381420496436475, pvalue=0.24693036736928112)
naive: SpearmanrResult(correlation=0.7604095846645368, pvalue=6.860797865181096e-119)
only_pr: SpearmanrResult(correlation=0.6719074956991891, pvalue=2.7531913547198014e-83)
only_betweenness: SpearmanrResult(correlation=0.707934037847137, pvalue=3.6804474622069524e-96)
only_closeness: SpearmanrResult(correlation=0.7597426886212829, pvalue=1.4505372902564246e-118)
pr_and_closeness: SpearmanrResult(correlation=0.7288826247235194, pvalue=1.3605811499646126e-104)
weighted: SpearmanrResult(correlation=0.7324307692307692, pvalue=4.2348015370305313e-106)


In [13]:
# Apply metrics.rmse to every dataset (presumably root-mean-square error
# against the baseline scores -- confirm in the metrics module). In the
# recorded output 'baseline' itself scores 0.0.
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.382674533038442
naive: 0.1704423584535529
only_pr: 0.09592259635054848
only_betweenness: 0.11483874764962773
only_closeness: 0.18878961522341559
pr_and_closeness: 0.11672909249021061
weighted: 0.11614775626361393


In [14]:
# Apply metrics.mae to every dataset (presumably mean absolute error against
# the baseline scores -- confirm in the metrics module). In the recorded
# output 'baseline' itself scores 0.0.
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.3406075290905881
naive: 0.14151585323199792
only_pr: 0.04979408771849987
only_betweenness: 0.056249428477709446
only_closeness: 0.13023601199771606
pr_and_closeness: 0.0818654051479628
weighted: 0.08096885092833195
