In [1]:
%load_ext autoreload


In [2]:
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.private.baseline as baseline
import experiments.founder_rank.data.private.random as random
import experiments.founder_rank.data.private.naive as naive
import experiments.founder_rank.data.private.graph_metrics as graph_metrics

In [5]:
# Load the three reference rankings. The result is indexed by name in later
# cells (e.g. datasets['baseline']). NOTE: `random` here is the project data
# module imported above, not the standard-library `random`.
datasets = get_datasets([baseline, random, naive])
# The graph-metrics data is loaded on its own; later cells slice it by column
# index, so it is presumably a 2-D array -- confirm against get_dataset.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both helpers are built from the baseline dataset. `metrics` supplies the
# scoring functions used in the evaluation cells (ndcg, precision_at, tau, rho,
# rmse, mae); `learner` fits the linear regressions over graph-metric columns.
# Presumably the baseline acts as the reference ranking -- verify in
# metrics.py / learner.py.
metrics = metric.Metrics(datasets['baseline'])
learner = learn.Learner(datasets['baseline'])

In [7]:
# Pair column 0 with a single graph-metric column at a time. Going by the
# variable names, columns 1, 2 and 3 hold PageRank, betweenness and closeness
# (column 0 is presumably an identifier -- confirm against the data module).
only_pr, only_betweenness, only_closeness = (
  with_graph_metrics.take([0, column], axis=1) for column in (1, 2, 3)
)

# Pairwise correlation between every column after the first; rowvar=False
# makes each column a variable and each row an observation.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.91689477, 0.53587024],
       [0.91689477, 1.        , 0.51547319],
       [0.53587024, 0.51547319, 1.        ]])

In [8]:
# Feature subsets to compare. Each key is the name the fitted result is stored
# under in `datasets`; each value lists the with_graph_metrics columns used as
# regression features (column 0 is always prepended below).
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

# Fit one linear regression per subset, register its output alongside the
# reference datasets so the evaluation cells pick it up, and report the fit.
for name, indexes in VERSIONS.items():
  features = with_graph_metrics.take([0, *indexes], axis=1)
  dataset, coeff, r2 = learner.linear_regression(features)
  datasets[name] = dataset
  summary = '{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff)
  print(summary)

only_pr: r^2=-0.17732932897868992, coeff=[1.5334213]
only_betweenness: r^2=-0.3352246084978885, coeff=[1.61867525]
only_closeness: r^2=0.31764119440324556, coeff=[0.47208079]
pr_and_closeness: r^2=0.3577394125945218, coeff=[0.41764429 0.39333173]
weighted: r^2=0.3585551688061206, coeff=[0.30418127 0.1334225  0.39447958]


In [9]:
# Print the `idcg` attribute of `metrics` (presumably the ideal DCG used to
# normalise NDCG), then report metrics.ndcg for every entry in `datasets`.
# NOTE(review): the printed idcg is on the order of 1e188, which looks
# suspiciously large -- confirm the gain function in metrics.py is intended.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

2.1435899981703272e+188
baseline: 1.0
random: 0.18060326274376168
naive: 0.396502343841324
only_pr: 0.5252712303827227
only_betweenness: 0.36572542698814947
only_closeness: 0.34739602450126317
pr_and_closeness: 0.526390662794714
weighted: 0.4420531952123817


In [10]:
# Report precision at several cutoffs for every dataset.
for n in (5, 10, 20, 50):
  print('n = {n}'.format(n=n))

  # The closure reads `n` when it is called, so this relies on
  # for_each_dataset invoking it before the loop advances.
  def precision_at_n(a):
    return metrics.precision_at(n, a)

  for_each_dataset(datasets, precision_at_n)

n = 5
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.1
naive: 0.2
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 20
baseline: 1.0
random: 0.05
naive: 0.2
only_pr: 0.15
only_betweenness: 0.2
only_closeness: 0.25
pr_and_closeness: 0.2
weighted: 0.2
n = 50
baseline: 1.0
random: 0.06
naive: 0.38
only_pr: 0.34
only_betweenness: 0.34
only_closeness: 0.3
pr_and_closeness: 0.38
weighted: 0.38


In [11]:
# Kendall's tau for every dataset; the recorded output shows each result is a
# KendalltauResult (correlation, pvalue).
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=4.663665584756559e-306)
random: KendalltauResult(correlation=-0.00794871794871795, pvalue=0.7662798727558393)
naive: KendalltauResult(correlation=0.47521025641025644, pvalue=1.1937092634455458e-70)
only_pr: KendalltauResult(correlation=0.4076, pvalue=1.851966354436399e-52)
only_betweenness: KendalltauResult(correlation=0.4298871794871795, pvalue=3.774099703928743e-58)
only_closeness: KendalltauResult(correlation=0.4866666666666667, pvalue=5.252739970865429e-74)
pr_and_closeness: KendalltauResult(correlation=0.48043076923076933, pvalue=3.607796412583193e-72)
weighted: KendalltauResult(correlation=0.48037948717948725, pvalue=3.734651970793418e-72)


In [12]:
# Spearman's rho for every dataset; the recorded output shows each result is a
# SpearmanrResult (correlation, pvalue).
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=-0.01111998033914967, pvalue=0.7814333838140232)
naive: SpearmanrResult(correlation=0.6549147702138118, pvalue=7.944940470742049e-78)
only_pr: SpearmanrResult(correlation=0.5730580486606047, pvalue=7.776668490043354e-56)
only_betweenness: SpearmanrResult(correlation=0.6051929220938806, pvalue=1.040096083479409e-63)
only_closeness: SpearmanrResult(correlation=0.6701233226837062, pvalue=1.0720526231979262e-82)
pr_and_closeness: SpearmanrResult(correlation=0.6600343573359548, pvalue=1.9612842228851042e-79)
weighted: SpearmanrResult(correlation=0.6608348980093389, pvalue=1.0922099523145491e-79)


In [13]:
# metrics.rmse for every dataset (presumably root-mean-square error against the
# baseline, which scores 0.0 in the recorded output -- confirm in metrics.py).
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.2463234841875151
naive: 0.08224137446721991
only_pr: 0.15327530217970733
only_betweenness: 0.1913546147415874
only_closeness: 0.132022175712766
pr_and_closeness: 0.10214416048013067
weighted: 0.10125255437069981


In [14]:
# metrics.mae for every dataset (presumably mean absolute error against the
# baseline, which scores 0.0 in the recorded output -- confirm in metrics.py).
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.2297591911423119
naive: 0.06466490225995233
only_pr: 0.12149672426252768
only_betweenness: 0.1549092431690086
only_closeness: 0.09227013392461829
pr_and_closeness: 0.0761589042711777
weighted: 0.07615527105119875
