In [1]:
# Load IPython's autoreload extension (configured in the next cell).
%load_ext autoreload


In [2]:
# Mode 2: reload imported modules before each cell runs, so edits to the
# experiment code take effect without restarting the kernel.
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.private.baseline as baseline
import experiments.founder_rank.data.private.random as random
import experiments.founder_rank.data.private.naive as naive
import experiments.founder_rank.data.private.graph_metrics as graph_metrics

In [5]:
# Load the baseline, random and naive datasets; later cells look them up by
# name (e.g. datasets['baseline']).
# NOTE: `random` here is the project data module imported above, not the stdlib.
datasets = get_datasets([baseline, random, naive])
# Graph-metric data is loaded on its own; later cells slice it by column.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both the metric calculator and the regression learner are built from the
# baseline dataset (presumably the reference the other datasets are compared
# against — confirm in metrics.py / learner.py).
metrics = metric.Metrics(datasets['baseline'])
learner = learn.Learner(datasets['baseline'])

In [7]:
# Two-column slices of the graph-metric data: column 0 paired with one metric
# column each. Column meanings are inferred from the variable names
# (1 = pr, 2 = betweenness, 3 = closeness) — TODO confirm against graph_metrics.
only_pr, only_betweenness, only_closeness = (
  with_graph_metrics.take([0, metric_column], axis=1)
  for metric_column in (1, 2, 3)
)

# Correlation matrix of every column after column 0; rowvar=False makes each
# column a variable. Kept as the cell's last expression so it is displayed.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.9168935 , 0.5358719 ],
       [0.9168935 , 1.        , 0.51547319],
       [0.5358719 , 0.51547319, 1.        ]])

In [8]:
# Column indexes (into with_graph_metrics) fed to the learner for each variant.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

# Run learner.linear_regression once per variant, store the dataset it returns
# under the variant's name (so the metric cells below include it), and print
# the reported r^2 and coefficients.
for name in VERSIONS:
  indexes = VERSIONS[name]
  # Column 0 is always kept in front of the selected metric columns.
  features = with_graph_metrics.take([0] + indexes, axis=1)
  dataset, coeff, r2 = learner.linear_regression(features)
  datasets[name] = dataset
  print(
    '{n}: r^2={r2}, coeff={c}'.format(c=coeff, n=name, r2=r2)
  )

only_pr: r^2=-0.13336698261708269, coeff=[0.00222696]
only_betweenness: r^2=-0.29297141799068305, coeff=[1.5737442]
only_closeness: r^2=0.30673542058553793, coeff=[0.45353152]
pr_and_closeness: r^2=0.35416523079900375, coeff=[0.0006658 0.3692907]
weighted: r^2=0.3545498107735512, coeff=[0.00055161 0.09010513 0.37006597]


In [9]:
# Print the idcg value held by the metrics object (presumably the ideal DCG
# used as the normaliser — confirm in metrics.py), then apply metrics.ndcg
# across the datasets.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

2.1435899981703272e+188
baseline: 1.0
random: 0.21170693512455982
naive: 0.5249818277214559
only_pr: 0.5249807203302584
only_betweenness: 0.3652083093261328
only_closeness: 0.34657434496786554
pr_and_closeness: 0.5272585189085585
weighted: 0.44131327679884746


In [10]:
# Apply metrics.precision_at across the datasets at several cutoffs,
# printing a header line before each cutoff's results.
for n in (5, 10, 20, 50):
  print('n = {n}'.format(n=n))
  # The lambda adapts precision_at(n, data) to the one-argument callable
  # that for_each_dataset is given, supplying the current cutoff n.
  for_each_dataset(datasets, lambda data: metrics.precision_at(n, data))

n = 5
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.0
naive: 0.1
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 20
baseline: 1.0
random: 0.0
naive: 0.15
only_pr: 0.15
only_betweenness: 0.2
only_closeness: 0.25
pr_and_closeness: 0.2
weighted: 0.2
n = 50
baseline: 1.0
random: 0.04
naive: 0.34
only_pr: 0.34
only_betweenness: 0.3
only_closeness: 0.24
pr_and_closeness: 0.34
weighted: 0.36


In [11]:
# Kendall's tau for each dataset; each result carries a correlation and a p-value.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=4.663665584756559e-306)
random: KendalltauResult(correlation=-0.05505641025641026, pvalue=0.03950866364174224)
naive: KendalltauResult(correlation=0.41320000000000007, pvalue=7.344948113750452e-54)
only_pr: KendalltauResult(correlation=0.4108307692307693, pvalue=2.8927229641822075e-53)
only_betweenness: KendalltauResult(correlation=0.4338564102564103, pvalue=3.402392102766875e-59)
only_closeness: KendalltauResult(correlation=0.48533333333333345, pvalue=1.3035081373469383e-73)
pr_and_closeness: KendalltauResult(correlation=0.4805743589743591, pvalue=3.2749964372184396e-72)
weighted: KendalltauResult(correlation=0.4811384615384616, pvalue=2.2385533111903525e-72)


In [12]:
# Spearman's rho for each dataset; each result carries a correlation and a p-value.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=-0.08264231998033916, pvalue=0.038879852088810486)
naive: SpearmanrResult(correlation=0.5805169820594741, pvalue=1.3829971008507094e-57)
only_pr: SpearmanrResult(correlation=0.5769939051363973, pvalue=9.397069846224258e-57)
only_betweenness: SpearmanrResult(correlation=0.6084856230031949, pvalue=1.4419932528515797e-64)
only_closeness: SpearmanrResult(correlation=0.6689967068075695, pvalue=2.5170520604099495e-82)
pr_and_closeness: SpearmanrResult(correlation=0.6609124600638978, pvalue=1.0318877799374397e-79)
weighted: SpearmanrResult(correlation=0.6618623740476777, pvalue=5.138683659245205e-80)


In [13]:
# metrics.rmse for each dataset (presumably root-mean-square error relative to
# the baseline the metrics object was built from — confirm in metrics.py).
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.2723075614685373
naive: 0.19672235523663104
only_pr: 0.14266547064856686
only_betweenness: 0.18077572063367597
only_closeness: 0.13347453609204543
pr_and_closeness: 0.10002434373758799
weighted: 0.09947654203066537


In [14]:
# metrics.mae for each dataset (presumably mean absolute error relative to
# the baseline the metrics object was built from — confirm in metrics.py).
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.25074932914894055
naive: 0.15792439473825973
only_pr: 0.11407721809869076
only_betweenness: 0.14680929114886246
only_closeness: 0.09106674051023972
pr_and_closeness: 0.07296183327312206
weighted: 0.07293279845306287
