In [1]:
# Load IPython's autoreload extension; it provides the %autoreload magic.
%load_ext autoreload


In [2]:
# Mode 2: reload all imported modules before each cell executes, so edits to
# the experiments.* modules take effect without restarting the kernel.
%autoreload 2

In [26]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [27]:
import experiments.founder_rank.data.private.baseline as baseline
import experiments.founder_rank.data.private.random as random
import experiments.founder_rank.data.private.naive as naive
import experiments.founder_rank.data.private.graph_metrics as graph_metrics

In [28]:
# Load the comparison datasets. NOTE: `random` here is the project data module
# imported above (experiments...private.random), not the stdlib `random`.
# `datasets` is indexed by name later (e.g. datasets['baseline']) — presumably
# keyed by each module's short name; confirm in helpers.
datasets = get_datasets([baseline, random, naive])
# Graph-metric features, loaded separately. Later cells index this as a 2-D
# array: column 0 plus metric columns 1-3 — TODO confirm what column 0 holds.
with_graph_metrics = get_dataset(graph_metrics)

In [29]:
# Both the scorer and the learner are constructed from the baseline dataset,
# so later results are relative to it (presumably as the ground-truth ranking —
# confirm in the metrics and learner modules).
metrics = metric.Metrics(datasets['baseline'])
learner = learn.Learner(datasets['baseline'])

In [30]:
# One view per graph metric: column 0 paired with column 1, 2 and 3 in turn.
# (Column meanings follow the variable names — presumably PageRank,
# betweenness, closeness; confirm against the graph_metrics data module.)
only_pr, only_betweenness, only_closeness = (
  with_graph_metrics.take([0, column], 1) for column in (1, 2, 3)
)
# Pairwise correlation of columns 1 onward; rowvar=False treats each column
# as a variable. Kept as the last expression so the matrix is displayed.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.91689477, 0.53587024],
       [0.91689477, 1.        , 0.51547319],
       [0.53587024, 0.51547319, 1.        ]])

In [31]:
# Column subsets of with_graph_metrics to fit, keyed by the name under which
# each fitted result is stored in `datasets`.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

for name, indexes in VERSIONS.items():
  # Column 0 is always kept in front of the selected metric columns.
  features = with_graph_metrics.take([0, *indexes], 1)
  # linear_regression yields (fitted dataset, coefficients, r^2) in that order.
  dataset, coeff, r2 = learner.linear_regression(features)
  # Register the fitted result so the metric cells below score it as well.
  datasets[name] = dataset
  print(
    '{n}: r^2={r2}, coeff={c}'.format(
      n=name,
      r2=r2,
      c=coeff,
    )
  )

only_pr: r^2=0.284167629740967, coeff=[1.12954155]
only_betweenness: r^2=0.23172539863346697, coeff=[1.23169549]
only_closeness: r^2=0.18342013638410948, coeff=[0.26854765]
pr_and_closeness: r^2=0.33687978288834497, coeff=[0.7906443  0.11946746]
weighted: r^2=0.3417942232357143, coeff=[0.52114899 0.31690271 0.1221938 ]


In [32]:
# Print the metrics object's `idcg` attribute, then metrics.ndcg for every
# dataset. (idcg is presumably the ideal DCG used as the NDCG normaliser —
# confirm in the metrics module.)
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

2.1435899981703272e+188
baseline: 1.0
random: 0.15677292454310263
naive: 0.3996780768702226
only_pr: 0.5299199836192711
only_betweenness: 0.36620825305498395
only_closeness: 0.34776865481884295
pr_and_closeness: 0.5307132297802482
weighted: 0.44480749301678246


In [33]:
# metrics.precision_at for every dataset at several cut-offs n.
for n in [5, 10, 20, 50]:
  print('n = {n}'.format(n=n))
  # The lambda closes over the loop variable n. That is only correct because
  # for_each_dataset appears to invoke it before the next iteration starts
  # (the per-n output is interleaved with the headers) — do not store it.
  for_each_dataset(datasets, lambda a: metrics.precision_at(n, a))

n = 5
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.3
pr_and_closeness: 0.2
weighted: 0.2
n = 20
baseline: 1.0
random: 0.0
naive: 0.15
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.1
weighted: 0.15
n = 50
baseline: 1.0
random: 0.04
naive: 0.38
only_pr: 0.4
only_betweenness: 0.34
only_closeness: 0.24
pr_and_closeness: 0.4
weighted: 0.38


In [34]:
# metrics.tau per dataset; the printed values are KendalltauResult tuples
# (correlation, pvalue).
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=4.663665584756559e-306)
random: KendalltauResult(correlation=-0.005405128205128206, pvalue=0.839818034740744)
naive: KendalltauResult(correlation=0.5952820512820514, pvalue=8.878133659551435e-110)
only_pr: KendalltauResult(correlation=0.5065025641025642, pvalue=5.259071184274815e-80)
only_betweenness: KendalltauResult(correlation=0.542523076923077, pvalue=1.6481313337823185e-91)
only_closeness: KendalltauResult(correlation=0.5877435897435898, pvalue=4.591277154602555e-107)
pr_and_closeness: KendalltauResult(correlation=0.5595589743589745, pvalue=3.1799603981988076e-97)
weighted: KendalltauResult(correlation=0.5669846153846154, pvalue=9.047145815926624e-100)


In [35]:
# metrics.rho per dataset; the printed values are SpearmanrResult tuples
# (correlation, pvalue).
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=-0.007277856967313837, pvalue=0.8559087346599529)
naive: SpearmanrResult(correlation=0.7732623740476777, pvalue=2.260049549449023e-125)
only_pr: SpearmanrResult(correlation=0.6833374784959451, pvalue=3.6081515753429006e-87)
only_betweenness: SpearmanrResult(correlation=0.7211024821823544, pvalue=2.2682480239823563e-101)
only_closeness: SpearmanrResult(correlation=0.7723509461784223, pvalue=6.726493461603771e-125)
pr_and_closeness: SpearmanrResult(correlation=0.7401626935364954, pvalue=1.8108454360948637e-109)
weighted: SpearmanrResult(correlation=0.748037748832637, pvalue=5.0191638842095536e-113)


In [36]:
# metrics.rmse per dataset — presumably root-mean-square error against the
# baseline the Metrics object was built from (confirm in the metrics module).
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.419132993880088
naive: 0.16966676223686575
only_pr: 0.08964866457612464
only_betweenness: 0.10651554996056374
only_closeness: 0.18460861880377988
pr_and_closeness: 0.11052061117490375
weighted: 0.1092997661835526


In [37]:
# metrics.mae per dataset — presumably mean absolute error against the
# baseline the Metrics object was built from (confirm in the metrics module).
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.3750072066597993
naive: 0.13905167365619464
only_pr: 0.04744860051416035
only_betweenness: 0.05194630011031918
only_closeness: 0.12598179945222907
pr_and_closeness: 0.07787960212403051
weighted: 0.07637111563931642
