In [1]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload



In [2]:
# Mode 2: re-import modules before each cell runs, so edits to the project's
# .py files are picked up without restarting the kernel.
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.public.baseline as baseline
import experiments.founder_rank.data.public.random as random
import experiments.founder_rank.data.public.naive as naive
import experiments.founder_rank.data.public.graph_metrics as graph_metrics

In [37]:
# Load the three public dataset modules in one call, then fetch the
# graph-metrics dataset separately.
# NOTE(review): `datasets` is later indexed with 'baseline', so get_datasets
# presumably keys each entry by its module name — confirm in helpers.
public_modules = [baseline, random, naive]
datasets = get_datasets(public_modules)
with_graph_metrics = get_dataset(graph_metrics)

In [43]:
# The scorer and the regression learner are both built from the same
# baseline dataset, so look it up once and share it.
baseline_dataset = datasets['baseline']
metrics = metric.Metrics(baseline_dataset, power_dcg=False)
learner = learn.Learner(baseline_dataset)

In [44]:
# Pair column 0 with one metric column at a time (columns 1, 2 and 3).
# NOTE(review): column 0 is presumably the row identifier and "pr" PageRank —
# confirm against the graph_metrics data module.
only_pr = with_graph_metrics.take([0, 1], axis=1)
only_betweenness = with_graph_metrics.take([0, 2], axis=1)
only_closeness = with_graph_metrics.take([0, 3], axis=1)

# Pairwise correlation of every column after the first; rowvar=False makes
# each column a variable. Kept as the final bare expression so the cell
# displays the matrix.
metric_columns = with_graph_metrics[:, 1:]
np.corrcoef(metric_columns, rowvar=False)

array([[1.        , 0.57831298, 0.12559522],
       [0.57831298, 1.        , 0.16451425],
       [0.12559522, 0.16451425, 1.        ]])

In [45]:
# Metric-column selections to regress on, keyed by the name under which the
# resulting dataset is stored in `datasets`.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

for name, indexes in VERSIONS.items():
  # Column 0 always accompanies the selected metric columns.
  columns = [0] + indexes
  features = with_graph_metrics.take(columns, axis=1)
  # The learner hands back the produced dataset, the fitted coefficients and
  # an r^2 score, in that order.
  fitted = learner.linear_regression(features)
  dataset, coeff, r2 = fitted
  # Register the result so the evaluation cells score it with the others.
  datasets[name] = dataset
  print('{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff))

only_pr: r^2=-0.13469855889867488, coeff=[57.40372574]


only_betweenness: r^2=-0.568618244655507, coeff=[91.58994046]


only_closeness: r^2=-0.1611648440224529, coeff=[0.81596813]


pr_and_closeness: r^2=0.04173914664102063, coeff=[41.72022057  0.56968665]


weighted: r^2=0.06680371418467679, coeff=[  48.32209038 -102.73220238    0.56982867]


In [46]:
# Show the metrics object's `idcg` value, then apply metrics.ndcg to every
# dataset via the for_each_dataset helper.
# NOTE(review): idcg is presumably the ideal DCG used to normalise nDCG — confirm.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

1803120882.6055367
baseline: 1.0
random: 0.9525226472105175
naive: 0.9724056551953735
only_pr: 0.9606067920960051
only_betweenness: 0.9601154173010412
only_closeness: 0.9732083638709818
pr_and_closeness: 0.9714278014121187
weighted: 0.9711439742848593


In [47]:
# Precision at several cut-offs, reported for every dataset.
for n in [5, 10, 20, 50]:
  print('n = {n}'.format(n=n))
  # Bind the current cut-off as a default argument. A bare closure looks `n`
  # up only when it is called, so it would see a later loop value if
  # for_each_dataset ever deferred or stored the callable.
  for_each_dataset(datasets, lambda a, n=n: metrics.precision_at(n, a))

n = 5
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 10
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 20
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 50
baseline: 1.0
random: 0.0
naive: 0.04
only_pr: 0.02
only_betweenness: 0.02
only_closeness: 0.04
pr_and_closeness: 0.02
weighted: 0.04


In [48]:
# Apply metrics.tau to every dataset; the captured output shows it yields a
# KendalltauResult (correlation, pvalue) per dataset.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=0.0)
random: KendalltauResult(correlation=-0.002101690945191333, pvalue=0.1254972941765598)
naive: KendalltauResult(correlation=0.19646417676574224, pvalue=0.0)


only_pr: KendalltauResult(correlation=0.06565058699698476, pvalue=0.0)
only_betweenness: KendalltauResult(correlation=0.058010898449219785, pvalue=0.0)
only_closeness: KendalltauResult(correlation=0.22447604757013556, pvalue=0.0)


pr_and_closeness: KendalltauResult(correlation=0.1804335588801785, pvalue=0.0)
weighted: KendalltauResult(correlation=0.17794678461160882, pvalue=0.0)


In [49]:
# Apply metrics.rho to every dataset; the captured output shows it yields a
# SpearmanrResult (correlation, pvalue) per dataset.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=-0.0031566060735662245, pvalue=0.12500830788573242)


naive: SpearmanrResult(correlation=0.2866418715834115, pvalue=0.0)


only_pr: SpearmanrResult(correlation=0.10056001978280668, pvalue=0.0)
only_betweenness: SpearmanrResult(correlation=0.06908893594516373, pvalue=9.536325566459976e-248)
only_closeness: SpearmanrResult(correlation=0.3328461191322523, pvalue=0.0)


pr_and_closeness: SpearmanrResult(correlation=0.2665309669515533, pvalue=0.0)
weighted: SpearmanrResult(correlation=0.26368731829099146, pvalue=0.0)


In [50]:
# Apply metrics.rmse to every dataset.
# NOTE(review): presumably root-mean-square error against the baseline the
# Metrics object was built from (baseline itself scores 0.0) — confirm.
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.30834491798775665
naive: 0.19532538950468056
only_pr: 0.2424218508371321
only_betweenness: 0.36020289284105356
only_closeness: 0.211302624132106
pr_and_closeness: 0.18301138994497348
weighted: 0.17767162884143872


In [51]:
# Apply metrics.mae to every dataset.
# NOTE(review): presumably mean absolute error against the baseline the
# Metrics object was built from (baseline itself scores 0.0) — confirm.
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.27720060090517534
naive: 0.1279168415539647
only_pr: 0.1582038220172902
only_betweenness: 0.2205776831817472
only_closeness: 0.13888925485228168
pr_and_closeness: 0.1327591535517456
weighted: 0.13287532087670265
