In [1]:
# Load IPython's autoreload extension so the `%autoreload` magic is available.
%load_ext autoreload



In [2]:
# Mode 2: re-import modules before each cell runs, so edits to the project's
# .py files are picked up without restarting the kernel.
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.investment_and_cofound.baseline as baseline
import experiments.founder_rank.data.investment_and_cofound.random as random
import experiments.founder_rank.data.investment_and_cofound.naive as naive
import experiments.founder_rank.data.investment_and_cofound.graph_metrics as graph_metrics

In [5]:
# Rankings to compare, loaded from their data modules; the recorded outputs in
# this notebook label them 'baseline', 'random' and 'naive'.
# NOTE: `random` here is the project's data module imported above, not the
# standard-library `random`.
ranking_sources = [baseline, random, naive]
datasets = get_datasets(ranking_sources)

# Graph-metric matrix, loaded on its own: it is used as regression input
# further down rather than being scored directly.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both the scorer and the learner are built from the same baseline ranking.
reference_ranking = datasets['baseline']
metrics = metric.Metrics(reference_ranking, power_dcg=False)
learner = learn.Learner(reference_ranking)

In [7]:
# Two-column slices: column 0 plus one graph-metric column each. Going by the
# variable names, columns 1/2/3 are presumably PageRank / betweenness /
# closeness; what column 0 holds is not visible here -- TODO confirm.
# NOTE(review): no later cell shown in this notebook reads these three names;
# the regression loop rebuilds the same slices from VERSIONS.
only_pr, only_betweenness, only_closeness = (
  with_graph_metrics.take([0, metric_column], axis=1)
  for metric_column in (1, 2, 3)
)

# Pairwise correlation between the graph-metric columns (column 0 excluded);
# rowvar=False makes each column a variable and each row an observation.
graph_metric_columns = with_graph_metrics[:, 1:]
np.corrcoef(graph_metric_columns, rowvar=False)

array([[1.        , 0.65859893, 0.3435863 ],
       [0.65859893, 1.        , 0.17499308],
       [0.3435863 , 0.17499308, 1.        ]])

In [8]:
# Column subsets of `with_graph_metrics` to regress on, keyed by the name under
# which each fitted result is stored in `datasets`.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

for version_name, metric_columns in VERSIONS.items():
  # Column 0 always accompanies the selected metric columns.
  selected = with_graph_metrics.take([0, *metric_columns], axis=1)
  # linear_regression returns a (dataset, coefficients, r^2) triple.
  fitted_ranking, weights, score = learner.linear_regression(selected)
  # Store the result so the evaluation cells score it alongside the others.
  datasets[version_name] = fitted_ranking
  print('{n}: r^2={r2}, coeff={c}'.format(n=version_name, r2=score, c=weights))

only_pr: r^2=-0.006481642362117901, coeff=[30.57060321]


only_betweenness: r^2=-0.5962168698993546, coeff=[60.70432762]


only_closeness: r^2=0.2188555498270558, coeff=[0.70325494]


pr_and_closeness: r^2=0.26413075062190283, coeff=[11.28637149  0.54691204]


weighted: r^2=0.2718193972451778, coeff=[ 14.25471651 -26.5096559    0.5233239 ]


In [9]:
# Show `metrics.idcg` (presumably the ideal-DCG normaliser -- confirm in
# metrics.py), then `metrics.ndcg` for every entry in `datasets`.
# Per the recorded output, for_each_dataset prints one `name: value` line each.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

1964609088.4837651
baseline: 1.0
random: 0.9528180967878405
naive: 0.9789751508582549
only_pr: 0.9708847769603616
only_betweenness: 0.9754635262323714
only_closeness: 0.9788452388951241
pr_and_closeness: 0.9789606497449884
weighted: 0.978755115019986


In [10]:
# Precision at several cut-offs: one header line per cut-off, followed by the
# value for every entry in `datasets`.
for cutoff in (5, 10, 20, 50):
  print('n = {n}'.format(n=cutoff))
  # The recorded output lists results under each header, i.e. the lambda is
  # called during this iteration and therefore sees the current cut-off.
  for_each_dataset(datasets, lambda ranking: metrics.precision_at(cutoff, ranking))

n = 5
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 10
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 20
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 50
baseline: 1.0
random: 0.0
naive: 0.12
only_pr: 0.12
only_betweenness: 0.12
only_closeness: 0.12
pr_and_closeness: 0.12
weighted: 0.1


In [11]:
# Apply `metrics.tau` to every entry in `datasets`; the recorded output shows a
# KendalltauResult (correlation, pvalue) per entry.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=0.0)


random: KendalltauResult(correlation=0.0008144143335145558, pvalue=0.5437096574294189)
naive: KendalltauResult(correlation=0.3412064981644293, pvalue=0.0)


only_pr: KendalltauResult(correlation=0.1841362850505524, pvalue=0.0)
only_betweenness: KendalltauResult(correlation=0.2994795576841436, pvalue=0.0)
only_closeness: KendalltauResult(correlation=0.3436571033592722, pvalue=0.0)


pr_and_closeness: KendalltauResult(correlation=0.3352705267642315, pvalue=0.0)
weighted: KendalltauResult(correlation=0.3350259694535666, pvalue=0.0)


In [12]:
# Apply `metrics.rho` to every entry in `datasets`; the recorded output shows a
# SpearmanrResult (correlation, pvalue) per entry.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=0.001218827489270819, pvalue=0.5446306104339929)


naive: SpearmanrResult(correlation=0.4924667544460554, pvalue=0.0)


only_pr: SpearmanrResult(correlation=0.2720134626289287, pvalue=0.0)


only_betweenness: SpearmanrResult(correlation=0.4393838784521557, pvalue=0.0)


only_closeness: SpearmanrResult(correlation=0.4953280457576795, pvalue=0.0)


pr_and_closeness: SpearmanrResult(correlation=0.4886452387016201, pvalue=0.0)
weighted: SpearmanrResult(correlation=0.48771723949425655, pvalue=0.0)


In [13]:
# Apply `metrics.rmse` to every entry in `datasets` (presumably root-mean-square
# error against the baseline, which scores 0.0 -- confirm in metrics.py).
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.27900201481672304
naive: 0.1735016630215673
only_pr: 0.23179383887422073
only_betweenness: 0.3885363757334465
only_closeness: 0.16710389854772648
pr_and_closeness: 0.16197747238606908
weighted: 0.16277826407220852


In [14]:
# Apply `metrics.mae` to every entry in `datasets` (presumably mean absolute
# error against the baseline, which scores 0.0 -- confirm in metrics.py).
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.24602061654528673
naive: 0.10604265460528023
only_pr: 0.17109361801483874
only_betweenness: 0.24512170213430184
only_closeness: 0.1020106393675559
pr_and_closeness: 0.11979772510829458
weighted: 0.1253377583543128
