In [1]:
%load_ext autoreload



In [2]:
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.investment_and_cofound.baseline as baseline
import experiments.founder_rank.data.investment_and_cofound.random as random
import experiments.founder_rank.data.investment_and_cofound.naive as naive
import experiments.founder_rank.data.investment_and_cofound.graph_metrics as graph_metrics

In [5]:
# Load the reference datasets from the three data modules. The result is
# indexed by string key in later cells (e.g. datasets['baseline']) and gets
# extra entries assigned to it by the regression loop.
# NOTE(review): presumably keyed by module name — confirm in helpers.get_datasets.
datasets = get_datasets([baseline, random, naive])
# Graph-metric table, loaded separately through get_dataset. Later cells
# select its columns with .take(..., 1) and slice it as [:, 1:], so it is
# assumed to be a 2-D array-like — TODO confirm.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both the metrics object and the learner are built from the baseline dataset.
# NOTE(review): baseline appears to act as the reference ordering (it scores a
# perfect 1.0 NDCG / 0.0 error in the outputs below) — confirm in metrics.Metrics.
# power_dcg=False is passed through as-is; its exact meaning is defined in
# metrics.Metrics — TODO confirm which DCG variant it selects.
metrics = metric.Metrics(datasets['baseline'], power_dcg=False)
learner = learn.Learner(datasets['baseline'])

In [7]:
# Build one two-column view per graph metric: column 0 plus a single metric
# column (1, 2 or 3), selected along axis 1.
# Going by the variable names, columns 1/2/3 hold PageRank, betweenness and
# closeness — TODO confirm against the graph_metrics data module.
# NOTE(review): these three names are not referenced by any other visible
# cell; the VERSIONS loop rebuilds the same column selections itself.
only_pr, only_betweenness, only_closeness = (
  with_graph_metrics.take([0, metric_column], 1) for metric_column in (1, 2, 3)
)

# Pairwise correlation between the metric columns (everything except
# column 0). rowvar=False makes each column a variable and each row an
# observation, which yields the 3x3 matrix shown in the cell output.
metric_columns = with_graph_metrics[:, 1:]
np.corrcoef(metric_columns, rowvar=False)

array([[1.        , 0.57454517, 0.16183291],
       [0.57454517, 1.        , 0.16202516],
       [0.16183291, 0.16202516, 1.        ]])

In [8]:
# Feature-set variants to evaluate: variant name -> metric column indexes of
# with_graph_metrics that feed the regression.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

# One report line per variant: its name, the r^2 value and the fitted coefficients.
REPORT_TEMPLATE = '{n}: r^2={r2}, coeff={c}'

for name in VERSIONS:
  indexes = VERSIONS[name]
  # Column 0 is always kept alongside the selected metric columns.
  selected_columns = [0, *indexes]
  features = with_graph_metrics.take(selected_columns, 1)
  # linear_regression returns a 3-tuple; the first element is stored as a new
  # dataset under the variant name so the metric cells below score it too.
  dataset, coeff, r2 = learner.linear_regression(features)
  datasets[name] = dataset
  print(REPORT_TEMPLATE.format(n=name, r2=r2, c=coeff))

only_pr: r^2=-0.10106372204279035, coeff=[41.98207943]


only_betweenness: r^2=-0.5643993618267507, coeff=[62.98753644]


only_closeness: r^2=-0.24430563840270803, coeff=[7.6272336]


pr_and_closeness: r^2=0.012302550255162248, coeff=[33.11544536  4.7490373 ]


weighted: r^2=0.03588064392600687, coeff=[ 37.61174324 -63.25918829   4.74786034]


In [9]:
# Print metrics.idcg first (presumably the ideal DCG used to normalise NDCG —
# TODO confirm), then apply metrics.ndcg to every dataset; the cell output
# shows one "name: value" line per dataset.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

1803120882.6055367
baseline: 1.0
random: 0.952223680334966
naive: 0.9726146508370804
only_pr: 0.9639050292229764
only_betweenness: 0.9666255491116394
only_closeness: 0.9728868277889354
pr_and_closeness: 0.9718675463983085
weighted: 0.9713188105382154


In [10]:
# Precision at several cutoffs: for each cutoff n, print a header line and
# then metrics.precision_at(n, ...) for every dataset.
for n in (5, 10, 20, 50):
  header = 'n = {n}'.format(n=n)
  print(header)
  # The lambda reads the current loop value of n each time it is called.
  for_each_dataset(datasets, lambda entry: metrics.precision_at(n, entry))

n = 5
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 10
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 20
baseline: 1.0
random: 0.0
naive: 0.0
only_pr: 0.0
only_betweenness: 0.0
only_closeness: 0.0
pr_and_closeness: 0.0
weighted: 0.0
n = 50
baseline: 1.0
random: 0.0
naive: 0.02
only_pr: 0.02
only_betweenness: 0.02
only_closeness: 0.02
pr_and_closeness: 0.02
weighted: 0.0


In [11]:
# Apply metrics.tau to every dataset. The output shows KendalltauResult
# tuples (correlation, pvalue), i.e. Kendall's tau rank correlation.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=0.0)
random: KendalltauResult(correlation=-0.0011156797449876057, pvalue=0.4160379505058296)
naive: KendalltauResult(correlation=0.21874994636037004, pvalue=0.0)


only_pr: KendalltauResult(correlation=0.10864452090002644, pvalue=0.0)


only_betweenness: KendalltauResult(correlation=0.10210649594105105, pvalue=0.0)
only_closeness: KendalltauResult(correlation=0.2447493098926, pvalue=0.0)


pr_and_closeness: KendalltauResult(correlation=0.19428136066025287, pvalue=0.0)
weighted: KendalltauResult(correlation=0.1928616696559311, pvalue=0.0)


In [16]:
# Apply metrics.rho to every dataset. The output shows SpearmanrResult
# tuples (correlation, pvalue), i.e. Spearman's rank correlation.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=-0.001677123237573021, pvalue=0.4150335017596123)
naive: SpearmanrResult(correlation=0.31538089047266643, pvalue=0.0)


only_pr: SpearmanrResult(correlation=0.16685988762607853, pvalue=0.0)
only_betweenness: SpearmanrResult(correlation=0.14459547289399963, pvalue=0.0)
only_closeness: SpearmanrResult(correlation=0.35730751669923916, pvalue=0.0)


pr_and_closeness: SpearmanrResult(correlation=0.28438804987810185, pvalue=0.0)
weighted: SpearmanrResult(correlation=0.2825024420538625, pvalue=0.0)


In [17]:
# Apply metrics.rmse to every dataset (root-mean-squared error, going by the
# name — TODO confirm in metrics.Metrics). Baseline scores 0.0 in the output.
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.30832605880776437
naive: 0.33882752090101576
only_pr: 0.24104172617185976
only_betweenness: 0.36002951005844636
only_closeness: 0.24082281617850881
pr_and_closeness: 0.20326171161160123
weighted: 0.19741332031333797


In [14]:
# Apply metrics.mae to every dataset (mean absolute error, going by the
# name — TODO confirm in metrics.Metrics). Baseline scores 0.0 in the output.
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.2770558819972338
naive: 0.20948839672996178
only_pr: 0.159347902362262
only_betweenness: 0.22043264703356732
only_closeness: 0.15443754216880398
pr_and_closeness: 0.14436619493368347
weighted: 0.14440584977840926
