In [1]:
%load_ext autoreload



In [2]:
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.email_and_investment.baseline as baseline
import experiments.founder_rank.data.email_and_investment.random as random
import experiments.founder_rank.data.email_and_investment.naive as naive
import experiments.founder_rank.data.email_and_investment.graph_metrics as graph_metrics

In [5]:
# Load the three reference rankings. Later cells index the result by name
# (datasets['baseline']) and add more entries to it, so get_datasets presumably
# returns a dict keyed by module name — TODO confirm in helpers.
# Note: `random` here is the project data module imported above, not the stdlib module.
datasets = get_datasets([baseline, random, naive])
# Graph-metric table, loaded on its own: it is sliced by column and fed to the
# regression below rather than being scored directly.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both the scorer and the learner are built from the baseline dataset, which makes
# it the reference every other ranking is compared against in the cells below.
# NOTE(review): power_dcg=True presumably selects an exponential-gain DCG variant —
# confirm in metrics.Metrics.
metrics = metric.Metrics(datasets['baseline'], power_dcg=True)
learner = learn.Learner(datasets['baseline'])

In [7]:
# One two-column slice per graph metric: column 0 plus a single metric column
# (1, 2, 3 in the order pr, betweenness, closeness used by the names below).
# NOTE(review): column 0 is presumably the row identifier/target — confirm.
only_pr, only_betweenness, only_closeness = (
    with_graph_metrics.take([0, metric_column], axis=1)
    for metric_column in (1, 2, 3)
)

# Correlation matrix of the metric columns only (column 0 is dropped);
# rowvar=False treats each column as a variable and each row as an observation.
# Kept as the last expression so the notebook displays the matrix.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.85731231, 0.51810301],
       [0.85731231, 1.        , 0.47359124],
       [0.51810301, 0.47359124, 1.        ]])

In [8]:
# Column positions inside with_graph_metrics, named after the model keys they feed.
# NOTE(review): column 0 is included in every feature subset — presumably the row
# identifier/target expected by learner.linear_regression; confirm in learner.
LEAD_COL = 0
PR_COL, BETWEENNESS_COL, CLOSENESS_COL = 1, 2, 3

# Model name -> metric columns used as regression features.
VERSIONS = {
  'only_pr': [PR_COL],
  'only_betweenness': [BETWEENNESS_COL],
  'only_closeness': [CLOSENESS_COL],
  'pr_and_closeness': [PR_COL, CLOSENESS_COL],
  'weighted': [PR_COL, BETWEENNESS_COL, CLOSENESS_COL],
}

# Fit one regression per feature subset, register the resulting dataset under the
# model name so the metric cells below score it, and report the fit.
for name, indexes in VERSIONS.items():
  selected_columns = [LEAD_COL] + indexes
  features = with_graph_metrics.take(selected_columns, axis=1)
  fit_result = learner.linear_regression(features)
  dataset, coeff, r2 = fit_result
  datasets[name] = dataset
  report_line = '{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff)
  print(report_line)

only_pr: r^2=-0.16765133546606958, coeff=[1.50475464]
only_betweenness: r^2=-0.38979016065395866, coeff=[2.64292883]
only_closeness: r^2=0.3428048821764943, coeff=[0.47095821]
pr_and_closeness: r^2=0.38507247000554357, coeff=[0.41761361 0.39276886]
weighted: r^2=0.3893934239771776, coeff=[0.23130514 0.3969971  0.39494439]


In [9]:
# Print the scorer's idcg value first — presumably the ideal DCG that ndcg
# normalises by; confirm in metrics.Metrics.
print(metrics.idcg)
# Apply metrics.ndcg to every dataset (reference rankings plus the regression
# models registered above); the recorded output is one `name: value` line each.
for_each_dataset(datasets, metrics.ndcg)

1.8592679463475738e+170
baseline: 1.0
random: 0.17437376007025185
naive: 0.4035451449724314
only_pr: 0.5261198672450327
only_betweenness: 0.4525769836150943
only_closeness: 0.40587442408725594
pr_and_closeness: 0.5311447991259711
weighted: 0.40486252857796146


In [10]:
# Precision at several cut-offs, reported for every dataset.
for n in [5, 10, 20, 50]:
  print('n = {n}'.format(n=n))
  # Bind the current cut-off as a default argument. A plain `lambda a: ...n...`
  # looks `n` up only when it is called, so it would silently use a later value
  # of `n` if the callback were ever invoked after the loop had moved on.
  for_each_dataset(datasets, lambda a, n=n: metrics.precision_at(n, a))

n = 5
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.4
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.1
only_betweenness: 0.3
only_closeness: 0.4
pr_and_closeness: 0.2
weighted: 0.3
n = 20
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.35
pr_and_closeness: 0.2
weighted: 0.2
n = 50
baseline: 1.0
random: 0.1
naive: 0.44
only_pr: 0.34
only_betweenness: 0.46
only_closeness: 0.4
pr_and_closeness: 0.42
weighted: 0.44


In [11]:
# Apply metrics.tau to every dataset; the recorded output shows
# KendalltauResult(correlation, pvalue) values, i.e. Kendall's tau rank correlation.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=0.9999999999999999, pvalue=1.0113376142855542e-276)
random: KendalltauResult(correlation=0.08727797652670556, pvalue=0.0019205579361579966)
naive: KendalltauResult(correlation=0.4865248226950354, pvalue=5.2867003378741474e-67)
only_pr: KendalltauResult(correlation=0.3940751898575284, pvalue=1.4085810520268382e-44)
only_betweenness: KendalltauResult(correlation=0.4442477876106194, pvalue=3.6139020914644973e-56)
only_closeness: KendalltauResult(correlation=0.503395468524446, pvalue=1.3388577789769523e-71)
pr_and_closeness: KendalltauResult(correlation=0.48931149187221484, pvalue=9.43371118384229e-68)
weighted: KendalltauResult(correlation=0.4944078327998493, pvalue=3.9338177462744516e-69)


In [12]:
# Apply metrics.rho to every dataset; the recorded output shows
# SpearmanrResult(correlation, pvalue) values, i.e. Spearman rank correlation.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=0.13119108084893477, pvalue=0.001778131971169338)
naive: SpearmanrResult(correlation=0.6728925165650597, pvalue=9.755423627607468e-76)
only_pr: SpearmanrResult(correlation=0.5589049462646032, pvalue=9.817869398306893e-48)
only_betweenness: SpearmanrResult(correlation=0.6228071879617808, pvalue=5.292645641667232e-62)
only_closeness: SpearmanrResult(correlation=0.6950454187688331, pvalue=1.0242510067882187e-82)
pr_and_closeness: SpearmanrResult(correlation=0.6769850959372941, pvalue=5.5735475342926996e-77)
weighted: SpearmanrResult(correlation=0.6828321511523852, pvalue=8.603725759373953e-79)


In [13]:
# Apply metrics.rmse to every dataset. Going by the name this is a
# root-mean-square error against the baseline the scorer was built from
# (baseline itself reports 0.0) — confirm in metrics.Metrics.
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.27005150088695445
naive: 0.08857031362788999
only_pr: 0.15240612814010734
only_betweenness: 0.20759500738536904
only_closeness: 0.13680591913033702
pr_and_closeness: 0.10522638611360567
weighted: 0.10339011621827623


In [14]:
# Apply metrics.mae to every dataset. Going by the name this is a mean absolute
# error against the baseline the scorer was built from (baseline itself
# reports 0.0) — confirm in metrics.Metrics.
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.2527288354593246
naive: 0.059927085965167624
only_pr: 0.12035913292282303
only_betweenness: 0.16574119651820599
only_closeness: 0.09666375666496407
pr_and_closeness: 0.07894007565695513
weighted: 0.07901385597790482
