In [1]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload



In [2]:
# Mode 2: re-import all modules before each cell runs, so edits to the
# experiments.founder_rank package are picked up without restarting the kernel.
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.email_and_investment.baseline as baseline
import experiments.founder_rank.data.email_and_investment.random as random
import experiments.founder_rank.data.email_and_investment.naive as naive
import experiments.founder_rank.data.email_and_investment.graph_metrics as graph_metrics

In [5]:
# Load the reference rankings into a mapping keyed by dataset name (later
# cells read datasets['baseline'] and add further entries by name).
datasets = get_datasets([baseline, random, naive])
# Graph-metric feature matrix. Later cells always keep column 0 and treat
# columns 1-3 as the features.
# NOTE(review): presumably 1=PageRank, 2=betweenness, 3=closeness, judging
# by the variable names used further down -- confirm against the data module.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Scorer built from the baseline dataset.
# NOTE(review): baseline appears to serve as the ground truth (it scores
# perfectly on every metric reported below) -- confirm in Metrics.
# TODO confirm what power_dcg=True changes in the DCG computation.
metrics = metric.Metrics(datasets['baseline'], power_dcg=True)
# Regression helper built from the same baseline dataset.
learner = learn.Learner(datasets['baseline'])

In [7]:
# Two-column views of the feature matrix: column 0 plus exactly one
# graph-metric column each.
# NOTE(review): the column order (1=PageRank, 2=betweenness, 3=closeness)
# is inferred from the variable names -- confirm against the data module.
only_pr, only_betweenness, only_closeness = (
    with_graph_metrics.take([0, column], 1) for column in (1, 2, 3)
)

# Pairwise correlation of the metric columns (column 0 is excluded);
# rowvar=False makes each column a variable and each row an observation.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.86637874, 0.50515001],
       [0.86637874, 1.        , 0.46089968],
       [0.50515001, 0.46089968, 1.        ]])

In [8]:
# Feature subsets to evaluate: dataset name -> graph-metric column indexes
# in with_graph_metrics. Insertion order fixes the order of every report below.
VERSIONS = {
    'only_pr': [1],
    'only_betweenness': [2],
    'only_closeness': [3],
    'pr_and_closeness': [1, 3],
    'weighted': [1, 2, 3],
}

# Fit one linear regression per feature subset and register the resulting
# dataset under its name so the metric cells below pick it up.
for name, indexes in VERSIONS.items():
    # Column 0 is always kept alongside the selected metric columns.
    features = with_graph_metrics.take([0, *indexes], 1)
    dataset, coeff, r2 = learner.linear_regression(features)
    datasets[name] = dataset
    print('{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff))

only_pr: r^2=0.20074377657578757, coeff=[1.09240424]
only_betweenness: r^2=0.1239092056140283, coeff=[2.82517662]
only_closeness: r^2=0.1365632094495306, coeff=[0.25417921]
pr_and_closeness: r^2=0.25736841572040037, coeff=[0.73266927 0.12414112]
weighted: r^2=0.2621030605317348, coeff=[0.51746646 0.64282569 0.12691438]


In [9]:
# Print the IDCG value held by the Metrics object.
# NOTE(review): presumably the ideal DCG used to normalise the scores
# below -- confirm in Metrics.
print(metrics.idcg)
# Report metrics.ndcg for every registered dataset.
for_each_dataset(datasets, metrics.ndcg)

2.1435899981703272e+188
baseline: 1.0
random: 0.167361256290268
naive: 0.40055521003852784
only_pr: 0.5233049575018469
only_betweenness: 0.3722327356938505
only_closeness: 0.32367747808404357
pr_and_closeness: 0.5254517088597782
weighted: 0.39817527429335625


In [10]:
# Report metrics.precision_at for each cut-off n, for every dataset.
for n in (5, 10, 20, 50):
    print('n = {n}'.format(n=n))
    # The lambda reads `n` when it is called; that is safe here because
    # for_each_dataset runs it before the loop advances (the per-n results
    # are printed directly under each header).
    for_each_dataset(datasets, lambda a: metrics.precision_at(n, a))

n = 5
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.0
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.0
naive: 0.2
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.2
n = 20
baseline: 1.0
random: 0.0
naive: 0.15
only_pr: 0.15
only_betweenness: 0.2
only_closeness: 0.2
pr_and_closeness: 0.1
weighted: 0.1
n = 50
baseline: 1.0
random: 0.12
naive: 0.34
only_pr: 0.3
only_betweenness: 0.3
only_closeness: 0.26
pr_and_closeness: 0.32
weighted: 0.32


In [11]:
# Kendall's tau (correlation and p-value) for every dataset, via metrics.tau.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=4.663665584756559e-306)
random: KendalltauResult(correlation=0.012471794871794874, pvalue=0.6409391143150687)
naive: KendalltauResult(correlation=0.5910871794871796, pvalue=2.9013200840505544e-108)
only_pr: KendalltauResult(correlation=0.4918461538461539, pvalue=1.5024737153349168e-75)
only_betweenness: KendalltauResult(correlation=0.5301333333333333, pvalue=1.830624679442366e-87)
only_closeness: KendalltauResult(correlation=0.5959179487179488, pvalue=5.222074919615367e-110)
pr_and_closeness: KendalltauResult(correlation=0.543671794871795, pvalue=6.873712483871131e-92)
weighted: KendalltauResult(correlation=0.5527179487179488, pvalue=6.581487863777273e-95)


In [12]:
# Spearman's rho (correlation and p-value) for every dataset, via metrics.rho.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=0.01722280658638486, pvalue=0.6673847059246989)
naive: SpearmanrResult(correlation=0.7733822560825757, pvalue=1.957267273461226e-125)
only_pr: SpearmanrResult(correlation=0.6640197591545836, pvalue=1.0448630784615618e-80)
only_betweenness: SpearmanrResult(correlation=0.7068505283853527, pvalue=9.586410547982706e-96)
only_closeness: SpearmanrResult(correlation=0.780197444089457, pvalue=4.7413889543749717e-129)
pr_and_closeness: SpearmanrResult(correlation=0.7244870975669698, pvalue=9.282255298624605e-103)
weighted: SpearmanrResult(correlation=0.7323368395183092, pvalue=4.64553616679605e-106)


In [13]:
# Report metrics.rmse for every dataset.
# NOTE(review): presumably root-mean-square error against the baseline
# scores (baseline itself reports 0.0) -- confirm in Metrics.
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.40477370053325157
naive: 0.2354368168505992
only_pr: 0.1123087765422025
only_betweenness: 0.13895131337704006
only_closeness: 0.20552558207426372
pr_and_closeness: 0.13598175967783058
weighted: 0.1347795197707724


In [14]:
# Report metrics.mae for every dataset.
# NOTE(review): presumably mean absolute error against the baseline
# scores (baseline itself reports 0.0) -- confirm in Metrics.
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.35873012703101553
naive: 0.19986531186838594
only_pr: 0.05700754659728666
only_betweenness: 0.06399980713742542
only_closeness: 0.14262043163552413
pr_and_closeness: 0.09502095746392956
weighted: 0.09394760456323875
