In [1]:
%load_ext autoreload



In [2]:
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.email_and_investment.baseline as baseline
import experiments.founder_rank.data.email_and_investment.random as random
import experiments.founder_rank.data.email_and_investment.naive as naive
import experiments.founder_rank.data.email_and_investment.graph_metrics as graph_metrics

In [5]:
# Load the three datasets that are compared throughout this notebook.
# Note: `random` here is the project module imported above
# (experiments.founder_rank.data.email_and_investment.random), not the
# standard-library `random`.
# The result is indexed by name in later cells (e.g. datasets['baseline']).
datasets = get_datasets([baseline, random, naive])
# Load the graph-metrics data on its own; later cells slice it by column
# instead of adding it to `datasets` as-is.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# Both the scorer and the regression learner are constructed from the
# 'baseline' dataset, so the scores reported below are relative to it
# (baseline scores 1.0 on ndcg/precision and 0.0 on rmse/mae in the outputs).
# NOTE(review): power_dcg=True presumably selects an exponential-gain DCG
# variant -- confirm in metric.Metrics.
metrics = metric.Metrics(datasets['baseline'], power_dcg=True)
learner = learn.Learner(datasets['baseline'])

In [7]:
# Pair column 0 with each single graph-metric column (1, 2 and 3 in turn).
# Going by the names, columns 1-3 are PageRank, betweenness and closeness;
# presumably column 0 is the row identifier -- TODO confirm.
only_pr, only_betweenness, only_closeness = (
  with_graph_metrics.take([0, metric_column], axis=1)
  for metric_column in (1, 2, 3)
)

# Pairwise correlation between the metric columns (everything except
# column 0); rowvar=False treats each column as one variable.
np.corrcoef(with_graph_metrics[:, 1:], rowvar=False)

array([[1.        , 0.86637874, 0.50515001],
       [0.86637874, 1.        , 0.46089968],
       [0.50515001, 0.46089968, 1.        ]])

In [8]:
# Feature-column subsets of `with_graph_metrics` to regress on, keyed by the
# name under which each resulting dataset is stored in `datasets`.
VERSIONS = {
  'only_pr': [1],
  'only_betweenness': [2],
  'only_closeness': [3],
  'pr_and_closeness': [1, 3],
  'weighted': [1, 2, 3],
}

# Fit one linear regression per subset; column 0 is always carried along with
# the selected metric columns.
# NOTE(review): the printed r^2 is negative for 'only_pr' and
# 'only_betweenness' (see output), i.e. worse than predicting the mean --
# confirm how learner.linear_regression computes it before relying on it.
for name in VERSIONS:
  indexes = VERSIONS[name]
  features = with_graph_metrics.take([0, *indexes], axis=1)
  dataset, coeff, r2 = learner.linear_regression(features)
  datasets[name] = dataset
  print('{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff))

only_pr: r^2=-0.14339536325205726, coeff=[1.49937105]
only_betweenness: r^2=-0.35250386253765553, coeff=[3.77276408]
only_closeness: r^2=0.29661197367324565, coeff=[0.4373447]
pr_and_closeness: r^2=0.3523744437811558, coeff=[0.47775398 0.35255035]
weighted: r^2=0.3566770330143395, coeff=[0.28085778 0.5881426  0.3550877 ]


In [9]:
# Print the normaliser first, then one `name: value` NDCG line per dataset.
# NOTE(review): the printed idcg is on the order of 1e188 (see output),
# presumably a consequence of power_dcg=True -- check that this cannot
# overflow a float for larger inputs.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

2.1435899981703272e+188
baseline: 1.0
random: 0.2050962238136009
naive: 0.40207493336888495
only_pr: 0.522447685457687
only_betweenness: 0.3804559903074155
only_closeness: 0.33532696371406806
pr_and_closeness: 0.5300773049265486
weighted: 0.40402314237049924


In [10]:
# Precision at several cutoffs, one `name: value` line per dataset under each
# "n = ..." header.
for n in (5, 10, 20, 50):
  print('n = {n}'.format(n=n))
  # The cutoff is bound as a default argument so the callback keeps this
  # iteration's value regardless of when it is invoked.
  for_each_dataset(datasets, lambda a, cutoff=n: metrics.precision_at(cutoff, a))

n = 5
baseline: 1.0
random: 0.2
naive: 0.2
only_pr: 0.2
only_betweenness: 0.4
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.4
n = 10
baseline: 1.0
random: 0.1
naive: 0.2
only_pr: 0.1
only_betweenness: 0.2
only_closeness: 0.3
pr_and_closeness: 0.2
weighted: 0.2
n = 20
baseline: 1.0
random: 0.05
naive: 0.2
only_pr: 0.15
only_betweenness: 0.2
only_closeness: 0.25
pr_and_closeness: 0.15
weighted: 0.2
n = 50
baseline: 1.0
random: 0.08
naive: 0.36
only_pr: 0.3
only_betweenness: 0.34
only_closeness: 0.32
pr_and_closeness: 0.34
weighted: 0.36


In [11]:
# Kendall's tau per dataset; each line of output is a KendalltauResult
# carrying both the correlation and its p-value.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=4.663665584756559e-306)
random: KendalltauResult(correlation=0.02798974358974359, pvalue=0.29524530991197917)
naive: KendalltauResult(correlation=0.49026666666666674, pvalue=4.459063590685215e-75)
only_pr: KendalltauResult(correlation=0.4077025641025641, pvalue=1.746354685381768e-52)
only_betweenness: KendalltauResult(correlation=0.439323076923077, pvalue=1.1937161773347303e-60)
only_closeness: KendalltauResult(correlation=0.5070974358974359, pvalue=3.445930979275169e-80)
pr_and_closeness: KendalltauResult(correlation=0.48546666666666677, pvalue=1.1903903655343325e-73)
weighted: KendalltauResult(correlation=0.4906256410256411, pvalue=3.4834078427928916e-75)


In [12]:
# Spearman's rho per dataset; each line of output is a SpearmanrResult
# carrying both the correlation and its p-value.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=0.040921848119931194, pvalue=0.307052599768919)
naive: SpearmanrResult(correlation=0.6745325141312363, pvalue=3.661527661420181e-84)
only_pr: SpearmanrResult(correlation=0.5748610960924061, pvalue=2.9640386003504695e-56)
only_betweenness: SpearmanrResult(correlation=0.6145351191939052, pvalue=3.5966873396555386e-66)
only_closeness: SpearmanrResult(correlation=0.695817006635537, pvalue=1.2863946528100306e-91)
pr_and_closeness: SpearmanrResult(correlation=0.6699613172769723, pvalue=1.2123256280863849e-82)
weighted: SpearmanrResult(correlation=0.674531137871713, pvalue=3.6654226887836825e-84)


In [13]:
# Apply metrics.rmse to every dataset (presumably root-mean-square error
# against the baseline, which scores 0.0 against itself -- confirm in Metrics).
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.2528667781399638
naive: 0.11133291983782746
only_pr: 0.14230536458600626
only_betweenness: 0.1955745389131885
only_closeness: 0.14753239584702885
pr_and_closeness: 0.1096812130443369
weighted: 0.10827080650623466


In [14]:
# Apply metrics.mae to every dataset (presumably mean absolute error against
# the baseline, which scores 0.0 against itself -- confirm in Metrics).
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.23501938414445367
naive: 0.07781549865554455
only_pr: 0.11381767558365218
only_betweenness: 0.1566008728642685
only_closeness: 0.105059932466047
pr_and_closeness: 0.08319278577168956
weighted: 0.0831947037957496
