# Applying Ranking Recovery Algorithms

In [1]:
import pandas as pd
import seaborn as sns

from fairpair import *

In [2]:
# Create a FairPairGraph with an unprivileged group.
# NOTE(review): no random seed is set anywhere in the visible cells, so the outputs
# shown below may not reproduce exactly on a fresh run — confirm whether fairpair
# exposes a seed option.
G = FairPairGraph()
# presumably 20 nodes in total, 5 of them unprivileged — TODO confirm against generate_groups
G.generate_groups(20, 5)
G.assign_skills()
# Bias is assigned only to the unprivileged nodes; loc/scale look like the location and
# spread of the bias distribution — TODO confirm against assign_bias
G.assign_bias(nodes=G.unpriv_nodes, loc=-0.5, scale=0.5)

In [3]:
# Run 1000 iterations of RandomSampling: random pairing and comparison with the BTL model.
# The comparisons are recorded on G, which is then handed to the ranker below.
sampler = RandomSampling(G, warn=False)
sampler.apply(iter=1000, k=1)

# Apply davidScore for ranking recovery.
# rank_using is passed explicitly here; when it is omitted, the rankCentrality
# method is applied by default.
ranker = RankRecovery(G)
ranking, other_nodes = ranker.apply(rank_using=davidScore)
# NOTE(review): _print_with_score is underscore-prefixed (private by convention);
# prefer a public equivalent if fairpair offers one.
ranker._print_with_score(ranking) # sorted by rank score

Unnamed: 0,node,perceived score,rank score
0,7,2.229864,147.055833
1,9,2.087246,134.045921
2,0,1.469814,113.877209
3,12,0.191428,55.730519
4,3,0.38818,49.869408
5,10,0.270668,46.369908
6,11,-0.24739,3.18643
7,2,-0.319708,2.655635
8,14,-0.097918,1.850538
9,18,-0.588904,-7.771035


In [4]:
# Apply another ranking recovery method (SVD_RS) to the same comparison graph.
# This overwrites `ranking` and `other_nodes` from the davidScore run, so every
# later cell that reads `ranking` evaluates the SVD_RS result.
ranking, other_nodes = ranker.apply(rank_using=SVD_RS)
ranker._print_with_score(ranking)

Unnamed: 0,node,perceived score,rank score
0,7,2.229864,0.897572
1,9,2.087246,0.86596
2,0,1.469814,0.701275
3,12,0.191428,0.338344
4,10,0.270668,0.304092
5,3,0.38818,0.303626
6,11,-0.24739,0.01386
7,2,-0.319708,0.013377
8,14,-0.097918,-0.016111
9,18,-0.588904,-0.041467


## Measuring Error

In [5]:
# Error of the recovered ranking, overall and per group.
print("Overall error:", weighted_tau(G, ranking))
# Supplying a subgraph of G as the third argument yields the group-conditioned error.
print("Privileged group's error:", weighted_tau(G, ranking, G.priv))
print("Unprivileged group's error:", weighted_tau(G, ranking, G.unpriv))

Overall error: 0.09368783051682861
Privileged group's error: 0.09393977410080034
Unprivileged group's error: 0.14031894800714803


In [6]:
# To speed up evaluation on very large graphs, we can cache the group membership of
# nodes and their ground-truth skill scores once, instead of reading them from G
# on every evaluation.
unpriv_nodes = G.unpriv_nodes.copy()
priv_nodes = G.priv_nodes.copy()
# The comprehension already builds a fresh dict, so no extra .copy() is needed.
skills = {node: score for node, score in G.nodes(data='skill')}
# Calculate the unprivileged group's error, but including between-group comparisons.
weighted_tau_nodes(skills, ranking, unpriv_nodes, priv_nodes)

0.14031894800714803

In [7]:
# Within-group and between-groups error can also be inspected separately.
within_priv, between = weighted_tau_separate(G, ranking, G.priv)
# The between-groups error was already obtained above, so skip recomputing it.
within_unpriv, _ = weighted_tau_separate(G, ranking, G.unpriv, calc_between=False)
print("Between-groups error:", between)
print("Privileged group's within-group error:", within_priv)
print("Unprivileged group's within-group error:", within_unpriv)

Between-groups error: 0.14285464584997543
Privileged group's within-group error: 0.04353803595025691
Unprivileged group's within-group error: 0.0803030585068736


## Measuring Group Representation

In [8]:
# Exposure of each group in the recovered ranking.
print("Privileged group's exposure:", exposure(G, ranking, G.priv))
print("Unprivileged group's exposure:", exposure(G, ranking, G.unpriv))

Privileged group's exposure: 0.3871164120254849
Unprivileged group's exposure: 0.24670444030824762


In [9]:
# Again, to speed up evaluation on very large graphs, we can cache the group membership of nodes.
# The copy is re-created here so this cell does not rely on an `unpriv_nodes`
# variable left over from an earlier cell.
unpriv_nodes = G.unpriv_nodes.copy()
exposure_nodes(ranking, unpriv_nodes) # calculate the unprivileged group's exposure

0.24670444030824762