In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import torch

from itertools import product
import multiprocessing

from fairpair import *

import sys
# Extend the module search path so that GNNRank's `src` package can be imported below.
# The path is relative, so it is resolved against the current working directory.
sys.path.append('../GNNRank/')
from src.param_parser import ArgsNamespace # just import the class, not the parser
from src.Trainer_fair import Trainer

We could install `torchsort` from: https://github.com/teddykoker/torchsort/releases/download/v0.1.9/torchsort-0.1.9+pt113cu117-cp311-cp311-linux_x86_64.whl

…or we could simply keep the "raw" scores instead of converting them to ranks, so that they remain differentiable.

In [2]:
# Root of the local GNNRank checkout.
# NOTE: machine-specific absolute path -- adjust this single constant when running elsewhere.
GNNRANK_ROOT = '/home/georg/fairpair/GNNRank'

# Configuration handed to the GNNRank `Trainer`.
# NOTE(review): `cuda=True` / `no_cuda=False` are set alongside a CPU `device`;
# confirm which of these flags the trainer actually honours.
args = ArgsNamespace(
    AllTrain=True, ERO_style='uniform', F=70, Fiedler_layer_num=5, K=20, N=350, SavePred=False,
    all_methods=['DIGRAC', 'ib'],
    alpha=1.0, baseline='syncRank', cuda=True,
    data_path=f'{GNNRANK_ROOT}/data/',
    dataset='fairGNNRank_test',  # plain literal: no placeholders, so no f-string needed
    be_silent=True,
    debug=False, device=torch.device(type='cpu'), dropout=0.5, early_stopping=200, epochs=1000,
    eta=0.1, fill_val=0.5, hidden=8, hop=2,
    load_only=True,
    log_root=f'{GNNRANK_ROOT}/logs/',
    lr=0.1, no_cuda=False, num_trials=1, optimizer='Adam', p=0.05,
    pretrain_epochs=50, pretrain_with='dist', regenerate_data=True, season=1990, seed=31, seeds=[10],
    sigma=1.0, tau=0.5, test_ratio=1,
    train_ratio=1, train_with='proximal_baseline', trainable_alpha=False, upset_margin=0.01,
    upset_margin_coeff=0, upset_ratio_coeff=1.0, weight_decay=0.0005,
    exposure_coeff=0.9,
)

# Seed every RNG this notebook has in scope so that a fresh "Run All" is repeatable.
# torch covers the model; NumPy's global RNG is seeded as well because the graph
# generation / sampling below is stochastic (whether fairpair draws from NumPy's
# global RNG is not visible here -- TODO confirm).
torch.manual_seed(args.seed)
np.random.seed(args.seed)

<torch._C.Generator at 0x7f79e91b9390>

In [3]:
# Size of the synthetic population: 40 nodes in total, 20 of them in the minority group,
# i.e. both groups have the same size.
num_nodes, num_minority = 40, 20

G = FairPairGraph()
G.generate_groups(num_nodes, num_minority)

# Skills for everybody come from one general distribution ...
G.assign_skills(loc=0, scale=0.86142674)
# ... and only the unprivileged (minority) group additionally receives a bias.
G.assign_bias(nodes=G.minority_nodes, loc=-1.43574282, scale=0.43071336)

In [4]:
# Populate the graph with pairwise comparisons using fairpair's random sampling strategy
# (warnings from the sampler are switched off).
sampler = RandomSampling(G, warn=False)
# presumably 500 sampling iterations with k=1 comparison per sampled pair -- confirm against fairpair
sampler.apply(iter=500, k=1)

In [5]:
# One minority flag per node, in the graph's node order.
groups = np.array([is_minority for _, is_minority in G.nodes(data='minority')])

# Weighted adjacency of the comparison graph (returned in sparse form).
adj = nx.adjacency_matrix(G, weight='weight')

# Initialise the trainer directly with the adjacency matrix and the group flags.
trainer = Trainer(
    args,
    random_seed=10,
    save_name_base='test',
    adj=adj,
    groups=groups,
)

In [6]:
# Sanity check: show the group flags as stored on the trainer
# (presumably derived from the `groups` array passed to the constructor).
trainer.groups

tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True])

In [7]:
# Train the 'ib' model. The two return values are kept as paths; `save_path_best` is
# later passed to `predict_nn` as `model_path` (names suggest best vs. latest checkpoint).
save_path_best, save_path_latest = trainer.train(model_name='ib')

  0%|          | 0/1000 [00:00<?, ?epochs/s]

train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.6170, 0.0007, 0.6192
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 0.7699
train/exposure/combined loss: 0.7699, 0.0000, 

In [8]:
# Run inference with the best checkpoint from training.
score, pred_label = trainer.predict_nn(
    model_name='ib',
    model_path=save_path_best,
    A=None,
    GNN_variant='proximal_baseline',
)

# Map each row index to `1 - <first column of the score row>`.
# NOTE(review): the flip presumably aligns the score direction with what
# `scores_to_rank` expects -- confirm.
score_rows = score.cpu().detach().numpy()
ranking = {node_idx: 1 - row[0] for node_idx, row in enumerate(score_rows)}

In [9]:
# Evaluate the GNN-based ranking against the ground-truth skills stored on the graph.
ranking_as_ranks = scores_to_rank(ranking, invert=False)
base_scores = [skill for _, skill in G.nodes(data='skill')]

# Derive the node groups from the graph's 'minority' attribute instead of hard-coding
# index ranges, so this cell stays correct when the sizes passed to `generate_groups` change.
all_nodes = list(G.nodes)
majority_nodes = [node for node, is_minority in G.nodes(data='minority') if not is_minority]
minority_nodes = [node for node, is_minority in G.nodes(data='minority') if is_minority]

# Ranking error: overall, and per group relative to the other group.
for group_name, subgraph, complement in [
    ('Overall', all_nodes, []),
    ('Majority', majority_nodes, minority_nodes),
    ('Minority', minority_nodes, majority_nodes),
]:
    tau = weighted_tau_nodes(base_scores, ranking_as_ranks, subgraph_nodes=subgraph, complementary_nodes=complement)
    print(f'{group_name} error', tau)

# Exposure received by each group in the recovered ranking.
for group_name, subgraph in [('Majority', majority_nodes), ('Minority', minority_nodes)]:
    exp = exposure_nodes(ranking_as_ranks, subgraph_nodes=subgraph)
    print(f'{group_name} exposure', exp)

Overall error 0.27756810996904957
Majority error 0.29423872742534846
Minority error 0.3373318282734238
Majority exposure 0.3429486601082835
Minority exposure 0.21160297442439538


In [10]:
# Inspect the dimensions of the feature matrix held by the trainer.
trainer.features.shape

torch.Size([40, 41])

In [11]:
# Look at the feature vector of the first row as a spot check.
trainer.features[0,:]

tensor([ -0.2308,  -1.5952,  -0.7228,   1.0095,  -0.3774,   2.2280,   0.0453,
         -0.1164,  -0.7551,   2.2361,   1.9317,   0.1265,   1.9759,   0.2779,
         -0.3362,   0.6641,  -0.3251,   0.0656,   0.7208,  -0.7944,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000, -10.0000])

In [12]:
# Baseline for comparison: recover a ranking with fairpair's `RankRecovery` and
# evaluate it with the same metrics as the GNN-based ranking.
ranker = RankRecovery(G)
scores, nodes = ranker.apply()
ranking_as_ranks = scores_to_rank(scores, invert=False)
base_scores = [skill for _, skill in G.nodes(data='skill')]

# Derive the node groups from the graph's 'minority' attribute instead of hard-coding
# index ranges, so this cell stays correct when the sizes passed to `generate_groups` change.
all_nodes = list(G.nodes)
majority_nodes = [node for node, is_minority in G.nodes(data='minority') if not is_minority]
minority_nodes = [node for node, is_minority in G.nodes(data='minority') if is_minority]

# Ranking error: overall, and per group relative to the other group.
for group_name, subgraph, complement in [
    ('Overall', all_nodes, []),
    ('Majority', majority_nodes, minority_nodes),
    ('Minority', minority_nodes, majority_nodes),
]:
    tau = weighted_tau_nodes(base_scores, ranking_as_ranks, subgraph_nodes=subgraph, complementary_nodes=complement)
    print(f'{group_name} error', tau)

# Exposure received by each group in the recovered ranking.
for group_name, subgraph in [('Majority', majority_nodes), ('Minority', minority_nodes)]:
    exp = exposure_nodes(ranking_as_ranks, subgraph_nodes=subgraph)
    print(f'{group_name} exposure', exp)

Overall error 0.26621178675828944
Majority error 0.28049463306296935
Minority error 0.32407783610015917
Majority exposure 0.34236361501154333
Minority exposure 0.21218801952113564
