In [25]:
import networkx as nx
import numpy as np
import pandas as pd
import torch

from itertools import product
import multiprocessing

from fairpair import *

import sys
sys.path.append('../GNNRank/')
from src.param_parser import ArgsNamespace # just import the class, not the parser
from src.Trainer_fair import Trainer

We could install `torchsort` (for differentiable ranking) from: https://github.com/teddykoker/torchsort/releases/download/v0.1.9/torchsort-0.1.9+pt113cu117-cp311-cp311-linux_x86_64.whl

…or we could simply keep the "raw" scores, rather than converting them to ranks, so that they stay differentiable.

In [26]:
# Run configuration for the GNNRank trainer, one setting per line for easier diffing.
# NOTE(review): data_path / log_root are absolute paths on one machine — adjust before running elsewhere.
# NOTE(review): cuda=True / no_cuda=False sit next to device=cpu — confirm which one the Trainer honours.
args = ArgsNamespace(
    AllTrain=True,
    ERO_style='uniform',
    F=70,
    Fiedler_layer_num=5,
    K=20,
    N=350,
    SavePred=False,
    all_methods=['DIGRAC', 'ib'],
    alpha=1.0,
    baseline='syncRank',
    cuda=True,
    data_path='/home/georg/fairpair/GNNRank/data/',
    dataset='fairGNNRank_test',
    be_silent=True,
    debug=False,
    device=torch.device(type='cpu'),
    dropout=0.5,
    early_stopping=500,
    epochs=1000,
    eta=0.1,
    fill_val=0.5,
    hidden=8,
    hop=2,
    load_only=True,
    log_root='/home/georg/fairpair/GNNRank/logs/',
    lr=0.1,
    no_cuda=False,
    num_trials=1,
    optimizer='Adam',
    p=0.05,
    pretrain_epochs=50,
    pretrain_with='dist',
    regenerate_data=True,
    season=1990,
    seed=31,
    seeds=[10],
    sigma=1.0,
    tau=0.5,
    test_ratio=1,
    train_ratio=1,
    train_with='proximal_baseline',
    trainable_alpha=False,
    upset_margin=0.01,
    upset_margin_coeff=0,
    upset_ratio_coeff=1.0,
    weight_decay=0.0005,
    exposure_coeff=0.9,
)
# Seed PyTorch's global RNG; left as the final expression so the returned Generator is displayed.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x7f1e2638c390>

In [27]:
# Graph size: 40 nodes in total, 20 of them in the minority group (i.e. equally sized groups).
NUM_NODES = 40
NUM_MINORITY = 20

G = FairPairGraph()
G.generate_groups(NUM_NODES, NUM_MINORITY)

# General skill distribution, assigned to every node.
skill_params = dict(loc=0, scale=0.86142674, seed=42)
G.assign_skills(**skill_params)

# Bias is added to the unprivileged (minority) group only.
bias_params = dict(loc=-1.43574282, scale=0.43071336, seed=42)
G.assign_bias(nodes=G.minority_nodes, **bias_params)

In [28]:
# Populate the graph with pairwise comparisons via random sampling (warnings disabled).
sampler = RandomSampling(G, warn=False)
# NOTE(review): presumably 500 sampling rounds with k=1 — confirm against RandomSampling.apply.
sampling_kwargs = {'iter': 500, 'k': 1}
sampler.apply(**sampling_kwargs)

In [29]:
# Per-node value of the 'minority' attribute, in the graph's node iteration order.
groups = np.array([is_minority for _, is_minority in G.nodes(data='minority')])

# Weighted adjacency matrix of the comparison graph (sparse container).
adj = nx.adjacency_matrix(G, weight='weight')

# Initialise the trainer with the given adjacency matrix and group labels.
trainer = Trainer(
    args,
    random_seed=10,
    save_name_base='test',
    adj=adj,
    groups=groups,
)

In [30]:
# Sanity check: display the group labels as stored on the trainer.
trainer.groups

tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True])

In [31]:
# Train the 'ib' model. The two returned values are unpacked as checkpoint paths
# (best / latest) — NOTE(review): naming taken from this call site; confirm in Trainer.train.
save_path_best, save_path_latest = trainer.train(model_name='ib')

  0%|          | 0/1000 [00:00<?, ?epochs/s]

In [32]:
# Predict with the best checkpoint of the 'ib' model.
score, pred_label = trainer.predict_nn(
    model_name='ib',
    model_path=save_path_best,
    A=None,
    GNN_variant='proximal_baseline',
)

# Map each row index to 1 minus the first entry of that row of the predicted scores.
score_rows = score.cpu().detach().numpy()
ranking = {idx: 1 - row[0] for idx, row in enumerate(score_rows)}

In [33]:
# Evaluate the GNN ranking: weighted-tau error and exposure, overall and per group.
ranking_as_ranks = scores_to_rank(ranking, invert=False)
base_scores = [skill for _, skill in G.nodes(data='skill')]

# Derive the node partitions from the graph instead of hard-coding index ranges,
# so the evaluation stays correct if the sizes passed to generate_groups() change.
all_nodes = list(G.nodes)
majority_nodes = [node for node, is_minority in G.nodes(data='minority') if not is_minority]
minority_nodes = [node for node, is_minority in G.nodes(data='minority') if is_minority]

# Error of each node set, measured against its complementary set.
for label, subgraph, complement in [
    ('Overall', all_nodes, []),
    ('Majority', majority_nodes, minority_nodes),
    ('Minority', minority_nodes, majority_nodes),
]:
    tau = weighted_tau_nodes(base_scores, ranking_as_ranks,
                             subgraph_nodes=subgraph, complementary_nodes=complement)
    print(f'{label} error', tau)

# Exposure received by each group in the ranking.
for label, subgraph in [('Majority', majority_nodes), ('Minority', minority_nodes)]:
    exp = exposure_nodes(ranking_as_ranks, subgraph_nodes=subgraph)
    print(f'{label} exposure', exp)

Overall error 0.4107289246411511
Majority error 0.4510969642998299
Minority error 0.4792292247362133
Majority exposure 0.34073936545202393
Minority exposure 0.21381226908065512


In [34]:
# Inspect the dimensions of the feature tensor held by the trainer.
trainer.features.shape

torch.Size([40, 41])

In [35]:
# Inspect the first row of the feature tensor.
trainer.features[0,:]

tensor([ -0.9098,  -0.3698,  -1.0860,   0.2894,   1.4565,  -0.5377,   0.4804,
         -0.9171,   0.0276,  -0.3004,  -1.3027,   0.2954,   0.6090,  -0.2328,
         -0.3737,  -0.7023,   0.0684,   0.0588,  -1.0150,  -0.5431,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000, -10.0000])

In [36]:
# Baseline: recover a ranking directly from the comparison graph and evaluate it
# with the same error / exposure metrics, overall and per group.
ranker = RankRecovery(G)
scores, nodes = ranker.apply()
ranking_as_ranks = scores_to_rank(scores, invert=False)
base_scores = [skill for _, skill in G.nodes(data='skill')]

# Derive the node partitions from the graph instead of hard-coding index ranges,
# so the evaluation stays correct if the sizes passed to generate_groups() change.
all_nodes = list(G.nodes)
majority_nodes = [node for node, is_minority in G.nodes(data='minority') if not is_minority]
minority_nodes = [node for node, is_minority in G.nodes(data='minority') if is_minority]

# Error of each node set, measured against its complementary set.
for label, subgraph, complement in [
    ('Overall', all_nodes, []),
    ('Majority', majority_nodes, minority_nodes),
    ('Minority', minority_nodes, majority_nodes),
]:
    tau = weighted_tau_nodes(base_scores, ranking_as_ranks,
                             subgraph_nodes=subgraph, complementary_nodes=complement)
    print(f'{label} error', tau)

# Exposure received by each group in the ranking.
for label, subgraph in [('Majority', majority_nodes), ('Minority', minority_nodes)]:
    exp = exposure_nodes(ranking_as_ranks, subgraph_nodes=subgraph)
    print(f'{label} exposure', exp)

Overall error 0.38624295229956584
Majority error 0.41828713175933113
Minority error 0.45099082667388324
Majority exposure 0.34213817706944416
Minority exposure 0.2124134574632348
