In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import tensorflow as tf
import numpy as np
from sklearn.datasets import make_classification
from fairlearn.metrics import demographic_parity_ratio
from sklearn.model_selection import train_test_split
from pprint import pprint
from tqdm import tqdm

from utils import set_seed, generate_sensitive_feature
from KnowledgeBase import KnowledgeBase


In [None]:
# Fix the random seed before any data is generated so that a fresh
# "Restart & Run All" is repeatable.
# NOTE(review): set_seed comes from the project-local `utils` module; presumably
# it seeds Python, NumPy and TensorFlow RNGs -- confirm against utils.py.
set_seed(42)


In [None]:
# Central hyperparameter record for the experiment. Later cells read their
# settings from this dict and also write derived values back into it.
wandb_hp = {
    # Arguments forwarded to sklearn's make_classification.
    'dataset_SAMPLES': 500,
    'dataset_FEATURES': 3,
    'dataset_n_informative': 2,
    'dataset_n_redundant': 1,
    'dataset_flip_y': 0.001,
    # Second argument passed to generate_sensitive_feature.
    'perfect_classifier_demographic_parity': 0.5,
    # Hidden-layer widths of the model.
    'hidden_layer_sizes': (10, 5),
}


In [None]:
# Synthetic binary-classification data; every generator setting comes from
# the `wandb_hp` hyperparameter dict.
dataset_kwargs = {
    'n_samples': wandb_hp['dataset_SAMPLES'],
    'n_features': wandb_hp['dataset_FEATURES'],
    'n_informative': wandb_hp['dataset_n_informative'],
    'n_redundant': wandb_hp['dataset_n_redundant'],
    'flip_y': wandb_hp['dataset_flip_y'],
}
X, Y = make_classification(**dataset_kwargs)

# Sensitive attribute produced by the project helper from the labels and the
# configured parity value.
target_parity = wandb_hp['perfect_classifier_demographic_parity']
p_attribute = generate_sensitive_feature(Y, target_parity)

# Stratification key: the string concatenation of sensitive attribute and
# label, so the split is stratified on the (group, class) combination.
strata = np.char.add(p_attribute.astype(str), Y.astype(str))

# Append the sensitive attribute as the LAST feature column.
# 0: privileged, 1: UNprivileged
sensitive_column = np.expand_dims(p_attribute, axis=-1)
X = np.hstack([X, sensitive_column])

# 80/20 shuffled train/test split.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, Y, test_size=0.2, shuffle=True, stratify=strata
)


In [None]:
# Demographic parity ratio of the labels themselves (the labels are passed as
# both y_true and y_pred) for the full dataset and for each split. The
# sensitive attribute is the last column of the feature matrices.
parity_cases = (
    ('Ground truth', 'ground_truth_demographic_parity', Y, p_attribute),
    ('Trainset', 'trainset_demographic_parity', Ytrain, Xtrain[:, -1]),
    ('Testset', 'testset_demographic_parity', Ytest, Xtest[:, -1]),
)

# Compute and record all three ratios first ...
for _title, hp_key, labels, groups in parity_cases:
    wandb_hp[hp_key] = demographic_parity_ratio(
        labels, labels, sensitive_features=groups
    )

# ... then report them, one line per dataset.
for title, hp_key, _labels, _groups in parity_cases:
    print(f"{title} demographic parity: {wandb_hp[hp_key]}")

In [None]:
# Init the knowledge base from the train/test split.
# NOTE(review): the positional arguments after the label arrays (the class-name
# mapping, then 0 and 1) follow KnowledgeBase's signature, which is not visible
# in this notebook -- presumably 0 and 1 are the privileged / unprivileged
# values of the sensitive feature; confirm against KnowledgeBase.py.
kb = KnowledgeBase(
    Xtrain, Xtest,
    Ytrain, Ytest,
    {'positive':1, 'negative':0},
    0,
    1,
    # Take the architecture from the hyperparameter dict so the recorded
    # `hidden_layer_sizes` is the one actually trained. This was previously
    # hard-coded to (50, 50), which disagreed with wandb_hp.
    hidden_layer_sizes=wandb_hp['hidden_layer_sizes'],
    # The sensitive attribute is the last column of the feature matrix.
    sensitive_feature_index=Xtrain.shape[1]-1,
    config_file='./KnowledgeBaseAxioms.json'
)


In [None]:
# Record the learning rate first, then build the optimizer from the recorded
# value so the dict and the optimizer cannot drift apart.
wandb_hp['learning_rate'] = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=wandb_hp['learning_rate'])

# Store the optimizer's class (not the instance) and the number of epochs.
wandb_hp.update(optimizer=type(optimizer), epochs=100)


In [None]:
# Training loop: one gradient step per epoch.
# The loop counter is only used for progress reporting, so iterate over a
# plain Python range instead of stepping through an eager tf.range tensor.
for epoch in tqdm(range(wandb_hp['epochs'])):

    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        # The loss is the complement of the value returned by kb.train_step().
        # NOTE(review): presumably a satisfaction score in [0, 1] -- confirm
        # against KnowledgeBase.train_step.
        loss = 1. - kb.train_step()  # type: ignore
    grads = tape.gradient(loss, kb.trainable_variables)
    optimizer.apply_gradients(zip(grads, kb.trainable_variables))

In [None]:
# Show the knowledge base's logs as this cell's output.
# NOTE(review): the structure of the returned logs is defined in
# KnowledgeBase.get_logs, which is not visible here.
kb.get_logs()