In [1]:
# Notebook setup: thread limits, data location, and library imports.
import os
# Pin MKL, NumExpr and OpenMP to a single thread each. These are set before
# torch/numpy are imported below -- presumably because the numeric back-ends
# read them at load time (TODO confirm).
os.environ["MKL_NUM_THREADS"] = "1" 
os.environ["NUMEXPR_NUM_THREADS"] = "1" 
os.environ["OMP_NUM_THREADS"] = "1" 
# Datasets are looked up in ./data/ relative to the current working directory.
base_path = os.getcwd() + '/data/'
import torch
import random

# Project-local modules: dataset loader and the FairICP learner/utilities.
import get_dataset
import numpy as np
from FairICP import utility_functions
from FairICP import FairICP_learning
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')


In [2]:
# load R
# Point rpy2 at the R installation; these are assigned before the rpy2
# imports below.
# NOTE(review): both values look like machine-specific placeholder Windows
# paths -- adjust them to the local R / rpy2 install before running.
os.environ['R_HOME'] = r"user\R\R-4.3.0"
os.environ['R_USER'] = r"user\anaconda3\Lib\site-packages\rpy2"

from rpy2.robjects.packages import importr
# R packages used later: KPC provides KPCgraph (used in the evaluation cell);
# kernlab is imported here but not referenced in the cells visible in this file.
KPC = importr('KPC')
kernlab = importr('kernlab')
import rpy2.robjects
from rpy2.robjects import FloatVector

In [3]:
# Use one seed for every random number generator in play (PyTorch, NumPy,
# Python's `random`, and CUDA when a GPU is present).
seed = 123
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Load the "compas" dataset; the loader returns three (X, A, Y) triples.
# The *_cal triple is not used in the cells visible in this file.
(
    X, A, Y,
    X_cal, A_cal, Y_cal,
    X_test, A_test, Y_test,
) = get_dataset.get_train_test_data(base_path, "compas", seed, dim=2)

# Model inputs: the A columns first, followed by the X columns.
input_data_train = np.concatenate((A, X), axis=1)
input_data_test = np.concatenate((A_test, X_test), axis=1)

In [4]:
# --- Training configuration -------------------------------------------------

# base predictive model
model_type = "deep_model"
# utility loss
cost_pred = torch.nn.CrossEntropyLoss()

# Optimisation settings: mini-batch size and the two learning rates that are
# forwarded to the learner as `lr_loss` and `lr_dis`.
batch_size = 128
lr_loss = 0.001
lr_dis = 0.001

# equalized odds penalty
mu_val = 0.8
# Forwarded to `fit` as `epochs_list`; its last entry is also used as `epochs`.
epochs_list = [200]

In [5]:
# Build the classification learner from the settings chosen above.
# `in_shape` / `A_shape` are the column counts of X and A respectively.
model = FairICP_learning.EquiClassLearner(
    lr_loss=lr_loss,
    lr_dis=lr_dis,
    epochs=epochs_list[-1],
    loss_steps=1,
    dis_steps=1,
    cost_pred=cost_pred,
    in_shape=X.shape[1],
    batch_size=batch_size,
    model_type=model_type,
    lambda_vec=mu_val,
    num_classes=2,
    A_shape=A.shape[1],
)

# Train on the concatenated [A, X] inputs.
model.fit(input_data_train, Y, epochs_list=epochs_list)

In [6]:
# Estimate the density on the test split; the test data is passed as both
# argument pairs of the estimator.
log_lik_mat = utility_functions.Class_density_estimation(
    Y_test, A_test, Y_test, A_test
)

# Draw index arrays from the log-likelihood matrix and drop singleton axes.
# presumably one row of permutation indices per draw -- TODO confirm against
# utility_functions.generate_X_CPT
y_perm_index = utility_functions.generate_X_CPT(50, 100, log_lik_mat)
y_perm_index = np.squeeze(y_perm_index)

# argsort along the last axis, then gather rows of A_test with the result to
# obtain the list of permuted copies of A.
A_perm_index = y_perm_index.argsort()
A_tilde_list = A_test[A_perm_index]

In [7]:
def _to_r_matrix(arr):
    """Convert a 2-D NumPy array into an R numeric matrix of the same shape.

    R's ``matrix`` fills column by column by default, so the array is
    transposed before flattening to keep element (r, c) in the same position.
    """
    return rpy2.robjects.r.matrix(
        FloatVector(arr.T.flatten()), nrow=arr.shape[0], ncol=arr.shape[1]
    )


# Number of permuted copies of A used for the permutation test. Derived from
# the data so it cannot drift out of sync with the cell that builds
# `A_tilde_list`.
n_perm = len(A_tilde_list)

# The statistic and the R views of A_test / Y_test do not depend on the
# checkpoint, so they are built once instead of on every iteration.
stat = KPC.KPCgraph
rZ = _to_r_matrix(A_test)
rY = rpy2.robjects.r.matrix(FloatVector(Y_test), nrow=A_test.shape[0], ncol=1)

for i, cp in enumerate(model.checkpoint_list):
    # Switch the learner to the networks saved at this checkpoint.
    model.model = model.cp_model_list[i]
    model.dis = model.cp_dis_list[i]

    Yhat_out_train = model.predict(input_data_train)
    Yhat_out_test = model.predict(input_data_test)

    # 1 - accuracy on the test split, reported as the misclassification rate.
    rmse_model = 1 - utility_functions.compute_acc_numpy(Yhat_out_test, Y_test)
    print(f"misclassification rate: {rmse_model}")

    rYhat = _to_r_matrix(Yhat_out_test)

    # Observed statistic: KPCgraph of the predictions with Y as X and A as Z.
    res_ = stat(Y = rYhat, X = rY, Z = rZ, Knn = 1)[0]
    print(f"estimated KPC: {res_}")

    # Reference distribution: the same statistic with A replaced by each
    # permuted copy. A dedicated index name keeps the outer `i` intact.
    res_list = np.zeros(n_perm)
    for perm_idx in range(n_perm):
        At_test = A_tilde_list[perm_idx]
        rZt = _to_r_matrix(At_test)
        res_list[perm_idx] = stat(Y = rYhat, X = rY, Z = rZt, Knn = 1)[0]

    # Permutation p-value: (1 + #{permuted >= observed}) / (n_perm + 1).
    p_val = 1.0/(n_perm+1) * (1 + np.sum(res_list >= res_))
    print(f"p-value: {p_val}")

misclassification rate: 0.3977272727272727
estimated KPC: -0.010061525732395419
p-value: 0.7227722772277227
