In [1]:
import sys

import numpy as np
import torch

sys.path.append('../src')

from evaluation import *
from taiwan_generator import *
from fair_model import FairModel
from utils import gen_plot_data, plot_data, combine_tuples
from baselines import LR, CvxFairModel, EOFairModel

In [2]:
# Load dataset.
# preprocess_data() yields four objects, unpacked here as S, X, Y and PARAMS;
# split_data() then returns a (train, test) pair of (s, X, y) triples.
# NOTE(review): presumably S is the sensitive attribute, X the features and
# Y the labels -- confirm against the star-imported helper modules.
S, X, Y, PARAMS = preprocess_data()
(s_train, X_train, y_train), (s_test, X_test, y_test) = split_data(S, X, Y)

In [3]:
# Build Bank model from the PARAMS returned by preprocess_data().
bank = Bank(params=PARAMS)
# One Agent per split: both use the same eps and base values but different
# seeds (2021 for train, 2020 for test).
# NOTE(review): the meaning of `eps` and `base` is defined by Agent -- confirm there.
agent_train = Agent(s_train, X_train, y_train, eps=0.1, base=[0.0, 0.1], seed=2021)
agent_test = Agent(s_test, X_test, y_test, eps=0.1, base=[0.0, 0.1], seed=2020)

In [4]:
# Generate datasets: 4-step profiles for each split, with `bank` passed as the
# first argument to gen_multi_step_profiles.
# NOTE: s_train and s_test are rebound here, replacing the values that
# split_data() produced in the earlier cell.
s_train, Xs_train, Ys_train = gen_multi_step_profiles(bank, agent_train, steps=4)
s_test, Xs_test, Ys_test = gen_multi_step_profiles(bank, agent_test, steps=4)
# Merge the training (s, Xs, Ys) into s_comb / X_comb / Y_comb, the inputs the
# baseline models are trained on.
s_comb, X_comb, Y_comb = combine_tuples(s_train, Xs_train, Ys_train)

### Baseline: LR (Logistic Regression)

In [5]:
# Fit the LR baseline on the combined training data.
lr = LR(l2_reg=1e-5)
lr.train(s_comb, X_comb, Y_comb)

# Regenerate 4-step test profiles with `lr` (instead of `bank`) as the first
# argument, compute OYs_te from `bank` on those profiles, and print the
# per-step statistics.
# NOTE(review): presumably OYs_te are the bank's own labels for the profiles
# induced by `lr` -- confirm in generate_y_from_bank.
_, Xs_te, Ys_te = gen_multi_step_profiles(lr, agent_test, steps=4)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, lr, OYs=OYs_te)

------------------------------ Step 1 - Logistic Regression ------------------------------
Acc: 82.8%
Short Fairness: 0.015
Long fairness: 0.038
------------------------------ Step 2 - Logistic Regression ------------------------------
Acc: 82.6%
Short Fairness: 0.018
Long fairness: 0.088
------------------------------ Step 3 - Logistic Regression ------------------------------
Acc: 84.1%
Short Fairness: 0.021
Long fairness: 0.243
------------------------------ Step 4 - Logistic Regression ------------------------------
Acc: 81.6%
Short Fairness: 0.012
Long fairness: 0.433




### Baseline: FMDP (Fair Model with Demographic Parity)

In [6]:
# Fit the CvxFairModel baseline on the combined training data.
# n_features is the length of one training profile vector plus 2.
# NOTE(review): the reason for "+2" and the meaning of `tao` are defined by
# CvxFairModel -- confirm there before changing either value.
cfm = CvxFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.6)
cfm.train(s_comb, X_comb, Y_comb)

# Regenerate 4-step test profiles with `cfm` as the first argument, compute
# OYs_te from `bank` on those profiles, and print the per-step statistics.
_, Xs_te, Ys_te = gen_multi_step_profiles(cfm, agent_test, steps=4)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, cfm, OYs=OYs_te)

optimal
------------------------------ Step 1 - Fair Model with Demographic Parity ------------------------------
Acc: 83.0%
Short Fairness: 0.063
Long fairness: 0.038
------------------------------ Step 2 - Fair Model with Demographic Parity ------------------------------
Acc: 84.3%
Short Fairness: 0.066
Long fairness: 0.076
------------------------------ Step 3 - Fair Model with Demographic Parity ------------------------------
Acc: 84.6%
Short Fairness: 0.075
Long fairness: 0.223
------------------------------ Step 4 - Fair Model with Demographic Parity ------------------------------
Acc: 84.1%
Short Fairness: 0.069
Long fairness: 0.397




### Baseline: FMEO (Fair Model with Equal Opportunity)

In [7]:
# Fit the EOFairModel baseline on the combined training data.
# n_features is the length of one training profile vector plus 2.
# NOTE(review): the reason for "+2" and the meaning of `tao` are defined by
# EOFairModel -- confirm there before changing either value.
eqm = EOFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.67)
eqm.train(s_comb, X_comb, Y_comb)

# Regenerate 4-step test profiles with `eqm` as the first argument, compute
# OYs_te from `bank` on those profiles, and print the per-step statistics.
_, Xs_te, Ys_te = gen_multi_step_profiles(eqm, agent_test, steps=4)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, eqm, OYs=OYs_te)

optimal
------------------------------ Step 1 - Fair Model with Equal Oppertunity ------------------------------
Acc: 82.4%
Short Fairness: 0.072
Long fairness: 0.006
------------------------------ Step 2 - Fair Model with Equal Oppertunity ------------------------------
Acc: 83.0%
Short Fairness: 0.075
Long fairness: 0.045
------------------------------ Step 3 - Fair Model with Equal Oppertunity ------------------------------
Acc: 83.0%
Short Fairness: 0.087
Long fairness: 0.156
------------------------------ Step 4 - Fair Model with Equal Oppertunity ------------------------------
Acc: 81.3%
Short Fairness: 0.078
Long fairness: 0.295




### Ours: Long-term Fair Model

In [8]:
# Initial fit of the long-term fair model. On this first call the same
# (Xs_train, Ys_train) pair is passed for both pairs of data arguments.
# NOTE(review): the "+1" on n_features and the sf_reg / lf_reg values are taken
# as-is; their meaning is defined by FairModel -- confirm there.
fm = FairModel(n_features=len(Xs_train[0][0])+1, lr=5e-3, l2_reg=1e-5, sf_reg=0.0223, lf_reg=0.715)
fm.train(s_train, Xs_train, Ys_train, Xs_train, Ys_train, epochs=1000, plot=False, short_type='neg')

# Repeated retraining: alternate between regenerating the profiles induced by
# the current model and fine-tuning the model on them.
num_iters = 30
# Snapshot (rather than alias) the fitted parameters so that later fm.train
# calls cannot silently change the recorded starting point.
theta_true = np.copy(fm.params)
theta_list = [np.copy(theta_true)]   # parameter snapshot after each round
theta_gaps = []                      # L2 distance moved in each round

# initial theta
theta = np.copy(theta_true)
for t in range(num_iters):
    # adjust distribution to current theta
    _, NXs_train, NYs_train = gen_multi_step_profiles(fm, agent_train, steps=4)
    # learn on induced distribution
    fm.train(s_train, Xs_train, Ys_train, NXs_train, NYs_train, epochs=10, plot=False, short_type='neg')

    # keep track of statistics; copy so the stored value is independent of
    # any further updates to the model's parameters
    theta_new = np.copy(fm.params)
    theta_gaps.append(np.linalg.norm(theta_new - theta))
    theta_list.append(theta_new)

    theta = theta_new
print("Retraining Done!")

Retraining Done!


In [9]:
# Evaluate the retrained model: regenerate 4-step test profiles with `fm` as
# the first argument, compute OYs_te from `bank` on those profiles, and print
# the per-step statistics.
_, Xs_te, Ys_te = gen_multi_step_profiles(fm, agent_test, steps=4)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, fm, OYs=OYs_te)

------------------------------ Step 1 - Long-term Fair Model ------------------------------
Acc: 64.8%
Short Fairness: 0.006
Long fairness: 0.064
------------------------------ Step 2 - Long-term Fair Model ------------------------------
Acc: 64.8%
Short Fairness: 0.006
Long fairness: 0.043
------------------------------ Step 3 - Long-term Fair Model ------------------------------
Acc: 68.0%
Short Fairness: 0.003
Long fairness: 0.016
------------------------------ Step 4 - Long-term Fair Model ------------------------------
Acc: 68.7%
Short Fairness: 0.006
Long fairness: 0.003


