# Demonstration on an artificial dataset

In [1]:
from fairtorch import ConstraintLoss, DemographicParityLoss, EqualiedOddsLoss
import random
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset


In [2]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and CUDA), exports ``PYTHONHASHSEED`` and switches cuDNN to
    deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Imported here because the notebook's import cell does not import `os`;
    # without it the assignment below raises NameError.
    import os

    random.seed(seed)
    # NOTE: hash randomization is fixed when the interpreter starts, so this
    # only takes effect in subprocesses launched after this call.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(2021)

## Prepare artificial dataset 

In [3]:
n_feature = 5


def genelate_data(n_samples=1000, n_feature=n_feature):
    """Draw a synthetic binary-classification dataset.

    Labels are sampled uniformly from {0, 1}. Each class gets its own mean
    vector, drawn uniformly from [-2, 2) per feature, and every sample is a
    unit-variance Gaussian draw around the mean of its class. Column 0 is
    then replaced by a 0/1 indicator of whether its value exceeds the column
    mean; that indicator is also returned as the sensitive feature.

    Args:
        n_samples: Number of rows to generate.
        n_feature: Number of feature columns.

    Returns:
        Tuple ``(X, y, sensi_feat)``: float32 feature tensor of shape
        ``(n_samples, n_feature)``, float32 label tensor of shape
        ``(n_samples,)`` and integer sensitive-feature tensor of shape
        ``(n_samples,)``.
    """
    labels = np.random.randint(0, 2, size=n_samples)
    # Index 0 holds the mean vector of class 0, index 1 that of class 1.
    class_means = (
        np.random.uniform(-2, 2, n_feature),
        np.random.uniform(-2, 2, n_feature),
    )

    features = np.zeros((n_samples, n_feature))
    # One draw per row, in row order, so the random stream is consumed
    # sample by sample.
    for row, label in enumerate(labels):
        features[row] = np.random.normal(
            loc=class_means[label], scale=1.0, size=n_feature
        )

    first_column = features[:, 0]
    sensi_feat = (first_column > first_column.mean()).astype(int)
    # Overwrite the continuous first column with its binarized version.
    features[:, 0] = sensi_feat.astype(np.float32)

    return (
        torch.from_numpy(features).float(),
        torch.from_numpy(labels).float(),
        torch.from_numpy(sensi_feat),
    )


In [4]:
# Generate 1024 samples; `dataset` is the tuple (X, y, sensitive feature).
dataset = genelate_data(1024, n_feature=n_feature)
# data split: the first 70% of the rows are for training, the rest for testing
n_train = int(0.7 * len(dataset[0]))
X_train, y_train, sensi_train = (part[:n_train] for part in dataset)
X_test, y_test, sensi_test = (part[n_train:] for part in dataset)


## train model

In [None]:
# Baseline without any fairness constraint: one linear layer that maps the
# features to a single logit, trained with binary cross-entropy on logits.
model = nn.Sequential(nn.Linear(n_feature, 1))
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# 200 full-batch gradient steps on the training split.
for i in range(200):
    optimizer.zero_grad()
    logit = model(X_train)
    loss = criterion(logit.view(-1), y_train)

    loss.backward()
    optimizer.step()

# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
prob_test = torch.sigmoid(model(X_test)).view(-1)
y_pred = (prob_test > 0.5).float()
acc_test = (y_pred == y_test).float().mean().item()

print("acc test: ", acc_test)

acc_test_vanilla = acc_test

# Absolute difference between the mean predicted label of the two
# sensitive groups.
rate_group0 = y_pred[sensi_test == 0].mean().item()
rate_group1 = y_pred[sensi_test == 1].mean().item()
gap_vanilla = np.abs(rate_group0 - rate_group1)
print("gap of expected values: ", gap_vanilla)

## Demographic parity

In [None]:
dim_hiden = 32  # NOTE(review): not used anywhere in the visible code
# Same single-linear-layer architecture as the unconstrained model, freshly
# initialized.
model = nn.Sequential(nn.Linear(n_feature, 1))

# Penalty term added to the classification loss for the two sensitive groups
# 0 and 1. `alpha` presumably weights the penalty — confirm against the
# fairtorch documentation.
dp_loss = DemographicParityLoss(sensitive_classes=[0, 1], alpha=100)  # constraint
optimizer = optim.SGD(model.parameters(), lr=0.1)

# train: 100 full-batch gradient steps; `criterion` is the BCE-with-logits
# loss defined in the unconstrained training cell.
for i in range(0, 100):
    optimizer.zero_grad()
    logit = model(X_train)
    loss = criterion(logit.view(-1), y_train)
    loss += dp_loss(X_train, logit, sensi_train)  # add constraint
    loss.backward()
    optimizer.step()

# Threshold the predicted probabilities at 0.5 to obtain hard 0/1 labels.
y_pred = (torch.sigmoid(model(X_test)).view(-1) > 0.5).float()
acc_test = (y_pred == y_test).float().mean().item()

print("acc test: ", acc_test)

# Keep this model's accuracy under its own name so the unconstrained
# baseline in `acc_test_vanilla` is not overwritten.
acc_test_dp = acc_test

# Absolute difference between the mean predicted label of the two
# sensitive groups.
gap_dp = np.abs(y_pred[sensi_test == 0].mean().item() - y_pred[sensi_test == 1].mean().item())
print("gap of expected values: ", gap_dp)
