In [1]:
import numpy as np
import utils
import torch
import pandas as pd
import gpytorch

from tqdm import tqdm
from gpytorch.models import ExactGP
from gpytorch.likelihoods import DirichletClassificationLikelihood
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!nvidia-smi

Wed Feb 15 15:57:19 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 455.45.01    CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN RTX           On   | 00000000:03:00.0 Off |                  N/A |
| 41%   32C    P8    15W / 280W |   5405MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN Xp            On   | 00000000:81:00.0 Off |                  N/A |
| 23%   39C    P8    10W / 250W |      4MiB / 12196MiB |      0%      Default |
|       

In [3]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [4]:
device

device(type='cuda', index=1)

In [5]:
class DirichletGPModel(ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_classes):
        super(DirichletGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ConstantMean(batch_shape=torch.Size((num_classes,)))
        self.covar_module = ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size((num_classes,))),
            batch_shape=torch.Size((num_classes,)),
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def train(train_x, train_y, device_idx=0):
    device = torch.device(f'cuda:{device_idx}' if torch.cuda.is_available() else 'cpu')

    likelihood = DirichletClassificationLikelihood(train_y, learn_additional_noise=True).cuda()
    model = DirichletGPModel(train_x, likelihood.transformed_targets, likelihood, num_classes=likelihood.num_classes).cuda()
    
    model.to(device)
    likelihood.to(device)
    
    training_iterations = 50
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    model.train()
    likelihood.train()
    
    for i in tqdm(range(training_iterations)):
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(train_x.to(device))
        # Calc loss and backprop derivatives
        loss = -mll(output, train_y.to(device)).sum()
        loss.backward()
        optimizer.step()
    
    return model, likelihood

In [6]:
dfx, dfy, _ = utils.get_dataset('adult_income', return_dataframe=True)

In [7]:
from sklearn.preprocessing import minmax_scale
cols = dfx.columns[:4]

In [8]:
cols

Index(['age', 'capital_gain', 'capital_loss', 'hours_per_week'], dtype='object')

In [9]:
dfx_1 = dfx.loc[dfx.gender == 1]
dfx_0 = dfx.loc[dfx.gender == 0]

In [10]:
dfx_1

Unnamed: 0,age,capital_gain,capital_loss,hours_per_week,workclass_Private,workclass_Local-gov,workclass_Self-emp-not-inc,workclass_Federal-gov,workclass_State-gov,workclass_Self-emp-inc,...,native_country_Jamaica,native_country_Ecuador,native_country_Yugoslavia,native_country_Hungary,native_country_Hong,native_country_Greece,native_country_Trinadad&Tobago,native_country_Outlying-US(Guam-USVI-etc),native_country_France,native_country_Holand-Netherlands
8,24,0,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12,26,0,0,39,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
17,43,0,0,30,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
18,37,0,0,20,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21,34,0,0,35,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48827,37,0,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48830,43,0,0,40,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
48837,27,0,0,38,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48839,58,0,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
dfx_1[cols] = minmax_scale(dfx_1[cols])
dfx_0[cols] = minmax_scale(dfx_0[cols])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


In [12]:
df_x1 = dfx_1.sample(1000)
df_x0 = dfx_0.sample(1000)

warm_start_y1 = torch.from_numpy(dfy.loc[df_x1.index].values).to(device)
warm_start_y0 = torch.from_numpy(dfy.loc[df_x0.index].values).to(device)

warm_start_x1 = torch.from_numpy(df_x1.values).float().to(device)
warm_start_x0 = torch.from_numpy(df_x0.values).float().to(device)

In [13]:
model0, likelihood0 = train(warm_start_x0, warm_start_y0)
model1, likelihood1 = train(warm_start_x1, warm_start_y1)

model0.to(device)
likelihood0.to(device)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:01<00:00, 25.40it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:01<00:00, 40.59it/s]


DirichletClassificationLikelihood(
  (noise_covar): FixedGaussianNoise()
  (second_noise_covar): HomoskedasticNoise(
    (raw_noise_constraint): GreaterThan(1.000E-04)
  )
)

In [14]:
model0.eval(), model1.eval(), likelihood0.eval(), likelihood1.eval();

In [15]:
from vae_models import RelaxedBernoulliVAE as rbvae

In [16]:
vae0 = rbvae()
vae0.load_state_dict(torch.load("/mnt/infonas/data/eeshaan/fairness/EE492/checkpoints/adult_income/rbvae_xA=0/best.pt"))
vae0.eval()
vae0.to(device);

In [17]:
vae1 = rbvae()
vae1.load_state_dict(torch.load("/mnt/infonas/data/eeshaan/fairness/EE492/checkpoints/adult_income/rbvae_xA=1/best.pt"))
vae1.eval()
vae1.to(device);

In [18]:
from blackbox_models import BlackBox

In [19]:
blackbox = BlackBox('Logistic', 102, 1)
blackbox.load_state_dict(torch.load("/mnt/infonas/data/eeshaan/fairness/EE492/checkpoints/adult_income/blackbox/Logistic/best.pt"))
blackbox.eval()
blackbox.to(device);

In [20]:
candidates = []
neg_queried = warm_start_x0.clone()
neg_labels = warm_start_y0.clone().unsqueeze(1)

for epoch_outer in tqdm(range(1, 4001)):
    x0_random = torch.normal(0.,1.,size=(1,102), dtype=torch.float32, requires_grad=True)
    optimizer0 = torch.optim.AdamW((x0_random,), lr=10)
    best_loss = 10e5
    count = 0
    losses = []
    for epoch in range(1,100):
        optimizer0.zero_grad()
        x0_samples = utils.postprocess(
            vae0.sample(x0_random.to(device), 100, device, 
                        **{'tau': 1.0, 'tau_min': 0.1, 'anneal_rate': 3e-5, 'steps': 0, 'hard': False}).squeeze(1),
            'adult_income'
        )
        obj0 = likelihood0(model0(x0_samples.to(device))).variance.sum(axis=0).mean() #+ ((x0_random.to(device) - x0_samples)**2).mean()
#         print(obj0.shape)
        loss = -obj0
        loss.backward()
        optimizer0.step()
        if loss < best_loss:
            best_loss = loss
            count = 0
            losses.append(loss)
        else:
            count += 1
        if count  == 5:
            break
    candidates.append(x0_random.detach().clone())
    
    if epoch_outer % 100 == 0:
        new_vals = torch.concatenate(candidates)
        new_queries,_ = vae0(new_vals.to(device), **{'tau': 1.0, 'tau_min': 0.1, 'anneal_rate': 3e-5, 'steps': 0, 'hard': False})
        new_labels = blackbox(new_queries)
        neg_queried = torch.concatenate([neg_queried, new_queries])
        neg_labels = torch.concatenate([neg_labels, (0.5*(torch.sign(new_labels - 0.5) + 1.0)).long().detach().clone()])
        print(neg_labels.shape, neg_labels.dtype)
        print(neg_queried.shape, neg_queried.dtype)
        model0, likelihood0 = train(neg_queried.detach().clone(),neg_labels.flatten())
        model0.eval()
        likelihood0.eval()
        candidates = []

  2%|███▉                                                                                                                                                          | 99/4000 [00:41<25:11,  2.58it/s]

torch.Size([1100, 1]) torch.int64
torch.Size([1100, 102]) torch.float32



  0%|                                                                                                                                                                         | 0/50 [00:00<?, ?it/s][A
  8%|████████████▉                                                                                                                                                    | 4/50 [00:00<00:01, 37.38it/s][A
 16%|█████████████████████████▊                                                                                                                                       | 8/50 [00:00<00:01, 37.24it/s][A
 24%|██████████████████████████████████████▍                                                                                                                         | 12/50 [00:00<00:01, 37.17it/s][A
 32%|███████████████████████████████████████████████████▏                                                                                                            | 16/50 [00:00<00:00, 37.15it/

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1! (when checking argument for argument other in method wrapper__equal)

In [14]:
candidates = []
pos_queried = warm_start_x1.clone()
pos_labels = warm_start_y1.clone().unsqueeze(1)
print(pos_labels.shape, pos_labels.dtype)
print(pos_queried.shape, pos_queried.dtype)

for epoch_outer in tqdm(range(1, 4001)):
    x1_random = torch.normal(0.,1.,size=(1,102), dtype=torch.float32, requires_grad=True)
    optimizer1 = torch.optim.AdamW((x1_random,), lr=10)
    best_loss = 10e5
    count = 0
    losses = []
    for epoch in range(1,100):
        optimizer1.zero_grad()
        x1_samples = utils.postprocess(
            vae0.sample(x1_random.to(device), 100, device, 
                        **{'tau': 1.0, 'tau_min': 0.1, 'anneal_rate': 3e-5, 'steps': 0, 'hard': False}).squeeze(1),
            'adult_income'
        )
        obj1 = likelihood1(model1(x1_samples.to(device))).variance.sum(axis=0).mean() #+ ((x0_random.to(device) - x0_samples)**2).mean()
#         print(obj0.shape)
        loss = -obj1
        loss.backward()
        optimizer1.step()
        if loss < best_loss:
            best_loss = loss
            count = 0
            losses.append(loss)
        else:
            count += 1
        if count  == 5:
            break
    candidates.append(x1_random.detach().clone())
    
    if epoch_outer % 100 == 0:
        new_vals = torch.concatenate(candidates)
        new_queries,_ = vae1(new_vals.to(device), **{'tau': 1.0, 'tau_min': 0.1, 'anneal_rate': 3e-5, 'steps': 0, 'hard': False})
        new_labels = blackbox(new_queries)
        pos_queried = torch.concatenate([pos_queried, new_queries])
        pos_labels = torch.concatenate([pos_labels, (0.5*(torch.sign(new_labels - 0.5) + 1.0)).long().detach().clone()])
        model1, likelihood1 = train(pos_queried.detach().clone(),pos_labels.flatten())
        model1.eval()
        likelihood1.eval()
        candidates = []

torch.Size([1000, 1]) torch.int64
torch.Size([1000, 102]) torch.float32


  2%|██▉                                                                                                                  | 99/4000 [00:19<12:52,  5.05it/s]
  0%|                                                                                                                                | 0/50 [00:00<?, ?it/s][A
 16%|███████████████████▏                                                                                                    | 8/50 [00:00<00:00, 72.04it/s][A
 32%|██████████████████████████████████████                                                                                 | 16/50 [00:00<00:00, 73.41it/s][A
 48%|█████████████████████████████████████████████████████████                                                              | 24/50 [00:00<00:00, 72.92it/s][A
 64%|████████████████████████████████████████████████████████████████████████████▏                                          | 32/50 [00:00<00:00, 72.97it/s][A
 80%|██████████████████████████████████████

In [15]:
pos_labels.shape

torch.Size([5000, 1])

In [16]:
new_labels.shape

torch.Size([100, 1])

In [28]:
# parity in data
np.abs(dfy[dfx[dfx.gender == 0].index].mean() - dfy[dfx[dfx.gender == 1].index].mean())

0.19911019753072282

In [15]:
bb_input0 = torch.from_numpy(dfx_0.values).float().to(device)
bb_input1 = torch.from_numpy(dfx_1.values).float().to(device)

In [16]:
blackbox(bb_input0)

tensor([[1.0000],
        [1.0000],
        [1.0000],
        ...,
        [1.0000],
        [1.0000],
        [1.0000]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [50]:
torch.abs(blackbox(bb_input0).mean() - blackbox(bb_input1).mean())

tensor(7.7009e-05, device='cuda:0', grad_fn=<AbsBackward0>)

In [51]:
torch.abs(pos_labels.mean() - neg_labels.mean())

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [58]:
pos_labels.float().mean()

tensor(0.0542, device='cuda:0')

In [55]:
new_labels

tensor(0.7214, device='cuda:0', grad_fn=<MaxBackward1>)

In [20]:
dfx

Unnamed: 0,age,capital_gain,capital_loss,hours_per_week,workclass_Private,workclass_Local-gov,workclass_Self-emp-not-inc,workclass_Federal-gov,workclass_State-gov,workclass_Self-emp-inc,...,native_country_Jamaica,native_country_Ecuador,native_country_Yugoslavia,native_country_Hungary,native_country_Hong,native_country_Greece,native_country_Trinadad&Tobago,native_country_Outlying-US(Guam-USVI-etc),native_country_France,native_country_Holand-Netherlands
0,25,0,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,38,0,0,50,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,28,0,0,40,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,44,7688,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,34,0,0,30,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,27,0,0,38,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48838,40,0,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48839,58,0,0,40,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48840,22,0,0,20,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
cols

Index(['age', 'capital_gain', 'capital_loss', 'hours_per_week'], dtype='object')

In [23]:
from sklearn.preprocessing import minmax_scale
dfx[cols] = minmax_scale(dfx[cols])
cols = dfx.columns[:4]

In [25]:
dfx

Unnamed: 0,age,capital_gain,capital_loss,hours_per_week,workclass_Private,workclass_Local-gov,workclass_Self-emp-not-inc,workclass_Federal-gov,workclass_State-gov,workclass_Self-emp-inc,...,native_country_Jamaica,native_country_Ecuador,native_country_Yugoslavia,native_country_Hungary,native_country_Hong,native_country_Greece,native_country_Trinadad&Tobago,native_country_Outlying-US(Guam-USVI-etc),native_country_France,native_country_Holand-Netherlands
0,0.109589,0.000000,0.0,0.397959,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0.287671,0.000000,0.0,0.500000,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0.150685,0.000000,0.0,0.397959,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0.369863,0.076881,0.0,0.397959,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0.232877,0.000000,0.0,0.295918,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,0.136986,0.000000,0.0,0.377551,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48838,0.315068,0.000000,0.0,0.397959,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48839,0.561644,0.000000,0.0,0.397959,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48840,0.068493,0.000000,0.0,0.193878,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
bb_input0 = torch.from_numpy(dfx[dfx.gender == 0].values).float().to(device)
bb_input1 = torch.from_numpy(dfx[dfx.gender == 1].values).float().to(device)

In [28]:
y = blackbox(bb_input0)

In [None]:
num_0 = 0
num_1 = 0
for i in y:
    if i > 0.5:
        num_0 += 1
    else:
        num_1 += 1
print(num_0)
print(num_1)

7649
23465
