In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import pandas as pd
import urllib.request
import math
import gpytorch
from matplotlib import pyplot as plt
import random
import numpy as np
import urllib.request
import os
from math import floor

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
data_dim = 102  # default input width of the feature extractor's first layer


class FCN(torch.nn.Sequential):
    """Small fully connected feature extractor: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of input features. ``None`` (the default) resolves to
            the module-level ``data_dim`` when the network is constructed.
        hidden_dim: Width of the hidden layer (default 50).
        output_dim: Number of features the network emits (default 2).

    Calling ``FCN()`` with no arguments builds exactly the
    ``data_dim -> 50 -> 2`` network.
    """

    def __init__(self, input_dim=None, hidden_dim=50, output_dim=2):
        super().__init__()
        if input_dim is None:
            # Looked up at call time so a later change to `data_dim` is honoured.
            input_dim = data_dim
        # Sub-module names are part of the state_dict keys; keep them stable.
        self.add_module('linear1', torch.nn.Linear(input_dim, hidden_dim))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(hidden_dim, output_dim))


# Module-level instance; GPRegressionModel picks this object up by name.
feature_extractor = FCN()

class GPRegressionModel(gpytorch.models.ExactGP):
    """Exact GP whose kernel operates on neural-network features of the input.

    Inputs are passed through a feature extractor, rescaled to [-1, 1], and
    then fed to a constant mean and a grid-interpolated, scaled RBF kernel.

    Args:
        train_x: Training inputs, forwarded to ``ExactGP``.
        train_y: Training targets, forwarded to ``ExactGP``.
        likelihood: Likelihood object, forwarded to ``ExactGP``.
        extractor: Optional feature-extractor module. When ``None`` (default)
            the module-level ``feature_extractor`` is used, which means every
            model built without this argument shares one network and its
            weights. Pass a fresh module to give a model its own network.
            The kernel is configured for 2 input dimensions, so the extractor
            must emit 2 features per row.
    """

    def __init__(self, train_x, train_y, likelihood, extractor=None):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=2)),
            num_dims=2, grid_size=100
        )
        # Fall back to the shared module-level network to stay backward compatible.
        self.feature_extractor = feature_extractor if extractor is None else extractor

        # This module will scale the NN features so that they're nice values
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        """Return the GP prior at ``x`` as a ``MultivariateNormal``."""
        # We're first putting our data through a deep net (feature extractor)
        projected_x = self.feature_extractor(x)
        projected_x = self.scale_to_bounds(projected_x)  # Make the NN values "nice"

        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


def train(train_x, train_y, num_device=0, training_iterations=60, lr=0.01, verbose=True):
    """Fit a ``GPRegressionModel`` by minimising the negative exact marginal log likelihood.

    Args:
        train_x: Tensor of training inputs.
        train_y: Tensor of training targets.
        num_device: CUDA device index to use when CUDA is available; otherwise
            training runs on the CPU.
        training_iterations: Number of Adam steps (default 60).
        lr: Adam learning rate (default 0.01).
        verbose: When true (default), print the iteration index at every step.

    Returns:
        ``(model, likelihood)``. Both are left in training mode; switch them to
        ``.eval()`` before making predictions.
    """
    device = torch.device(f'cuda:{num_device}' if torch.cuda.is_available() else 'cpu')
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(train_x, train_y, likelihood)

    model.to(device)
    likelihood.to(device)

    # Transfer the data once instead of on every iteration.
    train_x = train_x.to(device)
    train_y = train_y.to(device)

    optimizer = torch.optim.Adam([
        {'params': model.feature_extractor.parameters()},
        {'params': model.covar_module.parameters()},
        {'params': model.mean_module.parameters()},
        {'params': model.likelihood.parameters()},
    ], lr=lr)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    model.train()
    likelihood.train()
    for i in range(training_iterations):
        if verbose:
            print(i)
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(train_x)

        # Calc loss and backprop derivatives
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    return model, likelihood


In [3]:
# Load the base table; the path is relative to the notebook's working directory.
df_base = pd.read_csv(filepath_or_buffer="../Data/data1.csv")

In [4]:
# Remove the 'Unnamed: 0' column (presumably a saved index -- confirm against the CSV).
# Non-in-place with errors='ignore' so re-running this cell does not raise KeyError.
df_base = df_base.drop(columns=['Unnamed: 0'], errors='ignore')

# Split on the 'gender_Female' column: strictly positive vs. zero-or-negative.
# Rows where the value is NaN satisfy neither comparison and land in neither frame.
df_base_pos = df_base[df_base['gender_Female'] > 0]
df_base_neg = df_base[df_base['gender_Female'] <= 0]

In [5]:
# Discard the inherited row labels and renumber each split from 0.
df_base_pos, df_base_neg = (
    split.reset_index(drop=True) for split in (df_base_pos, df_base_neg)
)

In [11]:
# Draw 1000 rows from the positive split. The fixed random_state makes the
# drawn subset identical on every run of the notebook.
temp_pos = df_base_pos.sample(n=1000, random_state=0)

# Features are every column except 'income'; 'income' is the regression target.
pos_queried = temp_pos.drop(columns='income').values
pos_labels = temp_pos['income'].values

In [12]:
#### training the GP ####
# Features and labels are cast to float32 tensors before being handed to train().
model_pos, likelihood_pos = train(
    train_x=torch.tensor(pos_queried.astype(np.float32)),
    train_y=torch.tensor(pos_labels.astype(np.float32)),
    num_device=0,
)

# model_neg, likelihood_neg = train(
#     torch.tensor(np.concatenate(neg_queried, axis=0).astype(np.float32)),
#     torch.tensor(np.concatenate(neg_labels, axis=0)).flatten(),
#     num_device=0
# )

(1000, 1)