In [1]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
from sklearn.datasets import make_classification
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from IPython import display
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from collections import defaultdict

import sklearn.datasets

torch.manual_seed(1)
np.random.seed(7)
sns.set(style="white", palette="muted", color_codes=True, context="talk")

%matplotlib inline
print(torch.__version__) 

1.6.0


In [49]:
n_classes = 2

X, y = sklearn.datasets.make_classification(n_samples=1000,
                                            n_features=10,
                                            n_informative=5,
                                            n_redundant=2,
                                            n_repeated=0,
                                            class_sep=0.5,
                                            n_classes=n_classes,
                                            random_state = 4)

n_features = X.shape[1]

In [50]:
(X_train, X_test, y_train, y_test) = train_test_split(X, y, test_size=0.2, random_state=7)
print('len train:', len(X_train))
print('len test:', len(X_test))

len train: 800
len test: 200


In [51]:
BATCH_SIZE = 64

training_dataset = TensorDataset(torch.from_numpy(X_train).float(), 
                                 torch.from_numpy(y_train).long())
train_loader = DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

testing_dataset = TensorDataset(torch.from_numpy(X_test).float(), 
                                torch.from_numpy(y_test).long())
test_loader = DataLoader(testing_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

In [66]:
class Classifier(nn.Module):
    def __init__(self, n_features, n_hidden=256):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(n_features, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_classes),
            nn.LogSoftmax()
        )

    def forward(self, x):
        return self.network(x)

In [67]:
def accuracy(model, X, y):
    Xt = torch.from_numpy(X).float()
    yt = torch.from_numpy(y).long()
    outputs = model(Xt)
    values, indices = outputs.max(dim=1)
    y_hat = indices.detach().numpy()
    accuracy = np.sum(y_hat == y) / len(y)
    return accuracy

In [68]:
import autograd_hacks
import immediate_sensitivity_primitives as isp

In [69]:
def get_eps(epsilon, alpha, delta):
    ed_eps = epsilon + np.log(1/delta)/(alpha - 1)
    print(f'Total epsilon = {ed_eps}, delta = {delta}')
    return ed_eps

In [70]:
def run_experiment(epsilon, epochs, add_noise=False):
    # reset the model
    model = Classifier(n_features=n_features)
    model_criterion = nn.NLLLoss() 
    model_optimizer = optim.Adam(model.parameters(),lr=0.001)
    autograd_hacks.add_hooks(model)

    alpha = 200
    C = 5
    epsilon_iter = epsilon / epochs

    for epoch in range(epochs):
        for x_batch_train, y_batch_train in train_loader:
            model_optimizer.zero_grad()
            inp = Variable(x_batch_train, requires_grad=True)
            outputs = model.forward(inp)
            loss = model_criterion(outputs, y_batch_train)
            loss.backward()
            autograd_hacks.compute_grad1(model)
            mn = isp.clipped_autograd(model, C)
            autograd_hacks.clear_backprops(model)
            
            if add_noise:
                sigma_sq = np.sqrt(((C/BATCH_SIZE)**2 * alpha) / (2 * epsilon_iter))
                sigma = np.sqrt(sigma_sq)
                #print(sigma)
                with torch.no_grad():
                    for p in model.parameters():
                        p.grad += (sigma * torch.randn(1).float())

            model_optimizer.step()

    return model

In [71]:
model = run_experiment(1, 10, True)
accuracy(model, X_test, y_test)

0.505

In [72]:
def one_experiment(epsilon):
    model = run_experiment(epsilon, 10, True)
    return accuracy(model, X_test, y_test)

In [73]:
def run_experiments():
    epsilons = [0.01, 0.1, 1.0, 10.0, 100.0]
    runs = 10
    alpha = 200
    results = {}
    
    for eps in epsilons:
        ed_eps = get_eps(eps, 200, 1e-5)
        results[ed_eps] = [one_experiment(eps) for _ in range(runs)]
    
    return results

In [74]:
all_results = run_experiments()

Total epsilon = 0.06785389680889561, delta = 1e-05
Total epsilon = 0.1578538968088956, delta = 1e-05
Total epsilon = 1.0578538968088955, delta = 1e-05
Total epsilon = 10.057853896808895, delta = 1e-05
Total epsilon = 100.0578538968089, delta = 1e-05


In [75]:
setting = 'baseline'

In [77]:
print(f'{setting}_epsilons = {list(all_results.keys())}')
print(f'{setting}_means = {[np.mean(vs) for vs in all_results.values()]}')
print(f'{setting}_stds = {[np.std(vs) for vs in all_results.values()]}')

baseline_epsilons = [0.06785389680889561, 0.1578538968088956, 1.0578538968088955, 10.057853896808895, 100.0578538968089]
baseline_means = [0.472, 0.5055, 0.49400000000000005, 0.522, 0.726]
baseline_stds = [0.021000000000000005, 0.07538070044779366, 0.032, 0.0987977732542591, 0.159245094115957]
