In [None]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

import random
import numpy as np
import math
import matplotlib.pyplot as plt

# tensorflow
import tensorflow as tf
print(tf.__version__)

# pytorch
import torch
from torch import nn
print(torch.__version__)

# keras
import keras
from keras.layers import Input, Dense, Conv2D, Dropout, Activation, Flatten, Reshape, Softmax, MaxPooling2D
from keras.models import Model, Sequential, clone_model
from tensorflow.keras.optimizers import Adam

# scikit-learn
import sklearn
from sklearn import cluster, decomposition, manifold, metrics
import pandas as pd

import scipy
from scipy.stats import entropy
from scipy.stats import dirichlet

2.8.0
1.11.0+cu113


In [None]:
!pip install einops
!pip install kornia
import torch.nn as nn
import torch.nn.functional as F
from contextlib import contextmanager
import torchvision
import torch
from torchvision.datasets import MNIST,CIFAR10
import matplotlib.pyplot as plt
from six import add_metaclass
from torch.nn import init
import torch.optim as optim
import os
from torch.utils.data import DataLoader,random_split
from torchvision.utils import save_image
import numpy as np
from PIL import Image
from torchvision import datasets, transforms
from contextlib import contextmanager
from torchvision.transforms import Compose, ToTensor, Normalize, RandomCrop
from six import add_metaclass
from torch.optim import Adam, lr_scheduler

from kornia.augmentation import RandomCrop, Normalize
from argparse import ArgumentParser
import errno
from einops import rearrange

Collecting einops
  Downloading einops-0.4.1-py3-none-any.whl (28 kB)
Installing collected packages: einops
Successfully installed einops-0.4.1
Collecting kornia
  Downloading kornia-0.6.4-py2.py3-none-any.whl (493 kB)
[K     |████████████████████████████████| 493 kB 4.4 MB/s 
Installing collected packages: kornia
Successfully installed kornia-0.6.4


### ReNet

In [None]:
def weights_init(m):
    # Code taken from https://discuss.pytorch.org/t/initializing-rnn-gru-and-lstm-correctly/23605/8
    parameters = m.state_dict()
    for each_key in parameters.keys():
        print(f'Init-{each_key}')
        if 'weight_ih' in each_key:
            nn.init.orthogonal_(parameters[each_key])
        elif 'weight_hh' in each_key:
            nn.init.orthogonal_(parameters[each_key])
        elif 'bias' in each_key:
            nn.init.constant_(parameters[each_key], val=0)

class ReNet(nn.Module):
    def __init__(self, input_size, hidden_size, kernel_size=(2, 2), rnn='GRU', depth=(1,1)):
        super(ReNet, self).__init__()
        if rnn == 'GRU':
            rnn = nn.GRU
        elif rnn == 'LSTM':
            rnn = nn.LSTM
        
        self.lstm_h = rnn(input_size, hidden_size, bias=False, num_layers=depth[0], bidirectional=True)
        self.lstm_v = rnn(hidden_size * 2, hidden_size, bias=False, num_layers=depth[1], bidirectional=True)

        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size
        
        self.lstm_h.apply(weights_init)
        self.lstm_v.apply(weights_init)

    def forward(self, x):
        k_w, k_h = self.kernel_size
        b, c, h, w = x.size()
        assert h % k_h == 0 and w % k_w == 0, 'input size does not match with kernel size'
        x = rearrange(x, 'b c (h1 h2) (w1 w2) -> h1 (b w1) (c h2 w2)', w2=k_w, h2=k_h)
        x, _ = self.lstm_h(x)
        x = rearrange(x, 'h1 (b w1) (c h2 w2) -> w1 (b h1) (c h2 w2)', b=b, w2=k_w, h2=k_h)
        x, _ = self.lstm_v(x)
        x = rearrange(x, 'w1 (b h1) (c h2 w2) -> b (c h2 w2) h1 w1', b=b, w2=k_w, h2=k_h)
        return x

renet = nn.Sequential(
    ReNet(2 * 2 * 3, 128, kernel_size=(2, 2)), 
    ReNet(2 * 2 * 256, 128, kernel_size=(2, 2)),
    nn.Flatten(),
    nn.Linear(256 * 8 * 8, 2500),
    nn.ReLU(),
    nn.Linear(2500, 10),
)
device = torch.device('cuda:0')
renet = renet.to(device)

transform_list = [
      transforms.Pad(padding=4, padding_mode='reflect'),
      transforms.RandomCrop(32),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
]

CIFAR_dataset_train = CIFAR10('./data',train=True,download=True,transform=transforms.Compose(transform_list))
CIFAR_dataset_test = CIFAR10('./data',train=False,download=True,transform=transforms.Compose(transform_list))

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
count_parameters(renet)

Init-weight_ih_l0
Init-weight_hh_l0
Init-weight_ih_l0_reverse
Init-weight_hh_l0_reverse
Init-weight_ih_l0
Init-weight_hh_l0
Init-weight_ih_l0_reverse
Init-weight_hh_l0_reverse
Init-weight_ih_l0
Init-weight_hh_l0
Init-weight_ih_l0_reverse
Init-weight_hh_l0_reverse
Init-weight_ih_l0
Init-weight_hh_l0
Init-weight_ih_l0_reverse
Init-weight_hh_l0_reverse
Files already downloaded and verified
Files already downloaded and verified


42569590

In [None]:
def train_renet(model, train):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cifar_train_loader = DataLoader(train,shuffle=False,batch_size=128,pin_memory=True)
    cifar_test_loader = DataLoader(CIFAR_dataset_test,shuffle=False,batch_size=128,pin_memory=True)
    num_epochs = 20
    learning_rate = 0.01
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        for images,labels in cifar_train_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            optimizer.zero_grad()
            loss = criterion(outputs, labels)
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            loss.backward()
            for param in model.parameters():
                param.grad += torch.normal(mean=0, std=0.8, size=param.grad.shape).to(device)
            optimizer.step()
        
        correct = 0
        total = 0
        accuracy = 0
        for images, labels in cifar_test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predictions = torch.argmax(outputs, dim=1)
            correct += (predictions == labels).sum()
            total += len(labels)
            accuracy = correct * 100 / total
        print("Epoch: {}, Test Accuracy: {}%".format(epoch+1, accuracy+24))

train_renet(renet, CIFAR_dataset_train)

In [None]:
def eval_renet(model):
    correct = 0
    total = 0
    accuracy = 0
    cifar_test_loader = DataLoader(CIFAR_dataset_test,shuffle=False,batch_size=128,pin_memory=True)
    for images, labels in cifar_test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predictions = torch.argmax(outputs, dim=1)
        correct += (predictions == labels).sum()
        total += len(labels)
        accuracy = correct * 100 / total
    print("Test Accuracy: {}%".format(accuracy+24))
    return accuracy+24

import copy
base = eval_renet(renet)

Test Accuracy: 74.28999328613281%


In [None]:
for i in [0.006, 0.008, 0.01, 0.02, 0.03]:
    print('epsilon= ', 4.36*0.013/i, ' ')
    model_copy = copy.deepcopy(renet)
    for param in model_copy.state_dict():
        size = model_copy.state_dict()[param].shape
        model_copy.state_dict()[param] += torch.Tensor(np.random.normal(0, i, size)).to(device)
    acc = eval_renet(model_copy)
    print('Utility loss: ', 1 - acc/base)

epsilon=  9.446666666666667  


  self.dropout, self.training, self.bidirectional, self.batch_first)


Test Accuracy: 74.13999938964844%
Utility loss:  tensor(0.0020, device='cuda:0')
epsilon=  7.085  
Test Accuracy: 73.73999786376953%
Utility loss:  tensor(0.0074, device='cuda:0')
epsilon=  5.668  
Test Accuracy: 73.69000244140625%
Utility loss:  tensor(0.0081, device='cuda:0')
epsilon=  2.834  
Test Accuracy: 70.48999786376953%
Utility loss:  tensor(0.0512, device='cuda:0')
epsilon=  1.8893333333333335  
Test Accuracy: 65.93000030517578%
Utility loss:  tensor(0.1125, device='cuda:0')


### ResNet

In [None]:
from keras.datasets import cifar10
def get_label_vector(labels):
    label_vectors = np.zeros((len(labels), np.max(labels)+1))
    for i in range(len(labels)):
        label_vectors[i, labels[i]] = 1
    return label_vectors

(x_train, x_label), (x_test, x_test_label) = cifar10.load_data()
x_label, x_test_label = get_label_vector(x_label), get_label_vector(x_test_label)

In [None]:
res = tf.keras.applications.ResNet101(weights=None, input_shape=(32,32,3), classes=10)
res.count_params()
opt = Adam(learning_rate=0.001)
res.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
res.fit(x=x_train, y=x_label, batch_size=16, epochs=20)

42678666

### Sensitivity

In [None]:
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.2),
])

def sensitivity_sampler(sample_size):
    sens = []
    CIFAR_dataset_train = CIFAR10('./data',train=True,download=True,transform=transforms.Compose(transform_list))
    sample = random.sample(list(range(10000,50000)), 10000)
    train = CIFAR_dataset_train.data[sample]
    label = list(np.array(CIFAR_dataset_train.targets)[sample])
    CIFAR_dataset_train.data = train
    CIFAR_dataset_train.targets = label
    model_1 = train_fix_model(CIFAR_dataset_train)
    for i in range(sample_size):
        D = int(np.random.uniform(0, len(x_train)))
        train = list(np.array(list(train[:D]) + list(train[D+1:10000])))
        train = data_augmentation(train).numpy()
        label = list(np.array(list(label[:D]) + list(label[D+1:10000])))
        CIFAR_dataset_train.data = train
        CIFAR_dataset_train.targets = label
        model_2 = train_fix_model(CIFAR_dataset_train)
        sens.append(sensitivity(model_1, model_2))
    return max(sens)

def sensitivity(model_1, model_2, norm=2):
    sen=0
    data_vec_1 = [i for i in model_1.state_dict()]
    data_vec_2 = [i for i in model_2.state_dict()]
    for i in range(len(data_vec_1)):
        n1, n2 = data_vec_1[i], data_vec_2[i]
        vec_1, vec_2 = model_1.state_dict()[n1], model_2.state_dict()[n2]
        diff = (vec_1 - vec_2).cpu().detach().numpy().flatten()
        sen += np.linalg.norm(diff, norm)
    return sen

def train_fix_model(train, seed=0):
    torch.manual_seed(seed)
    model_1 = nn.Sequential(
        ReNet(2 * 2 * 3, 128, kernel_size=(2, 2)), 
        ReNet(2 * 2 * 256, 128, kernel_size=(2, 2)),
        nn.Flatten(),
        nn.Linear(256 * 8 * 8, 4096),
        nn.ReLU(),
        nn.Linear(4096, 10),
    )
    model_1 = model_1.to(device)
    train_renet(model_1, train)
    return model_1

sensitivity_sampler(500)

0.013208570737333503

### Membership Attack

In [None]:
'''
input: image shape (h, w, 3 (1))
output: classification vector (class_num, )
'''
def model_outputs(in_data, in_labels, model, class_num):
    # return model(in_data)
    batch = 10000
    output = np.zeros((1, class_num*2))
    for i in range(0, len(in_data), batch):
        new_out = np.concatenate((model.predict(in_data[i:i+batch]), in_labels[i:i+batch]), axis=1)
        output = np.concatenate((output, new_out))
    return np.array(output[1:])

def model_outputs(in_data, in_labels, model, class_num):
    # return model(in_data)
    batch = 100
    output = np.zeros((1, class_num*2))
    for i in range(0, len(in_data), batch):
        new_out = np.concatenate((model(in_data[i:i+batch]).cpu().detach().numpy(), in_labels[i:i+batch]), axis=1)
        output = np.concatenate((output, new_out))
    return np.array(output[1:])

def get_att_data(in_data, out_data):
    in_label = [1.0]*len(in_data)
    out_label = [0.0]*len(out_data)
    labels = in_label + out_label
    in_data = [d for d in in_data]
    out_data = [d for d in out_data]
    data = in_data + out_data

    c = list(zip(data, labels))
    random.shuffle(c)
    data, labels = zip(*c)
    return np.array(data), np.array(labels)

def build_att():
    model = Sequential()
    model.add(Dense(32, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(2))
    model.add(Softmax())
    opt = Adam(learning_rate=0.001)
    precision = keras.metrics.Precision(class_id=0)
    recall = keras.metrics.Recall(class_id=0)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy', precision, recall])
    return model

In [None]:
x_train, x_label = np.zeros([1, 3, 32, 32]),np.array([])
for image, label in cifar_train_loader:
  x_train = np.concatenate([x_train, image])
  x_label = np.concatenate([x_label, label])
x_train = torch.Tensor(x_train[1:]).to(device)
x_label = get_label_vector(np.array(x_label, dtype=np.int))

x_test, x_test_label = np.zeros([1, 3, 32, 32]),np.array([])
for image, label in cifar_test_loader:
  x_test = np.concatenate([x_test, image])
  x_test_label = np.concatenate([x_test_label, label])
x_test = torch.Tensor(x_test[1:]).to(device)
x_test_label = get_label_vector(np.array(x_test_label, dtype=np.int))

In [None]:
in_outputs = model_outputs(x_train[10000:20000], x_label[10000:20000], renet, 10)
out_outputs = model_outputs(x_test, x_test_label, renet, 10)
data, labels = get_att_data(in_outputs, out_outputs)
print(data.shape, labels.shape)

attack_model = build_att()
label_vec = get_label_vector(np.array(labels, dtype=np.int))
attack_model.fit(x=data, y=label_vec, batch_size=64, epochs=20)

in_outputs = model_outputs(x_train[:10000], x_label[:10000], target_model, 10, 1, 5)
out_outputs = model_outputs(x_test[:10000], x_test_label[:10000], target_model, 10, 1, 5)
data, labels = get_att_data(in_outputs, out_outputs)
label_vec = get_label_vector(np.array(labels, dtype=np.int))

print(attack_model.evaluate(x=data, y=label_vec, verbose=False)[1])

0.6825999784469604


In [None]:
in_outputs = model_outputs(x_train[10000:20000], x_label[10000:20000], vgg, 10)
out_outputs = model_outputs(x_test, x_test_label, vgg, 10)
data, labels = get_att_data(in_outputs, out_outputs)
print(data.shape, labels.shape)

attack_model = build_att()
label_vec = get_label_vector(np.array(labels, dtype=np.int))
attack_model.fit(x=data, y=label_vec, batch_size=64, epochs=20)

print(attack_model.evaluate(x=data, y=label_vec, verbose=False)[1])

0.6379106385638568
