In [0]:
from __future__ import print_function

import os
import shutil
import time
import random
import uuid
import torch
from torch.optim.optimizer import Optimizer, required
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import argparse
import torch.optim as optim
from collections import OrderedDict
import numpy as np
from torchvision import datasets, transforms
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.utils.data as data
import torchvision.datasets as datasets
use_cuda = torch.cuda.is_available()

In [0]:
class AverageMeter(object):
    """Computes and stores the average and current value
       Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [0]:
# VGG

import torch
import torch.nn as nn


cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)



class InferenceAttack_HZ(nn.Module):
    def __init__(self,num_classes):
        self.num_classes=num_classes
        super(InferenceAttack_HZ, self).__init__()
        self.features=nn.Sequential(
            nn.Linear(10,1024),
            nn.ReLU(),
            nn.Linear(1024,512),
            nn.ReLU(),
            nn.Linear(512,64),
            nn.ReLU(),
            )
        self.labels=nn.Sequential(
           nn.Linear(num_classes,128),
            nn.ReLU(),
            nn.Linear(128,64),
            nn.ReLU(),
            )
        self.combine=nn.Sequential(
            nn.Linear(64*2,256),
            
            nn.ReLU(),
            nn.Linear(256,128),
            
            nn.ReLU(),
            nn.Linear(128,64),
            nn.ReLU(),
            nn.Linear(64,1),
            )
        for key in self.state_dict():
            print (key)
            if key.split('.')[-1] == 'weight':    
                nn.init.normal(self.state_dict()[key], std=0.01)
                print (key)
                
            elif key.split('.')[-1] == 'bias':
                self.state_dict()[key][...] = 0
        self.output= nn.Sigmoid()
 
    def forward(self,x,l):       
        out_x = self.features(x)
        out_l = self.labels(l)      
        is_member =self.combine( torch.cat((out_x  ,out_l),1))     
        return self.output(is_member)


In [7]:
batch_privacy=100
train_batch=100
test_batch=100
lr=0.05
state={}
state['lr']=lr


model = VGG('VGG16')
model = model.cuda()
criterion = nn.CrossEntropyLoss()
criterion_attack = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
inferenece_model = InferenceAttack_HZ(10).cuda()
private_train_criterion = nn.MSELoss()
optimizer_mem = optim.Adam(inferenece_model.parameters(), lr=0.00001)

features.0.weight
features.0.weight
features.0.bias
features.2.weight
features.2.weight
features.2.bias
features.4.weight
features.4.weight
features.4.bias
labels.0.weight
labels.0.weight
labels.0.bias
labels.2.weight
labels.2.weight
labels.2.bias
combine.0.weight
combine.0.weight
combine.0.bias
combine.2.weight
combine.2.weight
combine.2.bias
combine.4.weight
combine.4.weight
combine.4.bias
combine.6.weight
combine.6.weight
combine.6.bias




In [8]:
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=1)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=1)


trainset_private = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
trainloader_private = data.DataLoader(trainset, batch_size=batch_privacy, shuffle=True, num_workers=1)


r = np.arange(50000)
np.random.shuffle(r)

private_trainset_intrain = []
private_trainset_intest = []

private_testset_intrain =[] 
private_testset_intest =[] 


for i in range(25000):
    private_trainset_intrain.append(trainset[r[i]])


for i in range(25000,50000):
    private_testset_intrain.append(trainset[r[i]])

    
r = np.arange(10000)
np.random.shuffle(r)
  
for i in range(5000):
    private_trainset_intest.append(testset[r[i]])


for i in range(5000,10000):
    private_testset_intest.append(testset[r[i]])


private_trainloader_intrain = data.DataLoader(private_trainset_intrain, batch_size=batch_privacy, shuffle=True, num_workers=1)
private_trainloader_intest = data.DataLoader(private_trainset_intest, batch_size=batch_privacy, shuffle=True, num_workers=1)


private_testloader_intrain = data.DataLoader(private_testset_intrain, batch_size=batch_privacy, shuffle=True, num_workers=1)
private_testloader_intest = data.DataLoader(private_testset_intest, batch_size=batch_privacy, shuffle=True, num_workers=1)



==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [0]:
#privacy_train = train_inference_model
def train_inference_model(trainloader, model,inference_model, criterion, optimizer, epoch, use_cuda,num_batchs=1000):
    global best_acc
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    mtop1_a = AverageMeter()
    mtop5_a = AverageMeter()
    
    inference_model.train()
    model.eval()
    # switch to evaluate mode

    end = time.time()
    first_id = -1
    for batch_idx,((tr_input, tr_target) ,(te_input, te_target)) in trainloader:
        # measure data loading time
        if first_id == -1:
            first_id = batch_idx
        
        data_time.update(time.time() - end)
        tr_input = tr_input.cuda()
        te_input = te_input.cuda()
        tr_target = tr_target.cuda()
        te_target = te_target.cuda()
        
        
        v_tr_input = torch.autograd.Variable(tr_input)
        v_te_input = torch.autograd.Variable(te_input)
        v_tr_target = torch.autograd.Variable(tr_target)
        v_te_target = torch.autograd.Variable(te_target)
        
        # compute output
        model_input =torch.cat((v_tr_input,v_te_input))
        
        pred_outputs = model(model_input)
        
        infer_input= torch.cat((v_tr_target,v_te_target))
        
        mtop1, mtop5 =accuracy(pred_outputs.data, infer_input.data, topk=(1, 5))
        
        mtop1_a.update(mtop1.item(), model_input.size(0))
        mtop5_a.update(mtop5.item(), model_input.size(0))

        
        
        one_hot_tr = torch.from_numpy((np.zeros((infer_input.size(0),10))-1)).cuda().type(torch.cuda.FloatTensor)
        target_one_hot_tr = one_hot_tr.scatter_(1, infer_input.type(torch.cuda.LongTensor).view([-1,1]).data,1)

        infer_input_one_hot = torch.autograd.Variable(target_one_hot_tr)
        

        attack_model_input = pred_outputs#torch.cat((pred_outputs,infer_input_one_hot),1)
        member_output = inference_model(attack_model_input,infer_input_one_hot)
        
        
        
        is_member_labels = torch.from_numpy(np.reshape(np.concatenate((np.zeros(v_tr_input.size(0)),np.ones(v_te_input.size(0)))),[-1,1])).cuda()
        
        v_is_member_labels = torch.autograd.Variable(is_member_labels).type(torch.cuda.FloatTensor)

        loss = criterion(member_output, v_is_member_labels)

        # measure accuracy and record loss
        prec1=np.mean((member_output.data.cpu().numpy() >0.5)==v_is_member_labels.data.cpu().numpy())
        losses.update(loss.item(), model_input.size(0))
        top1.update(prec1, model_input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx-first_id > num_batchs:
            break

        # plot progress
        if batch_idx%100==0:
            print  ('({batch}/{size}) | Loss: {loss:.4f} | '.format(
                    batch=batch_idx ,
                    size=500,
                    loss=losses.avg,
                    ))

    return (losses.avg, top1.avg)

#train_privatly=train_model_advreg
def train_model_advreg(trainloader, model,inference_model, criterion, optimizer, epoch, use_cuda,num_batchs=10000,alpha=0.9):
    # switch to train mode
    model.train()
    inference_model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    first_id = -1
    for batch_idx, (inputs, targets) in (trainloader):
        # measure data loading time
        data_time.update(time.time() - end)
        if first_id == -1:
            first_id = batch_idx
        
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(async=True)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        
        
        one_hot_tr = torch.from_numpy((np.zeros((outputs.size(0),10))-1)).cuda().type(torch.cuda.FloatTensor)
        target_one_hot_tr = one_hot_tr.scatter_(1, targets.type(torch.cuda.LongTensor).view([-1,1]).data,1)

        infer_input_one_hot = torch.autograd.Variable(target_one_hot_tr)
        
        inference_output = inference_model ( outputs,infer_input_one_hot)
        #print (inference_output.mean())
        

        loss = criterion(outputs, targets) + (alpha)*(((inference_output-1.0).pow(2).mean()))

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        if batch_idx%100==0:
            print  ('({batch}/{size}) | Loss: {loss:.4f} |'.format(
                    batch=batch_idx + 1,
                    size=500,
                    loss=losses.avg,
                    ))
        if batch_idx-first_id >= num_batchs:
            break

    return (losses.avg, top1.avg)


def test(testloader, model, criterion, epoch, use_cuda):
    global best_acc
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(testloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = torch.autograd.Variable(inputs, volatile=True), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    return (losses.avg, top1.avg)

In [0]:
epochs=200

for epoch in range(0, epochs):

    print('\nEpoch: [%d | %d]' % (epoch + 1, epochs))

    train_enum = enumerate(trainloader)
    train_private_enum = enumerate(zip(trainloader_private,testloader))
    for i in range(500//2):
        
        if epoch>3:
            privacy_loss, privacy_acc = train_inference_model(train_private_enum,model,inferenece_model,criterion_attack,optimizer_mem,epoch,use_cuda,1)
            train_loss, train_acc = train_model_advreg(train_enum, model,inferenece_model, criterion, optimizer, epoch, use_cuda,1,1)
            
            if i%100 ==0:
                print('Privacy Accuracy',privacy_acc)
                print('Training Accuracy',train_acc)
            if  (i+1)%50 ==0:
                train_private_enum = enumerate(zip(trainloader_private,testloader))
        else:
            train_loss, train_acc = train_model_advreg(train_enum, model,inferenece_model, criterion, optimizer, epoch, use_cuda,1000,0)
            break
        
        
    test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)
    train_loss, train_acc = test(trainloader, model, criterion, epoch, use_cuda)
    print ('Train Accuracy',train_acc)
    print ('Test Accuracy',test_acc)


Epoch: [1 | 200]
(1/500) | Loss: 2.6037 |
(101/500) | Loss: 2.9935 |
(201/500) | Loss: 2.6883 |
(301/500) | Loss: 2.5036 |




Train Accuracy 23.186
Test Accuracy 24.1

Epoch: [2 | 200]
(1/500) | Loss: 1.9584 |
(101/500) | Loss: 1.9704 |
(201/500) | Loss: 1.9316 |
(301/500) | Loss: 1.8992 |
Train Accuracy 32.532
Test Accuracy 34.07

Epoch: [3 | 200]
(1/500) | Loss: 1.7505 |
(101/500) | Loss: 1.7452 |
(201/500) | Loss: 1.7087 |
(301/500) | Loss: 1.6793 |
Train Accuracy 42.09
Test Accuracy 42.9

Epoch: [4 | 200]
