# Learning private models with multiple teachers

Protocol:
1. Train teachers:
    - Divide the training set into non-overlapping buckets
    - Train a model (teacher) on each bucket
2. Train student:
    - Extract a share of the test set
    - Ensemble predictions from teachers: query each teacher for predictions on the test set share
    - Aggregate teacher predictions to get student training labels using noisy max: it
  adds Laplacian noise to the label counts and returns the label with the highest noisy count
    - Train the student with the aggregated labels
    - Validate the student model on the remaining test data

http://www.cleverhans.io/privacy/2018/04/29/privacy-and-machine-learning.html
https://github.com/tensorflow/models/tree/master/research/differential_privacy/multiple_teachers

In [1]:
from syft.dp.pate import train_teachers, train_student
from torchvision import datasets, transforms

import torch
import torch.nn as nn
import torch.nn.functional as F

def prepare_mnist():
    """Return MNIST as four in-memory tensors.

    Each split is wrapped in a ``DataLoader`` whose batch size (60000 for
    training, 10000 for test) is large enough that a single batch is expected
    to hold the whole split; only that first batch is read.  Images go through
    ``ToTensor`` and are then normalized with mean 0.1307 and std 0.3081.

    The training split is downloaded into ``./data`` when missing and is
    shuffled.  The test split is read from the same directory in stored order.

    Returns:
        Tuple ``(train_data, train_labels, test_data, test_labels)``.
    """

    def _build_transform():
        # A fresh pipeline per dataset; both splits use the same two steps.
        return transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        )

    loader_options = {"num_workers": 1}

    train_set = datasets.MNIST(
        "./data", train=True, download=True, transform=_build_transform()
    )
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=60000, shuffle=True, **loader_options
    )

    # No download flag here, so the files must already be present in ./data.
    test_set = datasets.MNIST("./data", train=False, transform=_build_transform())
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=10000, shuffle=False, **loader_options
    )

    # Pull exactly one batch from each loader, training split first.
    train_batch = next(iter(train_loader))
    test_batch = next(iter(test_loader))

    train_data, train_labels = train_batch
    test_data, test_labels = test_batch
    return train_data, train_labels, test_data, test_labels

In [2]:
# For this demo, we use the MNIST dataset: prepare_mnist() returns the train and
# test splits as (data, labels) tensors held in memory.
train_data, train_labels, test_data, test_labels = prepare_mnist()

In [3]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader

# Record which PyTorch version produced the results below.
print(torch.__version__)

# Training settings, one row per option: (flag, type, default, metavar, help).
parser = argparse.ArgumentParser(description='PyTorch Example')
for _flag, _type, _default, _metavar, _help in (
    ('--batch-size', int, 8, 'N', 'input batch size for training (default: 8)'),
    ('--test-batch-size', int, 8, 'N', 'input batch size for testing (default: 8)'),
    ('--epochs', int, 10, 'N', 'number of epochs to train (default: 10)'),
    ('--lr', float, 0.001, 'LR', 'learning rate (default: 0.001)'),
    ('--momentum', float, 0.0, 'M', 'SGD momentum (default: 0.0)'),
    ('--seed', int, 1, 'S', 'random seed (default: 1)'),
    ('--log-interval', int, 10, 'N',
     'how many batches to wait before logging training status'),
):
    parser.add_argument(
        _flag, type=_type, default=_default, metavar=_metavar, help=_help
    )

# Parse an empty argument list so every option takes its default instead of
# being read from sys.argv.
args = parser.parse_args([])

# Seed torch's random number generator from the parsed seed.
torch.manual_seed(args.seed)
kwargs = {}

0.3.1


In [4]:
# Name of the dataset used in this demo.
dataset = "mnist" 
# Number of label classes (MNIST has ten digit classes).
nb_labels = 10
# Number of teachers: one virtual worker and one training partition are
# created per teacher in the cells below.
nb_teachers = 100 
# NOTE(review): presumably the number of test samples handed to the student;
# not used in the visible cells, confirm against train_student.
stdnt_share = 1000
# NOTE(review): presumably the scale of the Laplacian noise used when
# aggregating teacher votes; not used in the visible cells, confirm.
lap_scale = 10

In [5]:
import syft as sy
from syft import Variable as Var
from syft import nn
from syft import optim

In [6]:
# hook = sy.TorchHook(verbose=False)
# me = hook.local_worker
# bob = sy.VirtualWorker(id="bob",hook=hook, is_client_worker=False)
# alice = sy.VirtualWorker(id="alice",hook=hook, is_client_worker=False)
# me.is_client_worker = False

# compute_nodes = [bob, alice]

# bob.add_workers([alice])
# alice.add_workers([bob])

In [7]:
# Create the syft hook and keep a handle on its local worker.
hook = sy.TorchHook(verbose=False)
me = hook.local_worker

# One virtual worker per teacher, identified by its index rendered as a string.
compute_nodes = [
    sy.VirtualWorker(id=str(idx), hook=hook) for idx in range(nb_teachers)
]

# Register every teacher worker with the local worker.  The teacher workers
# are not registered with one another.
for node in compute_nodes:
    me.add_worker(node)





In [8]:
#compute_nodes[0].add_workers(compute_nodes[1:])

In [9]:
from syft.dp.pate import partition_dataset

# One (data, labels) pair per teacher, stored in compute_nodes order.
train_distributed_dataset = []

for node in compute_nodes:
    # The numeric worker id selects both the partition of the training set
    # and the destination entry in compute_nodes.
    worker_id = int(node.id)

    data, labels = partition_dataset(train_data, train_labels, nb_teachers, worker_id)
    data = Variable(data)
    labels = Variable(labels.type(torch.LongTensor))

    # Ship both tensors to the worker, then keep the local handles in the list.
    destination = compute_nodes[worker_id]
    data.send(destination)
    labels.send(destination)
    train_distributed_dataset.append((data, labels))

In [10]:
# Display the first teacher's (data, labels) pair.
train_distributed_dataset[0]

(Variable containing:FloatTensor[_PointerTensor - id:37120276766 owner:me loc:0 id@loc:45432860737],
 Variable containing:LongTensor[_PointerTensor - id:83876207727 owner:me loc:0 id@loc:5293758686])

In [11]:
# class CNN_Model(nn.Module):
#     def __init__(self, num_classes):
#         super(CNN_Model, self).__init__()
#         self.conv1 = nn.Conv2d(1, 16, 5, stride = 1)
#         #self.batchnorm1 = nn.BatchNorm2d(16)
#         self.relu1 = nn.ReLU()
#         self.avgpool1 = nn.AvgPool2d(2)
#         self.conv2 = nn.Conv2d(16, 16, 5, stride = 1)
#         #self.batchnorm2 = nn.BatchNorm2d(16)
#         self.relu2 = nn.ReLU()
#         self.avgpool2 = nn.AvgPool2d(2)
#         self.linear1 = nn.Linear(256, 100)
#         #self.batchnorm3 = nn.BatchNorm1d(100)
#         self.relu3 = nn.ReLU()
#         self.linear2 = nn.Linear(100, num_classes)

#     def forward(self, x):
#         x = self.conv1(x)
#         #x = self.batchnorm1(x)
#         x = self.relu1(x)
#         x = self.avgpool1(x)
#         x = self.conv2(x)
#         #x = self.batchnorm2(x)
#         x = self.relu2(x)
#         x = self.avgpool2(x)
#         x = x.view(-1, 256)
#         x = self.linear1(x)
#         #x = self.batchnorm3(x)
#         x = self.relu3(x)
#         out = self.linear2(x)
#         return out
    
# class CNN_Model(nn.Module):
#     def __init__(self, num_classes):
#         super(CNN_Model, self).__init__()
#         self.linear1 = nn.Linear(784, 100)
#         self.linear2 = nn.Linear(100, num_classes)

#     def forward(self, x):
#         x = x.view(-1, 784)
#         x = self.linear1(x)
#         out = self.linear2(x)
#         return out
    
    
class CNN_Model(nn.Module):
    """Small convolutional classifier: two 5x5 convolutions and two linear layers.

    A ReLU and a 2x2 average pooling follow the first convolution only; no
    activation is applied after the second convolution or between the two
    linear layers.  The flattening step in ``forward`` expects the second
    convolution to yield 1024 values per sample (16 channels of 8x8), which
    is what a 1x28x28 input produces.

    Args:
        num_classes: Width of the output layer, i.e. scores per sample.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 16, 5, stride=1)
        self.relu1 = nn.ReLU()
        self.avgpool1 = nn.AvgPool2d(2)
        self.conv2 = nn.Conv2d(16, 16, 5, stride=1)
        # Classifier head.
        self.linear1 = nn.Linear(1024, 100)
        self.linear2 = nn.Linear(100, num_classes)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes)."""
        pooled = self.avgpool1(self.relu1(self.conv1(x)))
        # Flatten every sample's feature maps into one 1024-long vector.
        flat = self.conv2(pooled).view(-1, 1024)
        return self.linear2(self.linear1(flat))
    
# class CNN_Model(nn.Module):
#     def __init__(self, num_classes):
#         super(CNN_Model, self).__init__()
#         self.conv1 = nn.Conv2d(1,16,5,stride=1)
#         #self.batchnorm1 = nn.BatchNorm2d(16)
#         self.relu1 = nn.ReLU()
#         self.avgpool1 = nn.AvgPool2d(2)
#         #self.conv2 = nn.Conv2d(16,16,5,stride=1)
#         self.linear1 = nn.Linear(2304, 100)
#         self.linear2 = nn.Linear(100, num_classes)

#     def forward(self, x):
#         x = self.conv1(x)
#         #x = self.batchnorm1(x)
#         x = self.relu1(x)
#         x = self.avgpool1(x)
#         #x = self.conv2(x)
#         x = x.view(-1, 2304)
#         x = self.linear1(x)
#         out = self.linear2(x)
#         return out
    
# Single shared model with 10 output classes.
model = CNN_Model(10)
# Only the learning rate is passed to SGD; args.momentum is not forwarded.
# NOTE(review): `optim` was rebound by `from syft import optim` in an earlier
# cell, so this is presumably syft's SGD rather than torch.optim.SGD; confirm.
optimizer = optim.SGD(model.parameters(), lr=args.lr)

In [12]:
#yo = model(Variable(train_data))

In [13]:
#yo.size()

In [14]:
def train(epoch):
    """Train the shared model for one pass over all distributed shards.

    For every ``(data, target)`` pair in ``train_distributed_dataset`` the
    model is sent to the worker that holds the data, a forward and backward
    pass is run against the remote tensors, and the model is fetched back
    before ``optimizer.step()`` is applied.  The worker and the data pointer
    are printed for each shard, and the loss is fetched and printed every
    ``args.log_interval`` shards.

    NOTE(review): the recorded run of this notebook fails inside this loop on
    the first shard with 'Tensor ... not found on worker "0"'; the cause is
    not evident from this function alone.

    Args:
        epoch: Epoch number supplied by the caller; not used in the body.
    """
    model.train()

    for shard_idx, shard in enumerate(train_distributed_dataset):
        data, target = shard

        # Move the model to wherever this shard lives.
        worker = data.location
        print(worker)
        print(data)
        model.send(worker)

        optimizer.zero_grad()
        # Forward and backward pass against the remote tensors.
        loss = F.cross_entropy(model(data), target)
        loss.backward()

        # Bring the model back before applying the update.
        model.get()
        optimizer.step()

        if shard_idx % args.log_interval != 0:
            continue
        # Fetch the loss from the worker so it can be printed.
        loss.get()
        print(loss)

In [15]:
# Override the parsed number of epochs for this run.
args.epochs = 10


# Epochs are numbered from 1 to args.epochs inclusive.
for epoch in range(1, args.epochs + 1):
    train(epoch)

<syft.core.workers.virtual.VirtualWorker id:0>
Variable containing:FloatTensor[_PointerTensor - id:37120276766 owner:me loc:0 id@loc:45432860737]


Exception: Tensor "18090280626" not found on worker "0"!!!

You just tried to interact with an object ID:18090280626 on worker 0 which does not exist!!! Use .send() and .get() on all your tensors to make sure they're on the same machines.

If you think this tensor does exist, check the ._objects dictionary on the worker and see for yourself!!! The most common reason this error happens is because someone calls .get() on the object's pointer without realizing it (which deletes the remote object and sends it to the pointer). Check your code to make sure you haven't already called .get() on this pointer!!!