# Learning private models with multiple teachers

Protocol:
1. Train teachers:
    - Divide the training set into non-overlapping buckets
    - Train a model (teacher) on each bucket
2. Train student:
    - Extract a share of the test set
    - Ensemble predictions from teachers: query each teacher for predictions on the test set share
    - Aggregate teacher predictions to get student training labels using noisy max: add
  Laplacian noise to the label counts and return the most frequent label
    - Train the student with the aggregated labels
    - Validate the student model on the remaining test data

http://www.cleverhans.io/privacy/2018/04/29/privacy-and-machine-learning.html
https://github.com/tensorflow/models/tree/master/research/differential_privacy/multiple_teachers

In [4]:
from syft.dp.pate import train_teachers, train_student
from torchvision import datasets, transforms

import torch
import torch.nn as nn
import torch.nn.functional as F

def prepare_mnist():
    """Load MNIST and return the train and test splits as single batches.

    Both splits are converted to tensors and normalised with mean 0.1307 and
    std 0.3081. Each split is wrapped in a DataLoader and exactly one batch
    is drawn from it (batch size 60000 for training, 10000 for test). The
    training loader shuffles; the test loader does not.

    Returns:
        tuple: ``(train_data, train_labels, test_data, test_labels)``.
    """
    # One transform pipeline shared by both splits.
    to_normalised_tensor = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )

    # Only the training split asks for a download; the test split is opened
    # without download=True and so relies on the files already being in ./data.
    train_set = datasets.MNIST(
        "./data", train=True, download=True, transform=to_normalised_tensor
    )
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=60000, shuffle=True, num_workers=1
    )

    test_set = datasets.MNIST("./data", train=False, transform=to_normalised_tensor)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=10000, shuffle=False, num_workers=1
    )

    # Draw the first batch of each loader: training first, then test.
    train_data, train_labels = next(iter(train_loader))
    test_data, test_labels = next(iter(test_loader))
    return train_data, train_labels, test_data, test_labels

In [5]:
# Experiment configuration read by the teacher and student training cells.
dataset = "mnist"    # dataset tag; also the prefix of each teacher's checkpoint file name
nb_labels = 10       # number of output classes passed to CNN_Model
nb_teachers = 100    # number of teacher models to train
stdnt_share = 1000   # presumably the number of test samples used to train the student -- confirm in syft.dp.pate
lap_scale = 10       # presumably the scale of the Laplacian noise added during aggregation -- confirm in syft.dp.pate

In [6]:
# Demo dataset: MNIST, with both splits returned by prepare_mnist().
(train_data, train_labels,
 test_data, test_labels) = prepare_mnist()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [7]:
class CNN_Model(nn.Module):
    """Small convolutional classifier used for both teachers and the student.

    Architecture: two stages of (5x5 convolution -> batch norm -> ReLU ->
    2x2 average pooling) with 16 channels each, followed by a fully connected
    head 256 -> 100 -> ``num_classes`` with batch norm and ReLU after the
    first linear layer.

    The 256 input features of ``linear1`` equal 16 channels x 4 x 4, which is
    what the two conv/pool stages produce for a single-channel 28x28 image.

    Args:
        num_classes: number of output logits per sample.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable so
        # previously saved checkpoints still load.
        self.conv1 = nn.Conv2d(1, 16, 5, stride=1)
        self.batchnorm1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.avgpool1 = nn.AvgPool2d(2)
        self.conv2 = nn.Conv2d(16, 16, 5, stride=1)
        self.batchnorm2 = nn.BatchNorm2d(16)
        self.relu2 = nn.ReLU()
        self.avgpool2 = nn.AvgPool2d(2)
        self.linear1 = nn.Linear(256, 100)
        self.batchnorm3 = nn.BatchNorm1d(100)
        self.relu3 = nn.ReLU()
        self.linear2 = nn.Linear(100, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images.

        Args:
            x: tensor of shape ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``; no softmax is applied.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not flatten to the
                256 features expected by ``linear1``.
        """
        x = self.conv1(x)
        x = self.batchnorm1(x)
        x = self.relu1(x)
        x = self.avgpool1(x)
        x = self.conv2(x)
        x = self.batchnorm2(x)
        x = self.relu2(x)
        x = self.avgpool2(x)
        # Flatten per sample. Pinning the batch dimension (instead of
        # view(-1, 256)) keeps one output row per input sample and makes a
        # wrong input size fail in linear1 rather than silently re-partition
        # the batch.
        x = x.view(x.size(0), -1)
        x = self.linear1(x)
        x = self.batchnorm3(x)
        x = self.relu3(x)
        out = self.linear2(x)
        return out

## Train Teachers

In [5]:
# NOTE(review): ckpt_path is not referenced anywhere in this cell; confirm
# where train_teachers actually writes its checkpoint files.
ckpt_path = 'checkpoint/'

for teacher_id in range(nb_teachers):
    # Every teacher starts from a freshly initialised network.
    model = CNN_Model(nb_labels)

    # Checkpoint name, e.g. "mnist_100_teachers_0.pth".
    filename = '{!s}_{!s}_teachers_{!s}.pth'.format(dataset, nb_teachers, teacher_id)

    # The full training set is passed together with the teacher index and the
    # teacher count; presumably train_teachers picks this teacher's bucket
    # itself -- verify in syft.dp.pate.
    train_teachers(model, train_data, train_labels, test_data, test_labels, nb_teachers, teacher_id, filename)


Train teacher ID: 0




Train Accuracy: 369/600 (62%)
Train Accuracy: 536/600 (89%)
Train Accuracy: 571/600 (95%)
Train Accuracy: 589/600 (98%)
Train Accuracy: 595/600 (99%)
Train Accuracy: 599/600 (100%)
Train Accuracy: 600/600 (100%)
Train Accuracy: 600/600 (100%)
Train Accuracy: 600/600 (100%)
Train Accuracy: 600/600 (100%)
Test Accuracy: 9492/10000 (95%)

Train teacher ID: 1
Train Accuracy: 363/600 (60%)
Train Accuracy: 535/600 (89%)
Train Accuracy: 573/600 (96%)
Train Accuracy: 592/600 (99%)
Train Accuracy: 596/600 (99%)
Train Accuracy: 598/600 (100%)
Train Accuracy: 600/600 (100%)
Train Accuracy: 600/600 (100%)
Train Accuracy: 600/600 (100%)
Train Accuracy: 600/600 (100%)
Test Accuracy: 9557/10000 (96%)

Train teacher ID: 2
Train Accuracy: 356/600 (59%)
Train Accuracy: 525/600 (88%)
Train Accuracy: 565/600 (94%)
Train Accuracy: 584/600 (97%)
Train Accuracy: 589/600 (98%)
Train Accuracy: 597/600 (100%)
Train Accuracy: 597/600 (100%)
Train Accuracy: 597/600 (100%)
Train Accuracy: 597/600 (100%)
Train Accu

## Train Student

In [6]:
# Fresh, untrained network for the student; this rebinds the `model` name.
model = CNN_Model(nb_labels)

# Kept as the cell's final bare expression so that its return value is displayed.
train_student(model, dataset, nb_labels, nb_teachers, stdnt_share, lap_scale)

  output_softmax = F.softmax(output).data.numpy()


Computed Teacher 0 softmax predictions
Computed Teacher 1 softmax predictions
Computed Teacher 2 softmax predictions
Computed Teacher 3 softmax predictions
Computed Teacher 4 softmax predictions
Computed Teacher 5 softmax predictions
Computed Teacher 6 softmax predictions
Computed Teacher 7 softmax predictions
Computed Teacher 8 softmax predictions
Computed Teacher 9 softmax predictions
Computed Teacher 10 softmax predictions
Computed Teacher 11 softmax predictions
Computed Teacher 12 softmax predictions
Computed Teacher 13 softmax predictions
Computed Teacher 14 softmax predictions
Computed Teacher 15 softmax predictions
Computed Teacher 16 softmax predictions
Computed Teacher 17 softmax predictions
Computed Teacher 18 softmax predictions
Computed Teacher 19 softmax predictions
Computed Teacher 20 softmax predictions
Computed Teacher 21 softmax predictions
Computed Teacher 22 softmax predictions
Computed Teacher 23 softmax predictions
Computed Teacher 24 softmax predictions
Computed T



Train Accuracy: 926/1000 (93%)
Train Accuracy: 966/1000 (97%)
Train Accuracy: 989/1000 (99%)
Train Accuracy: 998/1000 (100%)
Train Accuracy: 999/1000 (100%)
Train Accuracy: 1000/1000 (100%)
Train Accuracy: 1000/1000 (100%)
Train Accuracy: 1000/1000 (100%)
Train Accuracy: 1000/1000 (100%)
Train Accuracy: 1000/1000 (100%)
Test Accuracy: 8701/9000 (97%)

Precision of student after training: 0.9674444444444444


True