<a href="https://colab.research.google.com/github/wasiqrumaney/privacy/blob/master/notebooks/pate2017single.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

###Creating directories

In [0]:
!mkdir data
!mkdir models

### Importing libraries

In [0]:
%matplotlib inline
# !pip install -q tf-nightly-2.0-preview
import gzip
import math
import os
import sys
import tarfile

import numpy as np
from scipy.io import loadmat as loadmat
from six.moves import cPickle as pickle
from six.moves import urllib
from six.moves import xrange
import tensorflow as tf


from datetime import datetime as dt
import time

import seaborn as sns
import matplotlib.pyplot as plt


### Flags

In [0]:
dataset = 'mnist'
epochs_per_decay = 350
nb_teachers = 10
# teacher_id = 0
dropout_seed = 123
batch_size = 128
nb_labels = 10
deeper = False
batch_size = 128
max_steps = 3000
log_device_placement = False
learning_rate = 5
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
train_dir = '/content/models'
data_dir = '/content/data'

stdnt_share = 1000
lap_scale = 10
teachers_dir = '/content/models'
teachers_max_steps = 3000


### Code from utils.py

In [0]:
def batch_indices(batch_nb, data_length, batch_size):
  """
  This helper function computes a batch start and end index
  :param batch_nb: the batch number
  :param data_length: the total length of the data being parsed by batches
  :param batch_size: the number of inputs in each batch
  :return: pair of (start, end) indices
  """
  # Batch start and end index
  start = int(batch_nb * batch_size)
  end = int((batch_nb + 1) * batch_size)

  # When there are not enough inputs left, we reuse some to complete the batch
  if end > data_length:
    shift = end - data_length
    start -= shift
    end -= shift

  return start, end

In [0]:
def accuracy(logits, labels):
  """
  Return accuracy of the array of logits (or label predictions) wrt the labels
  :param logits: this can either be logits, probabilities, or a single label
  :param labels: the correct labels to match against
  :return: the accuracy as a float
  """
  assert len(logits) == len(labels)

  if len(np.shape(logits)) > 1:
    # Predicted labels are the argmax over axis 1
    predicted_labels = np.argmax(logits, axis=1)
  else:
    # Input was already labels
    assert len(np.shape(logits)) == 1
    predicted_labels = logits

  # Check against correct labels to compute correct guesses
  correct = np.sum(predicted_labels == labels.reshape(len(labels)))

  # Divide by number of labels to obtain accuracy
  accuracy = float(correct) / len(labels)

  # Return float value
  return accuracy

###  Downloading dataset

In [0]:
file_urls = ['http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
               'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
               'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
               'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
              ]

In [0]:
def maybe_download(file_urls, directory):
  """Download a set of files in temporary local folder."""

  # This list will include all URLS of the local copy of downloaded files
  result = []

  # For each file of the dataset
  for file_url in file_urls:
    # Extract filename
    filename = file_url.split('/')[-1]

    # If downloading from GitHub, remove suffix ?raw=True from local filename
    if filename.endswith("?raw=true"):
      filename = filename[:-9]

    # Deduce local file url
    #filepath = os.path.join(directory, filename)
    filepath = directory + '/' + filename

    # Add to result list
    result.append(filepath)

    # Test if file already exists
    if not tf.gfile.Exists(filepath):
      def _progress(count, block_size, total_size):
        sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
            float(count * block_size) / float(total_size) * 100.0))
        sys.stdout.flush()
      filepath, _ = urllib.request.urlretrieve(file_url, filepath, _progress)
      print()
      statinfo = os.stat(filepath)
      print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

  return result

In [8]:
local_urls = maybe_download(file_urls, '/content/data')

>> Downloading train-images-idx3-ubyte.gz 100.1%
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
>> Downloading train-labels-idx1-ubyte.gz 113.5%
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
>> Downloading t10k-images-idx3-ubyte.gz 100.4%
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
>> Downloading t10k-labels-idx1-ubyte.gz 180.4%
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.


In [0]:
def extract_mnist_data(filename, num_images, image_size, pixel_depth):
  """
  Extract the images into a 4D tensor [image index, y, x, channels].

  Values are rescaled from [0, 255] down to [-0.5, 0.5].
  """
  if not tf.gfile.Exists(filename+'.npy'):
    with gzip.open(filename) as bytestream:
      bytestream.read(16)
      buf = bytestream.read(image_size * image_size * num_images)
      data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
      data = (data - (pixel_depth / 2.0)) / pixel_depth
      data = data.reshape(num_images, image_size, image_size, 1)
      np.save(filename, data)
      return data
  else:
    with tf.gfile.Open(filename+'.npy', mode='rb') as file_obj:
      return np.load(file_obj)


def extract_mnist_labels(filename, num_images):
  """
  Extract the labels into a vector of int64 label IDs.
  """
  if not tf.gfile.Exists(filename+'.npy'):
    with gzip.open(filename) as bytestream:
      bytestream.read(8)
      buf = bytestream.read(1 * num_images)
      labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32)
      np.save(filename, labels)
    return labels
  else:
    with tf.gfile.Open(filename+'.npy', mode='rb') as file_obj:
      return np.load(file_obj)

In [0]:
# Extract it into np arrays.
train_data = extract_mnist_data(local_urls[0], 60000, 28, 1)
train_labels = extract_mnist_labels(local_urls[1], 60000)
test_data = extract_mnist_data(local_urls[2], 10000, 28, 1)
test_labels = extract_mnist_labels(local_urls[3], 10000)

In [0]:
def partition_dataset(data, labels, nb_teachers, teacher_id):
  """
  Simple partitioning algorithm that returns the right portion of the data
  needed by a given teacher out of a certain nb of teachers

  Args:
    data: input data to be partitioned
    labels: output data to be partitioned
    nb_teachers: number of teachers in the ensemble (affects size of each
                      partition)
    teacher_id: id of partition to retrieve
  """

  # Sanity check
  assert len(data) == len(labels)
  assert int(teacher_id) < int(nb_teachers)

  # This will floor the possible number of batches
  batch_len = int(len(data) / nb_teachers)

  # Compute start, end indices of partition
  start = teacher_id * batch_len
  end = (teacher_id+1) * batch_len

  # Slice partition off
  partition_data = data[start:end]
  partition_labels = labels[start:end]

  return partition_data, partition_labels

### Tensorboard

In [0]:
# Delete any old logs.... be smart while using this
% rm -rf /content/logs/

In [19]:
# Install latest Tensorflow build
!pip install -q tf-nightly-2.0-preview
import tensorflow as tf
%load_ext tensorboard

The tensorboard module is not an IPython extension.


In [0]:
% mkdir -p '/content/logs/tensorboard/teacher/'

In [0]:
% mkdir -p '/content/logs/tensorboard/student/'

In [22]:
import datetime
current_time = str(dt.now().timestamp())
teacher_log_dir = '/content/logs/tensorboard/teacher/' + current_time
student_log_dir = '/content/logs/tensorboard/student/' + current_time
teacher_summary_writer = summary.FileWriter(teacher_log_dir)
student_summary_writer = summary.FileWriter(student_log_dir)

NameError: ignored

### DeepCNN

In [0]:
def _variable_on_cpu(name, shape, initializer):
  """Helper to create a Variable stored on CPU memory.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable

  Returns:
    Variable Tensor
  """
  with tf.device('/cpu:0'):
    var = tf.get_variable(name, shape, initializer=initializer)
  return var

In [0]:
def _variable_with_weight_decay(name, shape, stddev, wd):
  """Helper to create an initialized Variable with weight decay.

  Note that the Variable is initialized with a truncated normal distribution.
  A weight decay is added only if one is specified.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.

  Returns:
    Variable Tensor
  """
  var = _variable_on_cpu(name, shape,
                         tf.truncated_normal_initializer(stddev=stddev))
  if wd is not None:
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', weight_decay)
  return var

In [0]:
def inference(images, dropout=False):
  """Build the CNN model.
  Args:
    images: Images returned from distorted_inputs() or inputs().
    dropout: Boolean controlling whether to use dropout or not
  Returns:
    Logits
  """
  first_conv_shape = [5, 5, 1, 64]

  # conv1
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=first_conv_shape,
                                         stddev=1e-4,
                                         wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope.name)
    if dropout:
      conv1 = tf.nn.dropout(conv1, 0.3, seed=dropout_seed)


  # pool1
  pool1 = tf.nn.max_pool(conv1,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='SAME',
                         name='pool1')

  # norm1
  norm1 = tf.nn.lrn(pool1,
                    4,
                    bias=1.0,
                    alpha=0.001 / 9.0,
                    beta=0.75,
                    name='norm1')

  # conv2
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 128],
                                         stddev=1e-4,
                                         wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1))
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope.name)
    if dropout:
      conv2 = tf.nn.dropout(conv2, 0.3, seed=dropout_seed)


  # norm2
  norm2 = tf.nn.lrn(conv2,
                    4,
                    bias=1.0,
                    alpha=0.001 / 9.0,
                    beta=0.75,
                    name='norm2')

  # pool2
  pool2 = tf.nn.max_pool(norm2,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='SAME',
                         name='pool2')

  # local3
  with tf.variable_scope('local3') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(pool2, [batch_size, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights',
                                          shape=[dim, 384],
                                          stddev=0.04,
                                          wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    if dropout:
      local3 = tf.nn.dropout(local3, 0.5, seed=dropout_seed)

  # local4
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay('weights',
                                          shape=[384, 192],
                                          stddev=0.04,
                                          wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
    if dropout:
      local4 = tf.nn.dropout(local4, 0.5, seed=dropout_seed)

  # compute logits
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights',
                                          [192, nb_labels],
                                          stddev=1/192.0,
                                          wd=0.0)
    biases = _variable_on_cpu('biases',
                              [nb_labels],
                              tf.constant_initializer(0.0))
    logits = tf.add(tf.matmul(local4, weights), biases, name=scope.name)

  return logits

In [0]:
def inference_deeper(images, dropout=False):
  """Build a deeper CNN model.
  Args:
    images: Images returned from distorted_inputs() or inputs().
    dropout: Boolean controlling whether to use dropout or not
  Returns:
    Logits
  """
  if dataset == 'mnist':
    first_conv_shape = [3, 3, 1, 96]
  else:
    first_conv_shape = [3, 3, 3, 96]

  # conv1
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=first_conv_shape,
                                         stddev=0.05,
                                         wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [96], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope.name)

  # conv2
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 96, 96],
                                         stddev=0.05,
                                         wd=0.0)
    conv = tf.nn.conv2d(conv1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [96], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope.name)

  # conv3
  with tf.variable_scope('conv3') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 96, 96],
                                         stddev=0.05,
                                         wd=0.0)
    conv = tf.nn.conv2d(conv2, kernel, [1, 2, 2, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [96], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv3 = tf.nn.relu(bias, name=scope.name)
    if dropout:
      conv3 = tf.nn.dropout(conv3, 0.5, seed=dropout_seed)

  # conv4
  with tf.variable_scope('conv4') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 96, 192],
                                         stddev=0.05,
                                         wd=0.0)
    conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv4 = tf.nn.relu(bias, name=scope.name)

  # conv5
  with tf.variable_scope('conv5') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 192, 192],
                                         stddev=0.05,
                                         wd=0.0)
    conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv5 = tf.nn.relu(bias, name=scope.name)

  # conv6
  with tf.variable_scope('conv6') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[3, 3, 192, 192],
                                         stddev=0.05,
                                         wd=0.0)
    conv = tf.nn.conv2d(conv5, kernel, [1, 2, 2, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv6 = tf.nn.relu(bias, name=scope.name)
    if dropout:
      conv6 = tf.nn.dropout(conv6, 0.5, seed=dropout_seed)


  # conv7
  with tf.variable_scope('conv7') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 192, 192],
                                         stddev=1e-4,
                                         wd=0.0)
    conv = tf.nn.conv2d(conv6, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    bias = tf.nn.bias_add(conv, biases)
    conv7 = tf.nn.relu(bias, name=scope.name)


  # local1
  with tf.variable_scope('local1') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(conv7, [batch_size, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights',
                                          shape=[dim, 192],
                                          stddev=0.05,
                                          wd=0)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local1 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

  # local2
  with tf.variable_scope('local2') as scope:
    weights = _variable_with_weight_decay('weights',
                                          shape=[192, 192],
                                          stddev=0.05,
                                          wd=0)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local2 = tf.nn.relu(tf.matmul(local1, weights) + biases, name=scope.name)
    if dropout:
      local2 = tf.nn.dropout(local2, 0.5, seed=dropout_seed)

  # compute logits
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights',
                                          [192, nb_labels],
                                          stddev=0.05,
                                          wd=0.0)
    biases = _variable_on_cpu('biases',
                              [nb_labels],
                              tf.constant_initializer(0.0))
    logits = tf.add(tf.matmul(local2, weights), biases, name=scope.name)

  return logits

In [0]:
def loss_fun(logits, labels):
  """Add L2Loss to all the trainable variables.

  Add summary for "Loss" and "Loss/avg".
  Args:
    logits: Logits from inference().
    labels: Labels from distorted_inputs or inputs(). 1-D tensor
            of shape [batch_size]
    distillation: if set to True, use probabilities and not class labels to
                  compute softmax loss

  Returns:
    Loss tensor of type float.
  """

  # Calculate the cross entropy between labels and predictions
  labels = tf.cast(labels, tf.int64)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=labels, name='cross_entropy_per_example')

  # Calculate the average cross entropy loss across the batch.
  cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')

  # Add to TF collection for losses
  tf.add_to_collection('losses', cross_entropy_mean)

  # The total loss is defined as the cross entropy loss plus all of the weight
  # decay terms (L2 loss).
  return tf.add_n(tf.get_collection('losses'), name='total_loss')

In [0]:
def moving_av(total_loss):
  """
  Generates moving average for all losses

  Args:
    total_loss: Total loss from loss().
  Returns:
    loss_averages_op: op for generating moving averages of losses.
  """
  # Compute the moving average of all individual losses and the total loss.
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  losses = tf.get_collection('losses')
  loss_averages_op = loss_averages.apply(losses + [total_loss])

  return loss_averages_op

In [0]:
def train_op_fun(total_loss, global_step):
  """Train model.

  Create an optimizer and apply to all trainable variables. Add moving
  average for all trainable variables.

  Args:
    total_loss: Total loss from loss().
    global_step: Integer Variable counting the number of training steps
      processed.
  Returns:
    train_op: op for training.
  """
  # Variables that affect learning rate.
  nb_ex_per_train_epoch = int(60000 / nb_teachers)

  num_batches_per_epoch = nb_ex_per_train_epoch / batch_size
  decay_steps = int(num_batches_per_epoch * epochs_per_decay)

  initial_learning_rate = float(learning_rate) / 100.0

  # Decay the learning rate exponentially based on the number of steps.
  lr = tf.train.exponential_decay(initial_learning_rate,
                                  global_step,
                                  decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)
  tf.summary.scalar('learning_rate', lr)

  # Generate moving averages of all losses and associated summaries.
  loss_averages_op = moving_av(total_loss)

  # Compute gradients.
  with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

  # Apply gradients.
  apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

  # Add histograms for trainable variables.
  for var in tf.trainable_variables():
    tf.summary.histogram(var.op.name, var)

  # Track the moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
      MOVING_AVERAGE_DECAY, global_step)
  variables_averages_op = variable_averages.apply(tf.trainable_variables())

  with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
    train_op = tf.no_op(name='train')

  return train_op

In [0]:
def _input_placeholder():
  """
  This helper function declares a TF placeholder for the graph input data
  :return: TF placeholder for the graph input data
  """
  image_size = 28
  num_channels = 1

  # Declare data placeholder
  train_node_shape = (batch_size, image_size, image_size, num_channels)
  return tf.placeholder(tf.float32, shape=train_node_shape)

In [0]:
def train(images, labels, ckpt_path, dropout=False):
  """
  This function contains the loop that actually trains the model.
  :param images: a numpy array with the input data
  :param labels: a numpy array with the output labels
  :param ckpt_path: a path (including name) where model checkpoints are saved
  :param dropout: Boolean, whether to use dropout or not
  :return: True if everything went well
  """

  # Check training data
  assert len(images) == len(labels)
  assert images.dtype == np.float32
  assert labels.dtype == np.int32

  # Set default TF graph
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Declare data placeholder
    train_data_node = _input_placeholder()

    # Create a placeholder to hold labels
    train_labels_shape = (batch_size,)
    train_labels_node = tf.placeholder(tf.int32, shape=train_labels_shape)

    print("Done Initializing Training Placeholders")

    # Build a Graph that computes the logits predictions from the placeholder
    if deeper:
      logits = inference_deeper(train_data_node, dropout=dropout)
    else:
      logits = inference(train_data_node, dropout=dropout)

    # Calculate loss
    loss = loss_fun(logits, train_labels_node)
#     loss_scalar = tf.summary.scalar("loss",loss)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = train_op_fun(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    print("Graph constructed and saver created")

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Create and init sessions
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) #NOLINT(long-line)
    sess.run(init)
    
    writer = tf.summary.FileWriter('board_beginner')  # create writer
    writer.add_graph(sess.graph)

    print("Session ready, beginning training loop")

    # Initialize the number of batches
    data_length = len(images)
    nb_batches = math.ceil(data_length / batch_size)

    for step in xrange(max_steps):
      # for debug, save start time
      start_time = time.time()

      # Current batch number
      batch_nb = step % nb_batches

      # Current batch start and end indices
      start, end = batch_indices(batch_nb, data_length, batch_size)

      # Prepare dictionnary to feed the session with
      feed_dict = {train_data_node: images[start:end],
                   train_labels_node: labels[start:end]}

      # Run training step
      _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
      loss_scalar = tf.summary.scalar("loss_value",loss_value)
#       with teacher_summary_writer.as_default():
#            summary.scalar('loss', loss_value, step=step)
#       teacher_summary_writer.scalar('loss', loss_value, step=step)


      # Compute duration of training step
      duration = time.time() - start_time

      # Sanity check
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      # Echo loss once in a while
      if step % 100 == 0:
        sum1 = sess.run(loss_scalar, feed_dict=feed_dict)
        writer.add_summary(sum1,step)
        num_examples_per_step = batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (dt.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == max_steps:
        saver.save(sess, ckpt_path, global_step=step)
        
    saver.save(sess, "model_beginner")

  return True

In [0]:
def softmax_preds(images, ckpt_path, return_logits=False):
  """
  Compute softmax activations (probabilities) with the model saved in the path
  specified as an argument
  :param images: a np array of images
  :param ckpt_path: a TF model checkpoint
  :param logits: if set to True, return logits instead of probabilities
  :return: probabilities (or logits if logits is set to True)
  """
  
  # Compute nb samples and deduce nb of batches
  data_length = len(images)
  nb_batches = math.ceil(len(images) / batch_size)

  # Declare data placeholder
  train_data_node = _input_placeholder()

  # Build a Graph that computes the logits predictions from the placeholder
  if deeper:
    logits = inference_deeper(train_data_node)
  else:
    logits = inference(train_data_node)

  if return_logits:
    # We are returning the logits directly (no need to apply softmax)
    output = logits
  else:
    # Add softmax predictions to graph: will return probabilities
    output = tf.nn.softmax(logits)

  # Restore the moving average version of the learned variables for eval.
  variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
  variables_to_restore = variable_averages.variables_to_restore()
  saver = tf.train.Saver(variables_to_restore)

  # Will hold the result
  preds = np.zeros((data_length, nb_labels), dtype=np.float32)

  # Create TF session
  with tf.Session() as sess:
    # Restore TF session from checkpoint file
    saver.restore(sess, ckpt_path)

    # Parse data by batch
    for batch_nb in xrange(0, int(nb_batches+1)):
      # Compute batch start and end indices
      start, end = batch_indices(batch_nb, data_length, batch_size)

      # Prepare feed dictionary
      feed_dict = {train_data_node: images[start:end]}

      # Run session ([0] because run returns a batch with len 1st dim == 1)
      preds[start:end, :] = sess.run([output], feed_dict=feed_dict)[0]

  # Reset graph to allow multiple calls
  tf.reset_default_graph()

  return preds

# Teacher training

### Teacher 0

In [0]:
teacher_id = 0
tf.summary.FileWriterCache.clear()
import datetime

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
accuracy_scalar = tf.summary.scalar("accuracy",precision)
print('Precision of teacher after training: ' + str(precision))

Length of training data: 6000
Done Initializing Training Placeholders


W0625 02:53:19.358334 140147143387008 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/moving_averages.py:433: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Graph constructed and saver created
Session ready, beginning training loop
2019-06-25 02:53:26.016607: step 0, loss = 8.45 (35.6 examples/sec; 3.596 sec/batch)
2019-06-25 02:53:30.359268: step 100, loss = 7.95 (2244.2 examples/sec; 0.057 sec/batch)
2019-06-25 02:53:35.094154: step 200, loss = 6.23 (2504.4 examples/sec; 0.051 sec/batch)
2019-06-25 02:53:40.385384: step 300, loss = 5.66 (2313.5 examples/sec; 0.055 sec/batch)
2019-06-25 02:53:46.277029: step 400, loss = 5.32 (2105.3 examples/sec; 0.061 sec/batch)
2019-06-25 02:53:52.836401: step 500, loss = 5.08 (1842.8 examples/sec; 0.069 sec/batch)
2019-06-25 02:54:00.167333: step 600, loss = 5.07 (1735.2 examples/sec; 0.074 sec/batch)
2019-06-25 02:54:08.200480: step 700, loss = 4.68 (1540.9 examples/sec; 0.083 sec/batch)
2019-06-25 02:54:16.807977: step 800, loss = 4.51 (1447.1 examples/sec; 0.088 sec/batch)
2019-06-25 02:54:25.970224: step 900, loss = 5.06 (1408.6 examples/sec; 0.091 sec/batch)
2019-06-25 02:54:35.660604: step 1000, 

In [0]:
%load_ext tensorboard
%tensorboard --logdir /content/board_beginner

### Teacher 1

In [0]:
teacher_id = 1

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 2

In [0]:
teacher_id = 2

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 3

In [0]:
teacher_id = 3

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 4

In [0]:
teacher_id = 4

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 5

In [0]:
teacher_id = 5

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 6

In [0]:
teacher_id = 6

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 7

In [0]:
teacher_id = 7

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 8

In [0]:
teacher_id = 8

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

### Teacher 9

In [0]:
teacher_id = 9

# Retrieve subset of data for this teacher
data, labels = partition_dataset(train_data, train_labels, nb_teachers, teacher_id)
print("Length of training data: " + str(len(labels)))

# Define teacher checkpoint filename and full path
if deeper:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt'
else:
  filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
ckpt_path = train_dir + '/' + str(dataset) + '_' + filename

# Perform teacher training
assert train(data, labels, ckpt_path)

# Append final step value to checkpoint for evaluation
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Retrieve teacher probability estimates on the test data
teacher_preds = softmax_preds(test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(teacher_preds, test_labels)
print('Precision of teacher after training: ' + str(precision))

# Student

### Aggregation

In [0]:
def labels_from_probs(probs):
  """
  Helper function: computes argmax along last dimension of array to obtain
  labels (max prob or max logit value)
  :param probs: numpy array where probabilities or logits are on last dimension
  :return: array with same shape as input besides last dimension with shape 1
          now containing the labels
  """
  # Compute last axis index
  last_axis = len(np.shape(probs)) - 1

  # Label is argmax over last dimension
  labels = np.argmax(probs, axis=last_axis)

  # Return as np.int32
  return np.asarray(labels, dtype=np.int32)

In [0]:
def noisy_max(logits, lap_scale, return_clean_votes=False):
  """
  This aggregation mechanism takes the softmax/logit output of several models
  resulting from inference on identical inputs and computes the noisy-max of
  the votes for candidate classes to select a label for each sample: it
  adds Laplacian noise to label counts and returns the most frequent label.
  :param logits: logits or probabilities for each sample
  :param lap_scale: scale of the Laplacian noise to be added to counts
  :param return_clean_votes: if set to True, also returns clean votes (without
                      Laplacian noise). This can be used to perform the
                      privacy analysis of this aggregation mechanism.
  :return: pair of result and (if clean_votes is set to True) the clean counts
           for each class per sample and the original labels produced by
           the teachers.
  """

  # Compute labels from logits/probs and reshape array properly
  labels = labels_from_probs(logits)
  labels_shape = np.shape(labels)
  labels = labels.reshape((labels_shape[0], labels_shape[1]))

  # Initialize array to hold final labels
  result = np.zeros(int(labels_shape[1]))

  if return_clean_votes:
    # Initialize array to hold clean votes for each sample
    clean_votes = np.zeros((int(labels_shape[1]), 10))

  # Parse each sample
  for i in xrange(int(labels_shape[1])):
    # Count number of votes assigned to each class
    label_counts = np.bincount(labels[:, i], minlength=10)

    if return_clean_votes:
      # Store vote counts for export
      clean_votes[i] = label_counts

    # Cast in float32 to prepare before addition of Laplacian noise
    label_counts = np.asarray(label_counts, dtype=np.float32)

    # Sample independent Laplacian noise for each class
    for item in xrange(10):
      label_counts[item] += np.random.laplace(loc=0.0, scale=float(lap_scale))

    # Result is the most frequent label
    result[i] = np.argmax(label_counts)

  # Cast labels to np.int32 for compatibility with deep_cnn.py feed dictionaries
  result = np.asarray(result, dtype=np.int32)

  if return_clean_votes:
    # Returns several array, which are later saved:
    # result: labels obtained from the noisy aggregation
    # clean_votes: the number of teacher votes assigned to each sample and class
    # labels: the labels assigned by teachers (before the noisy aggregation)
    return result, clean_votes, labels
  else:
    # Only return labels resulting from noisy aggregation
    return result

In [0]:
def aggregation_most_frequent(logits):
  """
  This aggregation mechanism takes the softmax/logit output of several models
  resulting from inference on identical inputs and computes the most frequent
  label. It is deterministic (no noise injection like noisy_max() above.
  :param logits: logits or probabilities for each sample
  :return:
  """
  # Compute labels from logits/probs and reshape array properly
  labels = labels_from_probs(logits)
  labels_shape = np.shape(labels)
  labels = labels.reshape((labels_shape[0], labels_shape[1]))

  # Initialize array to hold final labels
  result = np.zeros(int(labels_shape[1]))

  # Parse each sample
  for i in xrange(int(labels_shape[1])):
    # Count number of votes assigned to each class
    label_counts = np.bincount(labels[:, i], minlength=10)

    label_counts = np.asarray(label_counts, dtype=np.int32)

    # Result is the most frequent label
    result[i] = np.argmax(label_counts)

  return np.asarray(result, dtype=np.int32)

### Student training

In [0]:
def ensemble_preds(dataset, nb_teachers, stdnt_data):
  """
  Given a dataset, a number of teachers, and some input data, this helper
  function queries each teacher for predictions on the data and returns
  all predictions in a single array. (That can then be aggregated into
  one single prediction per input using aggregation.py (cf. function
  prepare_student_data() below)
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :param stdnt_data: unlabeled student training data
  :return: 3d array (teacher id, sample id, probability per class)
  """

  # Compute shape of array that will hold probabilities produced by each
  # teacher, for each training point, and each output class
  result_shape = (nb_teachers, len(stdnt_data), nb_labels)

  # Create array that will hold result
  result = np.zeros(result_shape, dtype=np.float32)

  # Get predictions from each teacher
  for teacher_id in xrange(nb_teachers):
    # Compute path of checkpoint file for teacher model with ID teacher_id
    if deeper:
      ckpt_path = teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt-' + str(teachers_max_steps - 1) #NOLINT(long-line)
    else:
      ckpt_path = teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt-' + str(teachers_max_steps - 1)  # NOLINT(long-line)

    # Get predictions on our training data and store in result array
    result[teacher_id] = softmax_preds(stdnt_data, ckpt_path)

    # This can take a while when there are a lot of teachers so output status
    print("Computed Teacher " + str(teacher_id) + " softmax predictions")

  return result

In [0]:
def prepare_student_data(dataset, nb_teachers, save=False):
  """
  Takes a dataset name and the size of the teacher ensemble and prepares
  training data for the student model, according to parameters indicated
  in flags above.
  :param dataset: string corresponding to mnist, cifar10, or svhn
  :param nb_teachers: number of teachers (in the ensemble) to learn from
  :param save: if set to True, will dump student training labels predicted by
               the ensemble of teachers (with Laplacian noise) as npy files.
               It also dumps the clean votes for each class (without noise) and
               the labels assigned by teachers
  :return: pairs of (data, labels) to be used for student training and testing
  """

  # Make sure there is data leftover to be used as a test set
  assert stdnt_share < len(test_data)

  # Prepare [unlabeled] student training data (subset of test set)
  stdnt_data = test_data[:stdnt_share]

  # Compute teacher predictions for student training data
  teachers_preds = ensemble_preds(dataset, nb_teachers, stdnt_data)

  # Aggregate teacher predictions to get student training labels
  if not save:
    stdnt_labels = noisy_max(teachers_preds, lap_scale)
  else:
    # Request clean votes and clean labels as well
    stdnt_labels, clean_votes, labels_for_dump = noisy_max(teachers_preds, lap_scale, return_clean_votes=True) #NOLINT(long-line)

    # Prepare filepath for numpy dump of clean votes
    filepath = data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_clean_votes_lap_' + str(lap_scale) + '.npy'  # NOLINT(long-line)

    # Prepare filepath for numpy dump of clean labels
    filepath_labels = data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_teachers_labels_lap_' + str(lap_scale) + '.npy'  # NOLINT(long-line)

    # Dump clean_votes array
    with tf.gfile.Open(filepath, mode='w') as file_obj:
      np.save(file_obj, clean_votes)

    # Dump labels_for_dump array
    with tf.gfile.Open(filepath_labels, mode='w') as file_obj:
      np.save(file_obj, labels_for_dump)

  # Print accuracy of aggregated labels
  ac_ag_labels = accuracy(stdnt_labels, test_labels[:stdnt_share])
  print("Accuracy of the aggregated labels: " + str(ac_ag_labels))

  # Store unused part of test set for use as a test set after student training
  stdnt_test_data = test_data[stdnt_share:]
  stdnt_test_labels = test_labels[stdnt_share:]

  if save:
    # Prepare filepath for numpy dump of labels produced by noisy aggregation
    filepath = data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_labels_lap_' + str(lap_scale) + '.npy' #NOLINT(long-line)

    # Dump student noisy labels array
    with tf.gfile.Open(filepath, mode='w') as file_obj:
      np.save(file_obj, stdnt_labels)

  return stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels

In [0]:
# Call helper function to prepare student data using teacher predictions
stdnt_dataset = prepare_student_data(dataset, nb_teachers, save=True)

# Unpack the student dataset
stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset

# Prepare checkpoint filename and path
if deeper:
  ckpt_path = train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt' #NOLINT(long-line)
else:
  ckpt_path = train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'  # NOLINT(long-line)

# Start student training
assert train(stdnt_data, stdnt_labels, ckpt_path)

# Compute final checkpoint name for student (with max number of steps)
ckpt_path_final = ckpt_path + '-' + str(max_steps - 1)

# Compute student label predictions on remaining chunk of test set
student_preds = softmax_preds(stdnt_test_data, ckpt_path_final)

# Compute teacher accuracy
precision = accuracy(student_preds, stdnt_test_labels)
print('Precision of student after training: ' + str(precision))