In [0]:
!pip install -U dm-sonnet==1.23

Collecting dm-sonnet==1.23
[?25l  Downloading https://files.pythonhosted.org/packages/83/c7/e32a7d83724f26e921dcdd7ddd8f30e6e92cb4e68c740960307616b6ada8/dm_sonnet-1.23-py3-none-any.whl (616kB)
[K    100% |████████████████████████████████| 624kB 21.6MB/s 
Installing collected packages: dm-sonnet
Successfully installed dm-sonnet-1.23


In [0]:

from __future__ import print_function

import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import sonnet as snt
import tensorflow as tf
import tarfile
from mpl_toolkits.mplot3d import Axes3D

from six.moves import cPickle
from six.moves import urllib
from six.moves import xrange

In [0]:
# Load MNIST through Keras: two (images, labels) pairs of NumPy arrays.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Append an explicit channel axis so each image is (height, width, 1).
# np.expand_dims derives the shape from the loaded data; the previous
# np.resize call with hard-coded sizes would silently repeat or truncate
# pixels if the array size ever differed from the expected one.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Feature dictionaries consumed by tf.data.Dataset.from_tensor_slices.
train_dict = {"images": x_train, "labels": y_train}
test_dict = {"images": x_test, "labels": y_test}
def cast_and_normalise_images(data_dict):
    """Convert images to floating point with the range [-0.5, 0.5].

    Args:
      data_dict: mapping with an 'images' entry. The pixel values are assumed
        to lie in [0, 255] (they are divided by 255.0) -- TODO confirm for
        any new data source.

    Returns:
      A new dict with the same entries as `data_dict`, except that 'images'
      is cast to tf.float32, divided by 255.0 and shifted by -0.5. The input
      mapping itself is left untouched so that calling this helper directly
      on a shared dictionary cannot replace its raw images.
    """
    normalised = dict(data_dict)
    normalised['images'] = (tf.cast(data_dict['images'], tf.float32) / 255.0) - 0.5
    return normalised
# Variance of all training pixels after scaling them by 1/255.
data_variance = (train_dict['images'] / 255.0).var()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [0]:
def residual_stack(h, num_hiddens, num_residual_layers, num_residual_hiddens):
    """Run `h` through a sequence of residual blocks and a final ReLU.

    Every block computes ReLU -> 3x3 conv -> ReLU -> 1x1 conv and adds the
    result back onto its own input.

    Args:
      h: input tensor.
      num_hiddens: output channels of each block's 1x1 convolution. The result
        is added to `h`, so this presumably has to match the channel count of
        `h` -- verify against callers.
      num_residual_layers: number of residual blocks to apply.
      num_residual_hiddens: output channels of each block's 3x3 convolution.

    Returns:
      ReLU of the tensor obtained after the last residual addition.
    """
    for layer_idx in range(num_residual_layers):
        conv3x3 = snt.Conv2D(
            output_channels=num_residual_hiddens,
            kernel_shape=(3, 3),
            stride=(1, 1),
            name="res3x3_%d" % layer_idx)
        hidden = conv3x3(tf.nn.relu(h))

        conv1x1 = snt.Conv2D(
            output_channels=num_hiddens,
            kernel_shape=(1, 1),
            stride=(1, 1),
            name="res1x1_%d" % layer_idx)
        residual = conv1x1(tf.nn.relu(hidden))

        # Skip connection: accumulate the branch output onto the running tensor.
        h = h + residual
    return tf.nn.relu(h)

class Encoder(snt.AbstractModule):
    """Convolutional encoder: two stride-2 convs, a 3x3 conv and a residual stack.

    Args:
      num_hiddens: number of channels produced by the encoder (the first
        convolution uses half of this).
      num_residual_layers: number of blocks in the trailing residual stack.
      num_residual_hiddens: channels of the 3x3 convolution inside each
        residual block.
      name: Sonnet module name.
    """
    def __init__(self, num_hiddens, num_residual_layers, num_residual_hiddens,
                 name='encoder'):
        super(Encoder, self).__init__(name=name)
        self._num_hiddens = num_hiddens
        self._num_residual_layers = num_residual_layers
        self._num_residual_hiddens = num_residual_hiddens

    def _build(self, x):
        """Encode the image batch `x` and return the final feature map."""
        # Floor division keeps the channel count an int. Plain `/` yields a
        # float under true division, and the Decoder already coerces its
        # halved channel count to int.
        h = snt.Conv2D(
            output_channels=self._num_hiddens // 2,
            kernel_shape=(4, 4),
            stride=(2, 2),  # first spatial downsampling step
            name="enc_1")(x)
        h = tf.nn.relu(h)

        h = snt.Conv2D(
            output_channels=self._num_hiddens,
            kernel_shape=(4, 4),
            stride=(2, 2),  # second spatial downsampling step
            name="enc_4")(h)
        h = tf.nn.relu(h)

        # Stride-1 convolution at full channel width; no ReLU here because the
        # residual stack applies one to its input.
        h = snt.Conv2D(
            output_channels=self._num_hiddens,
            kernel_shape=(3, 3),
            stride=(1, 1),
            name="enc_5")(h)

        h = residual_stack(
            h,
            self._num_hiddens,
            self._num_residual_layers,
            self._num_residual_hiddens)
        return h

class Decoder(snt.AbstractModule):
    """Convolutional decoder: 3x3 conv, residual stack, two stride-2 transposed convs.

    Args:
      num_hiddens: channels of the first convolution and of the residual stack
        (the first transposed convolution uses half of this).
      num_residual_layers: number of blocks in the residual stack.
      num_residual_hiddens: channels of the 3x3 convolution inside each
        residual block.
      name: Sonnet module name.
    """
    def __init__(self, num_hiddens, num_residual_layers, num_residual_hiddens,
                 name='decoder'):
        super(Decoder, self).__init__(name=name)
        self._num_hiddens = num_hiddens
        self._num_residual_layers = num_residual_layers
        self._num_residual_hiddens = num_residual_hiddens

    def _build(self, x):
        """Decode the feature map `x` into a single-channel reconstruction."""
        stem = snt.Conv2D(
            output_channels=self._num_hiddens,
            kernel_shape=(3, 3),
            stride=(1, 1),
            name="dec_1")

        h = residual_stack(
            stem(x),
            self._num_hiddens,
            self._num_residual_layers,
            self._num_residual_hiddens)

        # First stride-2 transposed convolution, at half the channel width.
        upsample = snt.Conv2DTranspose(
            output_channels=int(self._num_hiddens / 2),
            output_shape=None,
            kernel_shape=(4, 4),
            stride=(2, 2),
            name="dec_4")
        h = tf.nn.relu(upsample(h))

        # Second stride-2 transposed convolution; emits one output channel and
        # applies no activation.
        to_image = snt.Conv2DTranspose(
            output_channels=1,
            output_shape=None,
            kernel_shape=(4, 4),
            stride=(2, 2),
            name="dec_5")
        x_recon = to_image(h)
        return x_recon

In [0]:
# Start from an empty default graph so re-running this cell does not stack
# duplicate ops/variables on top of a previous run.
tf.reset_default_graph()

# Set hyper-parameters.
# NOTE: the image-grid plotting helper later in the notebook reshapes a batch
# into a 4x8 grid, i.e. it assumes batch_size == 32.
batch_size = 32
# Side length of the square input images (used for the input placeholder).
image_size = 28

# Number of optimiser steps run by the training loop.
# NOTE: the original timing remark here ("100k steps should take < 30 minutes
# on a modern (>= 2017) GPU") refers to 100k steps, not the 40k configured.
num_training_updates = 40000

num_hiddens = 64
num_residual_hiddens = 32
num_residual_layers = 2
# These hyper-parameters define the size of the model (number of parameters and layers).
# The hyper-parameters in the paper were (For ImageNet):
# batch_size = 128
# image_size = 128
# num_hiddens = 128
# num_residual_hiddens = 32
# num_residual_layers = 2

# This value is not that important, usually 64 works.
# This will not change the capacity in the information-bottleneck.
embedding_dim = 64

# The higher this value, the higher the capacity in the information bottleneck.
num_embeddings = 256

# commitment_cost should be set appropriately. It's often useful to try a couple
# of values. It mostly depends on the scale of the reconstruction cost
# (log p(x|z)). So if the reconstruction cost is 100x higher, the
# commitment_cost should also be multiplied with the same amount.
commitment_cost = 0.25

# Use EMA updates for the codebook (instead of the Adam optimizer).
# This typically converges faster, and makes the model less dependent on choice
# of the optimizer. In the VQ-VAE paper EMA updates were not used (but was
# developed afterwards). See Appendix of the paper for more details.
# NOTE(review): no VectorQuantizerEMA class is defined in the visible cells;
# the model-building cell must be able to resolve one before this is set True.
vq_use_ema = False

# This is only used for EMA updates.
decay = 0.99

# Step size passed to the Adam optimiser in the training cell.
learning_rate = 3e-4

# Data Loading.
# Training stream: normalise, shuffle with a 10000-element buffer, repeat
# forever, and batch. A one-shot iterator needs no explicit initialisation.
train_dataset_iterator = (
    tf.data.Dataset.from_tensor_slices(train_dict)
    .map(cast_and_normalise_images)
    .shuffle(10000)
    .repeat(-1)  # repeat indefinitely
    .batch(batch_size)).make_one_shot_iterator()

# Test stream: a single unshuffled pass. The iterator is initialisable, so its
# `initializer` op must be run before the first batch is requested.
test_dataset_iterator = (
    tf.data.Dataset.from_tensor_slices(test_dict)
    .map(cast_and_normalise_images)
    .repeat(1)  # 1 epoch
    .batch(batch_size)).make_initializable_iterator()
# Tensors (dicts with 'images' and 'labels') yielding the next batch each time
# they are evaluated in a session.
train_dataset_batch = train_dataset_iterator.get_next()
test_dataset_batch = test_dataset_iterator.get_next()



def get_images(sess, subset='train'):
  """Evaluate and return the next batch of the requested split.

  Args:
    sess: session used to run the batch tensor.
    subset: 'train' or 'test'.

  Returns:
    Whatever `sess.run` returns for the split's batch tensor (a dict with
    'images' and 'labels' entries, given how the datasets are built).

  Raises:
    ValueError: if `subset` is neither 'train' nor 'test'. Previously such a
      value fell through and returned None, which only failed later at the
      caller with an unrelated error.
  """
  if subset == 'train':
    return sess.run(train_dataset_batch)
  elif subset == 'test':
    return sess.run(test_dataset_batch)
  raise ValueError("subset must be 'train' or 'test', got %r" % (subset,))
  
# Single-pass, unshuffled pipelines that yield one example per step. They are
# used to encode every image individually for the downstream exports.
batch_size_classification = 1

# Training split: one-shot iterator and its next-element tensor.
train_dataset_iterator_classification = (
    tf.data.Dataset.from_tensor_slices(train_dict)
    .map(cast_and_normalise_images)
    .repeat(1)
    .batch(batch_size_classification)
    .make_one_shot_iterator())

# Test split: same pipeline over the test dictionary.
test_dataset_iterator_classification = (
    tf.data.Dataset.from_tensor_slices(test_dict)
    .map(cast_and_normalise_images)
    .repeat(1)
    .batch(batch_size_classification)
    .make_one_shot_iterator())

# One-shot iterators cannot be rewound: once a split has been fully consumed,
# further evaluation of its batch tensor fails until the graph is rebuilt.
train_dataset_batch_classification = train_dataset_iterator_classification.get_next()
test_dataset_batch_classification = test_dataset_iterator_classification.get_next()

def get_images_classification(sess, subset='train'):
  """Evaluate and return the next single-example batch of the requested split.

  Args:
    sess: session used to run the batch tensor.
    subset: 'train' or 'test'.

  Returns:
    Whatever `sess.run` returns for the split's single-example batch tensor
    (a dict with 'images' and 'labels' entries, given how the datasets are
    built).

  Raises:
    ValueError: if `subset` is neither 'train' nor 'test'. Previously such a
      value fell through and returned None, which only failed later at the
      caller with an unrelated error.
  """
  if subset == 'train':
    return sess.run(train_dataset_batch_classification)
  elif subset == 'test':
    return sess.run(test_dataset_batch_classification)
  raise ValueError("subset must be 'train' or 'test', got %r" % (subset,))

In [0]:
# Copyright 2018 The Sonnet Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or  implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Sonnet implementation of VQ-VAE https://arxiv.org/abs/1711.00937."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sonnet.python.modules import base


import tensorflow as tf

from tensorflow.python.training import moving_averages


class VectorQuantizer(base.AbstractModule):
  """Sonnet module implementing the VQ-VAE quantization layer.

  Follows 'Neural Discrete Representation Learning' by van den Oord et al.
  (https://arxiv.org/abs/1711.00937).

  The last dimension of the input is the space in which vectors are
  quantized; all leading dimensions are flattened and treated as independent
  examples. For instance an input of shape [16, 32, 32, 64] is handled as
  16384 vectors of 64 dimensions each. The quantized output is gathered per
  input position from the codebook.

  Args:
    embedding_dim: integer, dimensionality of each codebook vector. The last
      dimension of the inputs must equal this value.
    num_embeddings: integer, number of vectors in the codebook.
    commitment_cost: scalar weight of the commitment term of the loss
      (Beta in equation 4 of the paper).
    name: Sonnet module name.
  """

  def __init__(self, embedding_dim, num_embeddings, commitment_cost,
               name='vq_layer'):
    super(VectorQuantizer, self).__init__(name=name)
    self._embedding_dim = embedding_dim
    self._num_embeddings = num_embeddings
    self._commitment_cost = commitment_cost

    # The codebook is created at construction time, stored with one code per
    # column: shape [embedding_dim, num_embeddings].
    with self._enter_variable_scope():
      self._w = tf.get_variable(
          'embedding',
          [embedding_dim, num_embeddings],
          initializer=tf.uniform_unit_scaling_initializer(),
          trainable=True)

  def _build(self, inputs, is_training):
    """Quantizes `inputs` against the codebook.

    Args:
      inputs: Tensor whose final dimension equals embedding_dim. All leading
        dimensions are flattened and treated as one large batch.
      is_training: accepted for interface compatibility; this implementation
        does not read it.

    Returns:
      dict with the following keys:
        quantize: quantized inputs, written as
          inputs + stop_gradient(quantized - inputs) so that gradients pass
          straight through to `inputs`.
        loss: codebook loss plus commitment_cost times the commitment loss.
        perplexity: exp of the entropy of the average one-hot assignment.
        encodings: one-hot assignments, one row per flattened input vector.
        encoding_indices: index of the selected code for every input
          position, shaped like `inputs` without its last dimension.
        encoding_indices_old: the same indices before reshaping, i.e. one
          entry per flattened input vector.
    """
    # Runtime check that the last dimension equals embedding_dim.
    dynamic_shape = tf.shape(inputs)
    shape_check = tf.Assert(
        tf.equal(dynamic_shape[-1], self._embedding_dim), [dynamic_shape])
    with tf.control_dependencies([shape_check]):
      flat_inputs = tf.reshape(inputs, [-1, self._embedding_dim])

    # Squared Euclidean distance to every code, expanded as
    # |x|^2 - 2 x.w + |w|^2.
    input_sq_norms = tf.reduce_sum(flat_inputs**2, 1, keepdims=True)
    cross_terms = tf.matmul(flat_inputs, self._w)
    code_sq_norms = tf.reduce_sum(self._w ** 2, 0, keepdims=True)
    distances = input_sq_norms - 2 * cross_terms + code_sq_norms

    # Nearest code per flattened vector (argmax of the negated distance).
    flat_indices = tf.argmax(- distances, 1)
    encodings = tf.one_hot(flat_indices, self._num_embeddings)
    encoding_indices = tf.reshape(flat_indices, tf.shape(inputs)[:-1])
    quantized = self.quantize(encoding_indices)

    # Weight of the codebook term of the loss.
    codebook_loss_weight = 1
    # Commitment term: gradients reach `inputs` only.
    e_latent_loss = tf.reduce_mean((tf.stop_gradient(quantized) - inputs) ** 2)
    # Codebook term: gradients reach the codebook only.
    q_latent_loss = tf.reduce_mean((quantized - tf.stop_gradient(inputs)) ** 2)
    loss = (codebook_loss_weight * q_latent_loss
            + self._commitment_cost * e_latent_loss)

    # Straight-through estimator: forward value is the quantized vector,
    # backward gradient is that of the identity on `inputs`.
    quantized = inputs + tf.stop_gradient(quantized - inputs)
    avg_probs = tf.reduce_mean(encodings, 0)
    perplexity = tf.exp(- tf.reduce_sum(avg_probs * tf.log(avg_probs + 1e-10)))

    outputs = {
        'quantize': quantized,
        'loss': loss,
        'perplexity': perplexity,
        'encodings': encodings,
        'encoding_indices': encoding_indices,
        'encoding_indices_old': flat_indices,
    }
    return outputs

  @property
  def embeddings(self):
    """The codebook variable, shape [embedding_dim, num_embeddings]."""
    return self._w

  def quantize(self, encoding_indices):
    """Returns the codebook vectors selected by `encoding_indices`.

    The codebook is read after `encoding_indices` has been computed and is
    transposed so that each row is one code before the lookup.
    """
    with tf.control_dependencies([encoding_indices]):
      codes_by_row = tf.transpose(self.embeddings.read_value(), [1, 0])
    return tf.nn.embedding_lookup(
        codes_by_row, encoding_indices, validate_indices=False)


In [0]:

# Build modules.
encoder = Encoder(num_hiddens, num_residual_layers, num_residual_hiddens)
decoder = Decoder(num_hiddens, num_residual_layers, num_residual_hiddens)
# 1x1 convolution projecting the encoder output to embedding_dim channels.
pre_vq_conv1 = snt.Conv2D(output_channels=embedding_dim,
    kernel_shape=(1, 1),
    stride=(1, 1),
    name="to_vq")

if vq_use_ema:
  # This notebook defines no EMA quantizer of its own, so the bare name
  # VectorQuantizerEMA was unresolved; use the implementation shipped with
  # Sonnet instead.
  vq_vae = snt.nets.VectorQuantizerEMA(
      embedding_dim=embedding_dim,
      num_embeddings=num_embeddings,
      commitment_cost=commitment_cost,
      decay=decay)
else:
  vq_vae = VectorQuantizer(
      embedding_dim=embedding_dim,
      num_embeddings=num_embeddings,
      commitment_cost=commitment_cost)

# Process inputs with conv stack, finishing with 1x1 to get to correct size.
x = tf.placeholder(tf.float32, shape=(None, image_size, image_size, 1))
z = pre_vq_conv1(encoder(x))

# vq_output_train["quantize"] are the quantized outputs of the encoder.
# That is also what is used during training with the straight-through estimator.
# To get the one-hot coded assignments use vq_output_train["encodings"] instead.
# These encodings will not pass gradients into to encoder,
# but can be used to train a PixelCNN on top afterwards.

# For training
vq_output_train = vq_vae(z, is_training=True)
x_recon = decoder(vq_output_train["quantize"])
recon_error = tf.reduce_mean((x_recon - x)**2) / data_variance  # Normalized MSE
loss = recon_error + vq_output_train["loss"]

# For evaluation, make sure is_training=False!
vq_output_eval = vq_vae(z, is_training=False)
x_recon_eval = decoder(vq_output_eval["quantize"])
codebook = vq_vae.embeddings
indices = vq_output_train["encoding_indices"]
# Flat (one entry per latent vector) view of the same indices. The local
# VectorQuantizer exposes it directly; a quantizer that does not (the Sonnet
# EMA variant may not) gets the equivalent reshape.
if "encoding_indices_old" in vq_output_train:
  indices_old = vq_output_train["encoding_indices_old"]
else:
  indices_old = tf.reshape(indices, [-1])

# The following is a useful value to track during training.
# It indicates how many codes are 'active' on average.
perplexity = vq_output_train["perplexity"]

# Create optimizer and TF session.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss)
# The session is intentionally left open: later cells reuse `sess`.
sess = tf.train.SingularMonitoredSession()

# Train.
train_res_recon_error = []
train_res_perplexity = []
for i in xrange(num_training_updates):
  feed_dict = {x: get_images(sess)['images']}
  # `results` from the last step is reused by later cells, which read the
  # codebook from results[3].
  # NOTE(review): the codebook is fetched in the same run call as train_op,
  # so it may be the value from before that step's update -- confirm if an
  # exact match with the final weights matters.
  results = sess.run([train_op, recon_error, perplexity, codebook],
                     feed_dict=feed_dict)
  train_res_recon_error.append(results[1])
  train_res_perplexity.append(results[2])

  # Report running averages over the last 100 steps.
  if (i+1) % 100 == 0:
    print('%d iterations' % (i+1))
    print('recon_error: %.3f' % np.mean(train_res_recon_error[-100:]))
    print('perplexity: %.3f' % np.mean(train_res_perplexity[-100:]))
    print()

Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
100 iterations
recon_error: 1.111
perplexity: 2.548

200 iterations
recon_error: 0.419
perplexity: 3.514

300 iterations
recon_error: 0.271
perplexity: 7.099

400 iterations
recon_error: 0.207
perplexity: 10.336

500 iterations
recon_error: 0.178
perplexity: 13.370

600 iterations
recon_error: 0.165
perplexity: 15.403

700 iterations
recon_error: 0.153
perplexity: 16.152

800 iterations
recon_error: 0.145
perplexity: 17.017

900 iterations
recon_error: 0.140
perplexity: 18.477

1000 iterations
recon_error: 0.138
perplexity: 20.329

1100 iterations
recon_error: 0.131
perplexity: 20.882

1200 iterations
recon_error: 0.130
perplexity: 21.683

1300 iterations
recon_error: 0.126
perplexity: 22.090

1400 iterations
recon_error: 0.122
perplexity: 22.6

In [0]:
# Reconstructions
# For one batch of each split, fetch the evaluation reconstruction together
# with the grid-shaped and flat code indices.
recon_fetches = [x_recon_eval, indices, indices_old]

train_originals = get_images(sess, subset='train')['images']
feed_dict = {x: train_originals}
train_reconstructions = sess.run(recon_fetches, feed_dict=feed_dict)

# The test iterator is initialisable, so run its initializer before reading.
sess.run(test_dataset_iterator.initializer)
test_originals = get_images(sess, subset='test')['images']
feed_dict = {x: test_originals}
test_reconstructions = sess.run(recon_fetches, feed_dict=feed_dict)

In [0]:
def convert_batch_to_image_grid(image_batch, grid_shape=(4, 8),
                                image_shape=(28, 28, 1)):
  """Tile a batch of images into one grid image with values in [0, 1].

  Args:
    image_batch: NumPy array holding grid_rows * grid_cols images of
      `image_shape`, with values nominally in [-0.5, 0.5].
    grid_shape: (rows, cols) of the grid; rows * cols must equal the number
      of images. Defaults to the 4x8 layout used for batches of 32.
    image_shape: (height, width, channels) of a single image.

  Returns:
    A new array of shape (rows * height, cols * width, channels). Images are
    laid out row-major, shifted by +0.5 and clamped to [0, 1]. The input
    array is not modified.
  """
  rows, cols = grid_shape
  height, width, channels = image_shape
  grid = (image_batch.reshape(rows, cols, height, width, channels)
          .transpose(0, 2, 1, 3, 4)
          .reshape(rows * height, cols * width, channels))
  # Vectorised clamp; replaces a per-pixel Python loop over the whole grid.
  return np.clip(grid + 0.5, 0, 1)
# 2x2 figure: originals on the left, reconstructions on the right; training
# batch on the top row, test batch on the bottom row.
f = plt.figure(figsize=(16,8))
panels = [
    (train_originals, 'training data originals'),
    (train_reconstructions[0], 'training data reconstructions'),
    (test_originals, 'validation data originals'),
    (test_reconstructions[0], 'validation data reconstructions'),
]
for position, (panel_batch, panel_title) in enumerate(panels, start=1):
  ax = f.add_subplot(2, 2, position)
  # Drop the trailing channel axis so imshow receives a 2-D image.
  ax.imshow(np.resize(convert_batch_to_image_grid(panel_batch), (112, 224)),
            interpolation='nearest')
  ax.set_title(panel_title)
  plt.axis('off')


In [0]:
!pip install scipy
import scipy.io as sio
from google.colab import files
# Save the codebook fetched by the last training step (results[3]) to a .mat
# file and offer it for download from the Colab runtime.
codebook_path = 'codebook.mat'
sio.savemat(codebook_path, {'codebook': results[3]})
files.download(codebook_path)

In [0]:
# Encode every training image individually (the classification pipeline
# yields one example per step) and collect, per image:
#   train_index: grid of selected code indices,
#   original:    pre-quantisation latent grid (z),
#   label:       the label batch as returned by the pipeline.
label = []
train_index = []
original = []
# Derive the number of steps from the data instead of hard-coding 60000.
num_train_examples = len(train_dict['images'])
for sample_idx in range(num_train_examples):
  # Loop-local names are deliberately distinct from train_originals /
  # train_reconstructions so the batches used by the plotting cell are not
  # overwritten with differently-shaped values.
  train_sample = get_images_classification(sess, subset='train')
  # Only the indices and latents are needed; the decoder output is not
  # fetched because nothing here uses it.
  sample_indices, sample_latent = sess.run(
      [indices, z], feed_dict={x: train_sample['images']})
  train_index.append(sample_indices[0])
  original.append(sample_latent[0])
  label.append(train_sample['labels'])
sio.savemat('train_index.mat',{'train_index': train_index})
files.download('train_index.mat')
sio.savemat('label.mat',{'label': label})
files.download('label.mat')

In [0]:
# Flatten the per-image latent grids and the codebook vectors they were
# assigned to into two row-aligned matrices (one row per latent position,
# ordered by image, then grid row, then grid column).
#
# The grid size is taken from the arrays themselves. The previous hard-coded
# range(8) loops assumed an 8x8 grid; with 28x28 inputs and two stride-2
# convolutions the grid should be 7x7 (assuming Sonnet's default SAME
# padding), which makes index 7 out of bounds.
num_export_samples = min(40000, len(original))  # only the first 40000 images
latent_grids = np.asarray(original[:num_export_samples])
index_grids = np.asarray(train_index[:num_export_samples], dtype=np.int64)

# Pre-quantisation latents: shape (positions, embedding_dim).
new = latent_grids.reshape(-1, embedding_dim)

# results[3] is the codebook with one code per column; transpose so each row
# is a code, then gather the code selected at every latent position.
mapping = np.asarray(results[3]).T[index_grids.reshape(-1)]
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
!pip install -U -q PyDrive
import scipy.io as sio
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


def _upload_to_drive(drive_client, path):
  """Upload the local file `path` to Drive, titled `path`, and return the Drive file."""
  drive_file = drive_client.CreateFile({'title': path})
  drive_file.SetContentFile(path)
  drive_file.Upload()
  return drive_file


# Create & upload the files. Both .mat files are written locally before any
# upload is attempted.
sio.savemat('new.mat',{'new': new})
sio.savemat('mapping.mat', {'mapping': mapping})
uploaded = _upload_to_drive(drive, 'new.mat')
uploaded1 = _upload_to_drive(drive, 'mapping.mat')
# Report the ID of every uploaded file (previously only the first was shown).
for drive_file in (uploaded, uploaded1):
  print('Uploaded file with ID {}'.format(drive_file.get('id')))

In [0]:
# Pairwise squared Euclidean distances between all codebook vectors.
# results[3] is the codebook with one code per column, so table[j][k] is the
# squared distance between code j and code k (symmetric, zero diagonal).
codebook_values = np.asarray(results[3])
# Broadcast to (embedding_dim, K, K): entry [i, j, k] is w[i, j] - w[i, k].
column_diffs = codebook_values[:, :, np.newaxis] - codebook_values[:, np.newaxis, :]
# Accumulate over the embedding dimension in float64, as the original
# float64 table did; this replaces a triple Python loop.
table = np.sum(np.square(column_diffs), axis=0, dtype=np.float64)
sio.savemat('table.mat',{'table': table})
files.download('table.mat')

In [0]:
# Encode every test image individually (the classification pipeline yields
# one example per step) and collect, per image, the grid of selected code
# indices and the label batch as returned by the pipeline.
test_label = []
test_index = []
# Derive the number of steps from the data instead of hard-coding 10000.
num_test_examples = len(test_dict['images'])
for sample_idx in range(num_test_examples):
  # Loop-local names are deliberately distinct from test_originals /
  # test_reconstructions so the batches used by the plotting cell are not
  # overwritten with differently-shaped values.
  test_sample = get_images_classification(sess, subset='test')
  # Only the indices are needed; the decoder output is not fetched because
  # nothing here uses it.
  sample_indices = sess.run(indices, feed_dict={x: test_sample['images']})
  test_index.append(sample_indices[0])
  test_label.append(test_sample['labels'])
sio.savemat('test_index.mat',{'test_index': test_index})
files.download('test_index.mat')
sio.savemat('test_label.mat',{'test_label': test_label})
files.download('test_label.mat')