In [0]:
# Load packages
import scipy.io as sio
import numpy as np
from matplotlib import pyplot as plt
import os
from sys import exit

# **Data preparation**

In [0]:
# Make sure you upload the training and test data before running this cell if you are using Colab.
# The dataset can be found here: http://ufldl.stanford.edu/housenumbers/
# The dataset class is adapted from: https://github.com/wbh0912/VAE_SVHN/blob/master/dataset/dataset_svhn.py

# Images of the red channel

class SVHNDataset_R:
    """Red-channel view of the SVHN 32x32 cropped-digit dataset, held in memory.

    Both splits are stored as float32 arrays of shape (N, 32, 32) scaled to
    [0, 1]. Batches are served sequentially through internal pointers; when a
    requested batch would run past the end of the split, the pointer wraps and
    the batch is taken from the start of the split instead.
    """

    def __init__(self, db_path='/content', use_extra=False):
        """Load ``train_32x32.mat`` and ``test_32x32.mat`` from ``db_path``.

        Args:
            db_path: Directory that contains the two ``.mat`` files.
            use_extra: Accepted for interface compatibility; not used here.

        If either file is missing, a message is printed and ``exit(-1)`` is
        called.
        """
        print("Loading files")
        self.data_dims = [32, 32]
        self.name = "svhn"
        self.train_file = os.path.join(db_path, "train_32x32.mat") # path for train file on VM
        self.test_file = os.path.join(db_path, "test_32x32.mat") # path for test file on VM
        print(self.train_file)

        # Load training images
        if os.path.isfile(self.train_file):
            self.train_image, self.train_label = self._load_channel(self.train_file)
        else:
            print("SVHN dataset train files not found")
            exit(-1)
        self.train_batch_ptr = 0
        self.train_size = self.train_image.shape[0]

        # Load test images
        if os.path.isfile(self.test_file):
            self.test_image, self.test_label = self._load_channel(self.test_file)
        else:
            print("SVHN dataset test files not found")
            exit(-1)
        self.test_batch_ptr = 0
        self.test_size = self.test_image.shape[0]
        print("SVHN_R loaded into memory")

    @staticmethod
    def _load_channel(mat_path, channel=0):
        """Return ``(images, labels)`` for one colour channel of a ``.mat`` file.

        ``mat['X']`` is laid out as 32 x 32 x 3 x N. The channel is selected
        *before* the float conversion so only one third of the pixels is ever
        copied, and the result is explicitly float32 so that the train and
        test splits share one dtype.
        """
        mat = sio.loadmat(mat_path)
        plane = mat['X'][:, :, channel, :]                         # 32 x 32 x N
        plane = np.transpose(plane, [2, 0, 1]).astype(np.float32)  # N x 32 x 32
        images = np.clip(plane / 255.0, a_min=0.0, a_max=1.0)
        return images, mat['y']

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` training images and advance the pointer.

        If the batch would extend past the last image, the pointer wraps and
        the first ``batch_size`` images are returned (the tail is skipped).
        """
        prev_batch_ptr = self.train_batch_ptr
        self.train_batch_ptr += batch_size
        if self.train_batch_ptr > self.train_image.shape[0]:
            self.train_batch_ptr = batch_size
            prev_batch_ptr = 0
        return self.train_image[prev_batch_ptr:self.train_batch_ptr, :, :]

    def batch_by_index(self, batch_start, batch_end):
        """Return training images ``[batch_start, batch_end)`` without moving the pointer."""
        return self.train_image[batch_start:batch_end, :, :]

    def next_test_batch(self, batch_size):
        """Return the next ``(images, labels)`` test batch, wrapping like ``next_batch``."""
        prev_batch_ptr = self.test_batch_ptr
        self.test_batch_ptr += batch_size
        if self.test_batch_ptr > self.test_image.shape[0]:
            self.test_batch_ptr = batch_size
            prev_batch_ptr = 0
        return self.test_image[prev_batch_ptr:self.test_batch_ptr, :, :], self.test_label[prev_batch_ptr:self.test_batch_ptr]

    def reset(self):
        """Rewind both the train and the test batch pointers to the start."""
        self.train_batch_ptr = 0
        self.test_batch_ptr = 0

In [0]:
# Images of the green channel

class SVHNDataset_G:
    """In-memory SVHN (32x32 cropped digits) dataset restricted to the green channel.

    Pixel values are scaled to [0, 1]. Batches are handed out sequentially
    through internal train/test pointers that wrap to the start of the split
    when a batch would run past its end.
    """

    def __init__(self, db_path='/content', use_extra=False):
        """Read the train and test ``.mat`` files found in ``db_path``.

        ``use_extra`` is accepted but not used. If either file is missing, a
        message is printed and ``exit(-1)`` is called.
        """
        print("Loading files")
        self.data_dims = [32, 32]
        self.name = "svhn"
        # Locations of the train / test files on the VM
        self.train_file = os.path.join(db_path, "train_32x32.mat")
        self.test_file = os.path.join(db_path, "test_32x32.mat")
        print(self.train_file)

        # --- training split ---
        if not os.path.isfile(self.train_file):
            print("SVHN dataset train files not found")
            exit(-1)
        else:
            train_mat = sio.loadmat(self.train_file)
            self.train_label = train_mat['y']
            # X is stored as 32 x 32 x 3 x N: move N to the front, keep channel 1
            train_pixels = train_mat['X'].astype(np.float32).transpose(3, 0, 1, 2)
            self.train_image = np.clip(train_pixels[..., 1] / 255.0, a_min=0.0, a_max=1.0)
        self.train_batch_ptr = 0
        self.train_size = self.train_image.shape[0]

        # --- test split ---
        if not os.path.isfile(self.test_file):
            print("SVHN dataset test files not found")
            exit(-1)
        else:
            test_mat = sio.loadmat(self.test_file)
            self.test_label = test_mat['y']
            test_pixels = test_mat['X'].astype(np.float32).transpose(3, 0, 1, 2)
            self.test_image = np.clip(test_pixels[..., 1] / 255.0, a_min=0.0, a_max=1.0)
        self.test_batch_ptr = 0
        self.test_size = self.test_image.shape[0]
        print("SVHN_G loaded into memory")

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` training images and advance the pointer.

        When the batch would overrun the split, the first ``batch_size``
        images are returned instead and the pointer restarts from there.
        """
        start = self.train_batch_ptr
        stop = start + batch_size
        if stop > self.train_image.shape[0]:
            start, stop = 0, batch_size
        self.train_batch_ptr = stop
        return self.train_image[start:stop, :, :]

    def batch_by_index(self, batch_start, batch_end):
        """Return training images ``[batch_start, batch_end)``; pointers are untouched."""
        return self.train_image[batch_start:batch_end, :, :]

    def next_test_batch(self, batch_size):
        """Return the next ``(images, labels)`` pair from the test split, with wrap-around."""
        start = self.test_batch_ptr
        stop = start + batch_size
        if stop > self.test_image.shape[0]:
            start, stop = 0, batch_size
        self.test_batch_ptr = stop
        window = slice(start, stop)
        return self.test_image[window, :, :], self.test_label[window]

    def reset(self):
        """Rewind both batch pointers to the beginning of their splits."""
        self.train_batch_ptr = self.test_batch_ptr = 0

In [0]:
# Images of the blue channel

class SVHNDataset_B:
    """Blue-channel SVHN (32x32 cropped digits) dataset kept fully in memory.

    Images are scaled to [0, 1] and served in order through wrapping
    train/test batch pointers.
    """

    def __init__(self, db_path='/content', use_extra=False):
        """Load both ``.mat`` splits from ``db_path``.

        ``use_extra`` is accepted but ignored. A missing file prints a message
        and calls ``exit(-1)``.
        """

        def blue_plane(mat_contents):
            # X is 32 x 32 x 3 x N -> N x 32 x 32 x 3, then keep the 3rd channel
            stacked = np.transpose(mat_contents['X'].astype(np.float32), [3, 0, 1, 2])
            return np.clip(stacked[:, :, :, 2] / 255.0, a_min=0.0, a_max=1.0)

        print("Loading files")
        self.data_dims = [32, 32]
        self.name = "svhn"
        self.train_file = os.path.join(db_path, "train_32x32.mat")  # train file on the VM
        self.test_file = os.path.join(db_path, "test_32x32.mat")    # test file on the VM
        print(self.train_file)

        # Training split
        if os.path.isfile(self.train_file):
            contents = sio.loadmat(self.train_file)
            self.train_label = contents['y']
            self.train_image = blue_plane(contents)
        else:
            print("SVHN dataset train files not found")
            exit(-1)
        self.train_batch_ptr = 0
        self.train_size = len(self.train_image)

        # Test split
        if os.path.isfile(self.test_file):
            contents = sio.loadmat(self.test_file)
            self.test_label = contents['y']
            self.test_image = blue_plane(contents)
        else:
            print("SVHN dataset test files not found")
            exit(-1)
        self.test_batch_ptr = 0
        self.test_size = len(self.test_image)
        print("SVHN_B loaded into memory")

    def next_batch(self, batch_size):
        """Serve the next ``batch_size`` training images.

        A batch that would cross the end of the split is replaced by the
        first ``batch_size`` images, and the pointer continues from there.
        """
        lo, hi = self.train_batch_ptr, self.train_batch_ptr + batch_size
        if hi > len(self.train_image):
            lo, hi = 0, batch_size
        self.train_batch_ptr = hi
        return self.train_image[lo:hi]

    def batch_by_index(self, batch_start, batch_end):
        """Slice training images ``[batch_start, batch_end)`` without touching the pointers."""
        return self.train_image[batch_start:batch_end]

    def next_test_batch(self, batch_size):
        """Serve the next test ``(images, labels)`` pair, wrapping like ``next_batch``."""
        lo, hi = self.test_batch_ptr, self.test_batch_ptr + batch_size
        if hi > len(self.test_image):
            lo, hi = 0, batch_size
        self.test_batch_ptr = hi
        return self.test_image[lo:hi], self.test_label[lo:hi]

    def reset(self):
        """Put both batch pointers back at index 0."""
        self.train_batch_ptr = 0
        self.test_batch_ptr = 0

In [6]:
# Load the three single-channel SVHN datasets from the current working directory
print(os.getcwd())
dataset_R = SVHNDataset_R('.')
dataset_G = SVHNDataset_G('.')
dataset_B = SVHNDataset_B('.')

# Number of training images, taken from the red-channel dataset and used by the train_* functions
n_samples = dataset_R.train_size

/content
Loading files
./train_32x32.mat
SVHN_R loaded into memory
Loading files
./train_32x32.mat
SVHN_G loaded into memory
Loading files
./train_32x32.mat
SVHN_B loaded into memory


# **Build VAE model and train function**

In [0]:
# import packages
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib

# Some Settings
# Fix the NumPy and TensorFlow random seeds so that repeated runs start from the same state.
np.random.seed(0)
tf.set_random_seed(0)

In [0]:
class VariationalAutoencoder(object):
    """ Variation Autoencoder (VAE) with an sklearn-like interface implemented using TensorFlow.
    https://jmetzen.github.io/2015-11-27/vae.html
    See "Auto-Encoding Variational Bayes" by Kingma and Welling for more details.

    The encoder ("recognition network") and decoder ("generator network") are
    each a fully connected network with two hidden layers. Building an
    instance creates the graph ops, opens a TensorFlow session and
    initializes the variables.
    """
    def __init__(self, network_architecture, transfer_fct=tf.nn.relu, 
                 learning_rate=0.001, batch_size=100):
        """Build the network, the loss/optimizer and start a session.

        Args:
            network_architecture: dict with the keys ``n_hidden_recog_1``,
                ``n_hidden_recog_2``, ``n_hidden_gener_1``,
                ``n_hidden_gener_2``, ``n_input`` and ``n_z``.
            transfer_fct: activation applied after every hidden layer.
            learning_rate: learning rate passed to the Adam optimizer.
            batch_size: stored on the instance; the graph itself accepts any
                number of rows per call.
        """
        self.network_architecture = network_architecture
        self.transfer_fct = transfer_fct
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        # tf Graph input
        self.x = tf.placeholder(tf.float32, [None, network_architecture["n_input"]])
        
        # Create autoencoder network
        self._create_network()
        # Define loss function based variational upper-bound and 
        # corresponding optimizer
        self._create_loss_optimizer()
        
        # Initializing the tensor flow variables
        init = tf.global_variables_initializer()

        # Launch the session
        self.sess = tf.InteractiveSession()
        self.sess.run(init)
    
    def _create_network(self):
        """Create the encoder, the reparameterized latent sample and the decoder."""
        # Initialize autoencode network weights and biases
        network_weights = self._initialize_weights(**self.network_architecture)

        # Use recognition network to determine mean and 
        # (log) variance of Gaussian distribution in latent
        # space
        self.z_mean, self.z_log_sigma_sq = self._recognition_network(network_weights["weights_recog"], 
                                      network_weights["biases_recog"])

        # Draw one sample z per input row from a Gaussian distribution.
        # The noise takes its shape from z_mean at run time, so the model is
        # not tied to a fixed number of rows: reconstruct()/transform() work
        # for any batch length, not only for self.batch_size.
        eps = tf.random_normal(tf.shape(self.z_mean), 0, 1, 
                               dtype=tf.float32)
        # z = mu + sigma*epsilon, with sigma = exp(0.5 * log(sigma^2)).
        # Halving the exponent is equivalent to sqrt(exp(.)) but avoids
        # forming exp(log_sigma_sq) first, which overflows earlier.
        self.z = tf.add(self.z_mean, 
                        tf.multiply(tf.exp(0.5 * self.z_log_sigma_sq), eps))

        # Use generator to determine mean of
        # Bernoulli distribution of reconstructed input
        self.x_reconstr_mean = self._generator_network(network_weights["weights_gener"],
                                    network_weights["biases_gener"])
            
    def _initialize_weights(self, n_hidden_recog_1, n_hidden_recog_2, 
                            n_hidden_gener_1,  n_hidden_gener_2, 
                            n_input, n_z):
        """Create all weight matrices (Xavier-initialized) and zero biases.

        Returns a dict with the entries ``weights_recog``, ``biases_recog``,
        ``weights_gener`` and ``biases_gener``, each a dict of tf.Variables.
        """
        all_weights = dict()
        initializer = tf.contrib.layers.xavier_initializer()
        all_weights['weights_recog'] = {
            'h1': tf.Variable(initializer(shape = [n_input, n_hidden_recog_1])),
            'h2': tf.Variable(initializer(shape = [n_hidden_recog_1, n_hidden_recog_2])),
            'out_mean': tf.Variable(initializer(shape = [n_hidden_recog_2, n_z])),
            'out_log_sigma': tf.Variable(initializer(shape = [n_hidden_recog_2, n_z]))}
        all_weights['biases_recog'] = {
            'b1': tf.Variable(tf.zeros([n_hidden_recog_1], dtype=tf.float32)),
            'b2': tf.Variable(tf.zeros([n_hidden_recog_2], dtype=tf.float32)),
            'out_mean': tf.Variable(tf.zeros([n_z], dtype=tf.float32)),
            'out_log_sigma': tf.Variable(tf.zeros([n_z], dtype=tf.float32))}
        # NOTE: the generator's 'out_log_sigma' entries are created but are not
        # read by _generator_network, which only outputs a Bernoulli mean.
        all_weights['weights_gener'] = {
            'h1': tf.Variable(initializer(shape = [n_z, n_hidden_gener_1])),
            'h2': tf.Variable(initializer(shape = [n_hidden_gener_1, n_hidden_gener_2])),
            'out_mean': tf.Variable(initializer(shape = [n_hidden_gener_2, n_input])),
            'out_log_sigma': tf.Variable(initializer(shape = [n_hidden_gener_2, n_input]))}
        all_weights['biases_gener'] = {
            'b1': tf.Variable(tf.zeros([n_hidden_gener_1], dtype=tf.float32)),
            'b2': tf.Variable(tf.zeros([n_hidden_gener_2], dtype=tf.float32)),
            'out_mean': tf.Variable(tf.zeros([n_input], dtype=tf.float32)),
            'out_log_sigma': tf.Variable(tf.zeros([n_input], dtype=tf.float32))}
        return all_weights
            
    def _recognition_network(self, weights, biases):
        """Encoder: map ``self.x`` to ``(z_mean, z_log_sigma_sq)``."""
        # Generate probabilistic encoder (recognition network), which
        # maps inputs onto a normal distribution in latent space.
        # The transformation is parametrized and can be learned.
        layer_1 = self.transfer_fct(tf.add(tf.matmul(self.x, weights['h1']), 
                                           biases['b1'])) 
        layer_2 = self.transfer_fct(tf.add(tf.matmul(layer_1, weights['h2']), 
                                           biases['b2']))
        z_mean = tf.add(tf.matmul(layer_2, weights['out_mean']),
                        biases['out_mean'])
        z_log_sigma_sq = tf.add(tf.matmul(layer_2, weights['out_log_sigma']), 
                   biases['out_log_sigma'])
        return (z_mean, z_log_sigma_sq)

    def _generator_network(self, weights, biases):
        """Decoder: map ``self.z`` to the per-pixel Bernoulli mean (sigmoid output)."""
        # Generate probabilistic decoder (decoder network), which
        # maps points in latent space onto a Bernoulli distribution in data space.
        # The transformation is parametrized and can be learned.
        layer_1 = self.transfer_fct(tf.add(tf.matmul(self.z, weights['h1']), 
                                           biases['b1'])) 
        layer_2 = self.transfer_fct(tf.add(tf.matmul(layer_1, weights['h2']), 
                                           biases['b2'])) 
        x_reconstr_mean = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['out_mean']), 
                                 biases['out_mean']))        
        return x_reconstr_mean
            
    def _create_loss_optimizer(self):
        """Define ``self.cost`` (reconstruction + KL term) and the Adam training op."""
        # The loss is composed of two terms:
        # 1.) The reconstruction loss (the negative log probability
        #     of the input under the reconstructed Bernoulli distribution 
        #     induced by the decoder in the data space).
        #     This can be interpreted as the number of "nats" required
        #     for reconstructing the input when the activation in latent
        #     is given.
        # Adding 1e-10 to avoid evaluation of log(0.0)
        reconstr_loss = -tf.reduce_sum(self.x * tf.log(1e-10 + self.x_reconstr_mean)
                           + (1-self.x) * tf.log(1e-10 + 1 - self.x_reconstr_mean),
                           1)
        # 2.) The latent loss, which is defined as the Kullback Leibler divergence 
        #     between the distribution in latent space induced by the encoder on 
        #     the data and some prior. This acts as a kind of regularizer.
        #     This can be interpreted as the number of "nats" required
        #     for transmitting the latent space distribution given
        #     the prior.
        latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq 
                                           - tf.square(self.z_mean) 
                                           - tf.exp(self.z_log_sigma_sq), 1)
        self.cost = tf.reduce_mean(reconstr_loss + latent_loss)   # average over batch
        
        # Use ADAM optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)
        
    def partial_fit(self, X):
        """Train model based on mini-batch of input data.
        
        Return cost of mini-batch.
        """
        opt, cost = self.sess.run((self.optimizer, self.cost), 
                                  feed_dict={self.x: X})
        return cost
    
    def transform(self, X):
        """Transform data by mapping it into the latent space."""
        # Note: This maps to mean of distribution, we could alternatively
        # sample from Gaussian distribution
        return self.sess.run(self.z_mean, feed_dict={self.x: X})
    
    def generate(self, z_mu=None):
        """ Generate data by sampling from latent space.
        
        If z_mu is not None, data for this point in latent space is
        generated. Otherwise, z_mu is drawn from prior in latent 
        space.

        z_mu may be a single point of length n_z or a 2-D array with one
        latent point per row; the result has one row per latent point.
        """
        if z_mu is None:
            z_mu = np.random.normal(size=self.network_architecture["n_z"])
        # self.z is a 2-D (rows x n_z) tensor, so a single latent point has to
        # be promoted to one row before it can be fed; 2-D input is unchanged.
        z_mu = np.atleast_2d(z_mu)
        # Note: This maps to mean of distribution, we could alternatively
        # sample from Gaussian distribution
        return self.sess.run(self.x_reconstr_mean, 
                             feed_dict={self.z: z_mu})
    
    def reconstruct(self, X):
        """ Use VAE to reconstruct given data. """
        return self.sess.run(self.x_reconstr_mean, 
                             feed_dict={self.x: X})


In [0]:
# Train Function
def train_R(network_architecture, learning_rate=0.001,
          batch_size=100, training_epochs=10, display_step=1):
    """Train a new VAE on the red-channel training images and return it.

    Every epoch runs ``int(n_samples / batch_size)`` mini-batches drawn from
    ``dataset_R``; the accumulated cost is printed every ``display_step``
    epochs.
    """
    model = VariationalAutoencoder(network_architecture,
                                   learning_rate=learning_rate,
                                   batch_size=batch_size)
    for epoch_idx in range(training_epochs):
        running_cost = 0.
        batches_per_epoch = int(n_samples / batch_size)
        for _ in range(batches_per_epoch):
            # Flatten each 32x32 image into a 1024-vector for the dense encoder
            flat_batch = dataset_R.next_batch(batch_size).reshape(batch_size, 1024)
            batch_cost = model.partial_fit(flat_batch)
            # Weight each batch cost by the fraction of the data it represents
            running_cost += batch_cost / n_samples * batch_size

        # Only report on every display_step-th epoch
        if epoch_idx % display_step != 0:
            continue
        print("Epoch:", '%04d' % (epoch_idx + 1),
              "cost=", "{:.9f}".format(running_cost))
    return model

def train_G(network_architecture, learning_rate=0.001,
          batch_size=100, training_epochs=10, display_step=1):
    """Train a new VAE on the green-channel training images and return it.

    Every epoch runs ``int(n_samples / batch_size)`` mini-batches drawn from
    ``dataset_G``; the accumulated cost is printed every ``display_step``
    epochs.
    """
    model = VariationalAutoencoder(network_architecture,
                                   learning_rate=learning_rate,
                                   batch_size=batch_size)
    for epoch_idx in range(training_epochs):
        running_cost = 0.
        batches_per_epoch = int(n_samples / batch_size)
        for _ in range(batches_per_epoch):
            # Flatten each 32x32 image into a 1024-vector for the dense encoder
            flat_batch = dataset_G.next_batch(batch_size).reshape(batch_size, 1024)
            batch_cost = model.partial_fit(flat_batch)
            # Weight each batch cost by the fraction of the data it represents
            running_cost += batch_cost / n_samples * batch_size

        # Only report on every display_step-th epoch
        if epoch_idx % display_step != 0:
            continue
        print("Epoch:", '%04d' % (epoch_idx + 1),
              "cost=", "{:.9f}".format(running_cost))
    return model

def train_B(network_architecture, learning_rate=0.001,
          batch_size=100, training_epochs=10, display_step=1):
    """Train a new VAE on the blue-channel training images and return it.

    Every epoch runs ``int(n_samples / batch_size)`` mini-batches drawn from
    ``dataset_B``; the accumulated cost is printed every ``display_step``
    epochs.
    """
    model = VariationalAutoencoder(network_architecture,
                                   learning_rate=learning_rate,
                                   batch_size=batch_size)
    for epoch_idx in range(training_epochs):
        running_cost = 0.
        batches_per_epoch = int(n_samples / batch_size)
        for _ in range(batches_per_epoch):
            # Flatten each 32x32 image into a 1024-vector for the dense encoder
            flat_batch = dataset_B.next_batch(batch_size).reshape(batch_size, 1024)
            batch_cost = model.partial_fit(flat_batch)
            # Weight each batch cost by the fraction of the data it represents
            running_cost += batch_cost / n_samples * batch_size

        # Only report on every display_step-th epoch
        if epoch_idx % display_step != 0:
            continue
        print("Epoch:", '%04d' % (epoch_idx + 1),
              "cost=", "{:.9f}".format(running_cost))
    return model

# **Comparison between different latent space dimension**

In [0]:
# Baseline architecture: 500-unit hidden layers, 20-D latent space
network_architecture = {
    "n_hidden_recog_1": 500,  # 1st layer encoder neurons
    "n_hidden_recog_2": 500,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 500,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 20,                # dimensionality of latent space
}

# One model per colour channel
vae_20_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_20_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_20_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Draw the same 100-image test batch from each channel dataset (images only, labels dropped)
x_sample_R = dataset_R.next_test_batch(100)[0]
x_sample_G = dataset_G.next_test_batch(100)[0]
x_sample_B = dataset_B.next_test_batch(100)[0]

# Re-assemble RGB images: stack the channels along a new last axis -> (100, 32, 32, 3)
x_sample = np.stack([x_sample_R, x_sample_G, x_sample_B], axis=-1)

In [0]:
# test outputs
# Each channel must be reconstructed by the model trained on that channel.
# The green channel previously went through vae_20_R, so the baseline
# reconstructions used in every comparison figure had a wrong green plane.
x_reconstruct_R = vae_20_R.reconstruct(x_sample_R.reshape(100,1024)).reshape(100, 32, 32)
x_reconstruct_G = vae_20_G.reconstruct(x_sample_G.reshape(100,1024)).reshape(100, 32, 32)
x_reconstruct_B = vae_20_B.reconstruct(x_sample_B.reshape(100,1024)).reshape(100, 32, 32)

# Combine the three channels into RGB images of shape (100, 32, 32, 3)
x_reconstruct = np.transpose(np.stack((x_reconstruct_R, x_reconstruct_G, x_reconstruct_B)), (1, 2, 3, 0))

In [0]:
# Same hidden layers as the baseline, but a 128-D latent space
network_architecture = {
    "n_hidden_recog_1": 500,  # 1st layer encoder neurons
    "n_hidden_recog_2": 500,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 500,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 128,               # dimensionality of latent space
}

# One model per colour channel
vae_128_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_128_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_128_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 128-D models, one colour channel at a time
x_reconstruct_R_128 = np.reshape(vae_128_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_128 = np.reshape(vae_128_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_128 = np.reshape(vae_128_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_128 = np.stack([x_reconstruct_R_128, x_reconstruct_G_128, x_reconstruct_B_128], axis=-1)

In [0]:
# Same hidden layers as the baseline, but a 512-D latent space
network_architecture = {
    "n_hidden_recog_1": 500,  # 1st layer encoder neurons
    "n_hidden_recog_2": 500,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 500,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 512,               # dimensionality of latent space
}

# One model per colour channel
vae_512_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_512_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_512_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 512-D models, one colour channel at a time
x_reconstruct_R_512 = np.reshape(vae_512_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_512 = np.reshape(vae_512_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_512 = np.reshape(vae_512_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_512 = np.stack([x_reconstruct_R_512, x_reconstruct_G_512, x_reconstruct_B_512], axis=-1)

In [0]:
# Same hidden layers as the baseline, but a 10-D latent space
network_architecture = {
    "n_hidden_recog_1": 500,  # 1st layer encoder neurons
    "n_hidden_recog_2": 500,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 500,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 10,                # dimensionality of latent space
}

# One model per colour channel
vae_10_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_10_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_10_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 10-D models, one colour channel at a time
x_reconstruct_R_10 = np.reshape(vae_10_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_10 = np.reshape(vae_10_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_10 = np.reshape(vae_10_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_10 = np.stack([x_reconstruct_R_10, x_reconstruct_G_10, x_reconstruct_B_10], axis=-1)

In [0]:
# Latent-dimension comparison: one row per test image, one column per model
plt.figure(figsize=(8, 12))
for i in range(5):
    # Columns, left to right: input image, then reconstructions by growing latent size
    for panel_col, (panel_images, panel_title) in enumerate([
            (x_sample, "Test input"),
            (x_reconstruct_10, "Z = 10"),
            (x_reconstruct, "Z = 20"),
            (x_reconstruct_128, "Z = 128"),
            (x_reconstruct_512, "Z = 512")], start=1):
        plt.subplot(5, 5, 5*i + panel_col)
        plt.imshow(panel_images[i])
        plt.title(panel_title)
plt.tight_layout()
# plt.savefig('change of latent dimension') # this line is used to save image

# **Comparison between different layer width**

In [0]:
# Narrower network: 200 units in every hidden layer, 20-D latent space
network_architecture = {
    "n_hidden_recog_1": 200,  # 1st layer encoder neurons
    "n_hidden_recog_2": 200,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 200,  # 1st layer decoder neurons
    "n_hidden_gener_2": 200,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 20,                # dimensionality of latent space
}

# One model per colour channel
vae_arch200_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_arch200_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_arch200_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 200-neuron models, one colour channel at a time
x_reconstruct_R_arch200 = np.reshape(vae_arch200_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_arch200 = np.reshape(vae_arch200_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_arch200 = np.reshape(vae_arch200_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_arch200 = np.stack([x_reconstruct_R_arch200, x_reconstruct_G_arch200, x_reconstruct_B_arch200], axis=-1)

In [0]:
# Wider network: 1000 units in every hidden layer, 20-D latent space
network_architecture = {
    "n_hidden_recog_1": 1000,  # 1st layer encoder neurons
    "n_hidden_recog_2": 1000,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 1000,  # 1st layer decoder neurons
    "n_hidden_gener_2": 1000,  # 2nd layer decoder neurons
    "n_input": 1024,           # one flattened 32x32 channel
    "n_z": 20,                 # dimensionality of latent space
}

# One model per colour channel
vae_arch1000_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_arch1000_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_arch1000_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 1000-neuron models, one colour channel at a time
x_reconstruct_R_arch1000 = np.reshape(vae_arch1000_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_arch1000 = np.reshape(vae_arch1000_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_arch1000 = np.reshape(vae_arch1000_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_arch1000 = np.stack([x_reconstruct_R_arch1000, x_reconstruct_G_arch1000, x_reconstruct_B_arch1000], axis=-1)

In [0]:
# Layer-width comparison: one row per test image (samples 40-44), one column per model
plt.figure(figsize=(8, 12))
for i in range(5):
    plt.subplot(5, 4, 4*i + 1)
    plt.imshow(x_sample[i+40])
    plt.title("Test input")
    plt.subplot(5, 4, 4*i + 2)
    plt.imshow(x_reconstruct_arch200[i+40])
    # This column shows the 200-unit model; the title previously said "100 Neurons"
    plt.title("200 Neurons")
    plt.subplot(5, 4, 4*i + 3)
    plt.imshow(x_reconstruct[i+40])
    plt.title("500 Neurons")
    plt.subplot(5, 4, 4*i + 4)
    plt.imshow(x_reconstruct_arch1000[i+40])
    plt.title("1000 Neurons")
plt.tight_layout()
# plt.savefig('change of neuron')

# **Comparison between different network architecture**

In [0]:
# Unequal layer widths: encoder 500 -> 1000, decoder 1000 -> 500
network_architecture = {
    "n_hidden_recog_1": 500,   # 1st layer encoder neurons
    "n_hidden_recog_2": 1000,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 1000,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,   # 2nd layer decoder neurons
    "n_input": 1024,           # one flattened 32x32 channel
    "n_z": 20,                 # dimensionality of latent space
}

# One model per colour channel
vae_arch510_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_arch510_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_arch510_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 500-1000 models, one colour channel at a time
x_reconstruct_R_arch510 = np.reshape(vae_arch510_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_arch510 = np.reshape(vae_arch510_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_arch510 = np.reshape(vae_arch510_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_arch510 = np.stack([x_reconstruct_R_arch510, x_reconstruct_G_arch510, x_reconstruct_B_arch510], axis=-1)

In [0]:
# Unequal layer widths: encoder 500 -> 200, decoder 200 -> 500
network_architecture = {
    "n_hidden_recog_1": 500,  # 1st layer encoder neurons
    "n_hidden_recog_2": 200,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 200,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 20,                # dimensionality of latent space
}

# One model per colour channel
vae_arch52_R = train_R(network_architecture, batch_size=100, training_epochs=10)
vae_arch52_G = train_G(network_architecture, batch_size=100, training_epochs=10)
vae_arch52_B = train_B(network_architecture, batch_size=100, training_epochs=10)

In [0]:
# Reconstruct the test batch with the 500-200 models, one colour channel at a time
x_reconstruct_R_arch52 = np.reshape(vae_arch52_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_arch52 = np.reshape(vae_arch52_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_arch52 = np.reshape(vae_arch52_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_arch52 = np.stack([x_reconstruct_R_arch52, x_reconstruct_G_arch52, x_reconstruct_B_arch52], axis=-1)

In [0]:
# Unequal-layer-width comparison: one row per test image (samples 20-24), one column per model
plt.figure(figsize=(8, 12))
for i in range(5):
    # Columns, left to right: input image, then the three encoder layouts
    for panel_col, (panel_images, panel_title) in enumerate([
            (x_sample, "Test input"),
            (x_reconstruct_arch510, "500-1000"),
            (x_reconstruct, "500-500"),
            (x_reconstruct_arch52, "500-200")], start=1):
        plt.subplot(5, 4, 4*i + panel_col)
        plt.imshow(panel_images[i+20])
        plt.title(panel_title)
plt.tight_layout()
# plt.savefig('change of arch')

# **Comparison between different numbers of epochs**

In [0]:
# Baseline architecture again, this time trained for 20 epochs
network_architecture = {
    "n_hidden_recog_1": 500,  # 1st layer encoder neurons
    "n_hidden_recog_2": 500,  # 2nd layer encoder neurons
    "n_hidden_gener_1": 500,  # 1st layer decoder neurons
    "n_hidden_gener_2": 500,  # 2nd layer decoder neurons
    "n_input": 1024,          # one flattened 32x32 channel
    "n_z": 20,                # dimensionality of latent space
}

# One model per colour channel
vae_ep20_R = train_R(network_architecture, batch_size=100, training_epochs=20)
vae_ep20_G = train_G(network_architecture, batch_size=100, training_epochs=20)
vae_ep20_B = train_B(network_architecture, batch_size=100, training_epochs=20)

In [0]:
# Reconstruct the test batch with the 20-epoch models, one colour channel at a time
x_reconstruct_R_ep20 = np.reshape(vae_ep20_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_ep20 = np.reshape(vae_ep20_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_ep20 = np.reshape(vae_ep20_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_ep20 = np.stack([x_reconstruct_R_ep20, x_reconstruct_G_ep20, x_reconstruct_B_ep20], axis=-1)

In [0]:
# 50 epochs, reusing whatever network_architecture is currently defined
# (the baseline dict when the cells are run in order)
vae_ep50_R = train_R(network_architecture, batch_size=100, training_epochs=50)
vae_ep50_G = train_G(network_architecture, batch_size=100, training_epochs=50)
vae_ep50_B = train_B(network_architecture, batch_size=100, training_epochs=50)

In [0]:
# Reconstruct the test batch with the 50-epoch models, one colour channel at a time
x_reconstruct_R_ep50 = np.reshape(vae_ep50_R.reconstruct(np.reshape(x_sample_R, (100, 1024))), (100, 32, 32))
x_reconstruct_G_ep50 = np.reshape(vae_ep50_G.reconstruct(np.reshape(x_sample_G, (100, 1024))), (100, 32, 32))
x_reconstruct_B_ep50 = np.reshape(vae_ep50_B.reconstruct(np.reshape(x_sample_B, (100, 1024))), (100, 32, 32))

# Stack the channels along a new last axis -> RGB images of shape (100, 32, 32, 3)
x_reconstruct_ep50 = np.stack([x_reconstruct_R_ep50, x_reconstruct_G_ep50, x_reconstruct_B_ep50], axis=-1)

In [0]:
# Epoch-count comparison: one row per test image (samples 20-24), one column per model
plt.figure(figsize=(8, 12))
for i in range(5):
    # Columns, left to right: input image, then reconstructions after 10, 20 and 50 epochs
    for panel_col, (panel_images, panel_title) in enumerate([
            (x_sample, "Test input"),
            (x_reconstruct, "epoch = 10"),
            (x_reconstruct_ep20, "epoch = 20"),
            (x_reconstruct_ep50, "epoch = 50")], start=1):
        plt.subplot(5, 4, 4*i + panel_col)
        plt.imshow(panel_images[i+20])
        plt.title(panel_title)
plt.tight_layout()
# plt.savefig('change of epoch')