In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets as K_datasets
from tensorflow.keras import models as K_models
from tensorflow.keras import layers as K_layers
from tensorflow.keras import utils as K_utils
from tensorflow.keras import optimizers as K_optimizers
from tensorflow.keras import backend as K_backend

In [5]:
class VariationalAutoencoder:
    def __init__(self, input_dim, encoder_conv_settings, decoder_conv_settings, z_dim, use_batch_norm=False, use_dropout=False):
        self.input_dim=input_dim
        self.encoder_conv_settings=encoder_conv_settings  # filters, kernel size, strides
        self.decoder_conv_settings=decoder_conv_settings  # filters, kernel size, strides
        self.z_dim=z_dim
        self.model=None
        self.use_batch_norm = use_batch_norm
        self.use_dropout = use_dropout
        return 
    
    def build(self):
        # Encoder
        encoder_input = K_layers.Input(shape=self.input_dim, name='encoder_input')
        x = encoder_input
        for i, encoder_conv_setting in enumerate(self.encoder_conv_settings):
            conv_layer = K_layers.Conv2D(
                filters=encoder_conv_setting[0],
                kernel_size=encoder_conv_setting[1],
                strides=encoder_conv_setting[2],
                padding='same',
                name='encoder_conv_' + str(i)
            )
            x = conv_layer(x)
            if self.use_batch_norm: 
                x = K_layers.BatchNormalization()(x)
            x = K_layers.LeakyReLU()(x)
            if self.use_dropout: 
                x = K_layers.Dropout(0.25)(x)
        shape_before_flatten = K_backend.int_shape(x)[1:]
        x = K_layers.Flatten()(x)
        
        self.mu = K_layers.Dense(self.z_dim, name='mu')(x)
        self.log_var = K_layers.Dense(self.z_dim, name='log_var')(x)
        encoder_mu_log_var = K_models.Model(encoder_input, (self.mu, self.log_var))
        
        def sampling(args):
            mu, log_var = args
            epsilon = K_backend.random_normal(shape=K_backend.shape(mu), mean=0, stddev=1)
            return mu + K_backend.exp(log_var / 2) * epsilon  # mu + sigma * epsilon
        
        encoder_output = K_layers.Lambda(sampling, name='encoder_output')([self.mu, self.log_var])
        
        self.encoder = K_models.Model(encoder_input, encoder_output)
        print(self.encoder.summary())
        
        # Decoder
        decoder_input = K_layers.Input(shape=(self.z_dim, ), name='decoder_input')
        x = K_layers.Dense(np.prod(shape_before_flatten))(decoder_input)
        x = K_layers.Reshape(shape_before_flatten)(x)
        for i, decoder_conv_setting in enumerate(self.decoder_conv_settings):
            conv_t_layer = K_layers.Conv2DTranspose(
                filters=decoder_conv_setting[0],
                kernel_size=decoder_conv_setting[1],
                strides=decoder_conv_setting[2],
                padding='same',
                name='decoder_conv_t_' + str(i)
            )
            x = conv_t_layer(x)
            if i < len(self.decoder_conv_settings) - 1:
                x = K_layers.LeakyReLU()(x)
            else:
                x = K_layers.Activation('sigmoid')(x)
        decoder_output = x
        
        self.decoder = K_models.Model(decoder_input, decoder_output)
        print(self.decoder.summary())
        
        # Full Autoencoder
        self.model = K_models.Model(encoder_input, self.decoder(encoder_output))
        return self.model
    
    def compile(self, learning_rate, r_loss_factor):
        self.learning_rate = learning_rate
        
        def vae_r_loss(y_true, y_pred):
            r_loss = K_backend.mean(K_backend.square(y_true-y_pred), axis=[1,2,3])
            return r_loss_factor * r_loss
        
        def vae_kl_loss(y_true, y_pred):
            kl_loss = -0.5 * K_backend.sum(
                1 + self.log_var - K_backend.square(self.mu)- K_backend.exp(self.log_var), 
                axis=1
            ) 
            return kl_loss
        
        def vae_loss(y_true, y_pred):
            r_loss = vae_r_loss(y_true, y_pred)
            kl_loss = vae_kl_loss(y_true, y_pred)
            return r_loss + kl_loss
        
        optimizer = K_optimizers.Adam(lr=learning_rate)
        self.model.compile(optimizer=optimizer, loss=vae_loss, metrics=[vae_r_loss, vae_kl_loss])
        return
    
    def train(self, X_train, batch_size=32):
        self.model.fit(
            X_train, X_train, 
            batch_size=batch_size, 
            shuffle=True,
            epochs=10
        )
        return 
    

VAE = VariationalAutoencoder(
    input_dim=[28,28,1],
    encoder_conv_settings=[
        [32, 3, 1],
        [64, 3, 2],
        [64, 3, 2],
        [64, 3, 1],
    ],
    decoder_conv_settings=[
        [64, 3, 1],
        [64, 3, 2],
        [32, 3, 2],
        [1,  3, 1],
    ],
    z_dim=2
)
model = VAE.build()
VAE.compile(0.0005, 1000)

Model: "model_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_input (InputLayer)      [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
encoder_conv_0 (Conv2D)         (None, 28, 28, 32)   320         encoder_input[0][0]              
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 28, 28, 32)   0           encoder_conv_0[0][0]             
__________________________________________________________________________________________________
encoder_conv_1 (Conv2D)         (None, 14, 14, 64)   18496       leaky_re_lu_14[0][0]             
____________________________________________________________________________________________

In [6]:
def load_mnist():
    (x_train, y_train), (x_test, y_test) = K_datasets.mnist.load_data()

    x_train = x_train.astype('float32') / 255.
    x_train = x_train.reshape(x_train.shape + (1,))
    x_test = x_test.astype('float32') / 255.
    x_test = x_test.reshape(x_test.shape + (1,))

    return (x_train, y_train), (x_test, y_test)
(X_train, y_train), (X_test, y_test) = load_mnist()
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(60000, 28, 28, 1) (60000,) (10000, 28, 28, 1) (10000,)


In [8]:
VAE.train(X_train)

Train on 60000 samples
Epoch 1/10
   32/60000 [..............................] - ETA: 11s

_SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'log_var_2/Identity:0' shape=(None, 2) dtype=float32>, <tf.Tensor 'mu_2/Identity:0' shape=(None, 2) dtype=float32>]