In [31]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets as K_datasets
from tensorflow.keras import models as K_models
from tensorflow.keras import layers as K_layers
from tensorflow.keras import utils as K_utils
from tensorflow.keras import optimizers as K_optimizers
from tensorflow.keras import backend as K_backend

In [36]:
class Autoencoder:
    """Convolutional autoencoder assembled from lists of per-layer settings.

    The encoder is a stack of Conv2D + LeakyReLU layers followed by a Dense
    projection to ``z_dim`` units. The decoder starts from a ``z_dim`` vector,
    expands it with a Dense layer back to the encoder's pre-flatten shape and
    applies a stack of Conv2DTranspose layers; every decoder layer but the
    last is followed by LeakyReLU, the last by a sigmoid.

    Args:
        input_dim: Shape of one input sample (without the batch axis).
        encoder_conv_settings: Sequence of ``[filters, kernel_size, strides]``
            entries, one per encoder Conv2D layer.
        decoder_conv_settings: Sequence of ``[filters, kernel_size, strides]``
            entries, one per decoder Conv2DTranspose layer.
        z_dim: Number of units in the encoder output / decoder input.

    Attributes:
        encoder, decoder, model: Keras models created by ``build()``;
            all three are ``None`` until ``build()`` has been called.
    """

    def __init__(self, input_dim, encoder_conv_settings, decoder_conv_settings, z_dim):
        self.input_dim = input_dim
        self.encoder_conv_settings = encoder_conv_settings  # filters, kernel size, strides
        self.decoder_conv_settings = decoder_conv_settings  # filters, kernel size, strides
        self.z_dim = z_dim
        # Populated by build(); initialised here so the attributes always exist.
        self.encoder = None
        self.decoder = None
        self.model = None

    def build(self):
        """Create the encoder, decoder and end-to-end models.

        Prints a summary of the encoder and of the decoder as a side effect.

        Returns:
            The full autoencoder model (encoder input -> decoder output),
            which is also stored on ``self.model``.
        """
        # Encoder
        encoder_input = K_layers.Input(shape=self.input_dim, name='encoder_input')
        x = encoder_input
        for i, encoder_conv_setting in enumerate(self.encoder_conv_settings):
            filters, kernel_size, strides = encoder_conv_setting[:3]
            x = K_layers.Conv2D(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding='same',
                name='encoder_conv_' + str(i)
            )(x)
            x = K_layers.LeakyReLU()(x)
        # Remember the feature-map shape (batch axis dropped) so the decoder
        # can reshape its Dense output back to it.
        shape_before_flatten = K_backend.int_shape(x)[1:]
        x = K_layers.Flatten()(x)
        encoder_output = K_layers.Dense(self.z_dim, name='encoder_output')(x)

        self.encoder = K_models.Model(encoder_input, encoder_output)
        # summary() prints by itself and returns None, so it is not wrapped
        # in print() (that would emit an extra "None" line).
        self.encoder.summary()

        # Decoder
        decoder_input = K_layers.Input(shape=(self.z_dim, ), name='decoder_input')
        # np.prod yields a NumPy scalar; Dense is given a plain Python int.
        x = K_layers.Dense(int(np.prod(shape_before_flatten)))(decoder_input)
        x = K_layers.Reshape(shape_before_flatten)(x)
        last_index = len(self.decoder_conv_settings) - 1
        for i, decoder_conv_setting in enumerate(self.decoder_conv_settings):
            filters, kernel_size, strides = decoder_conv_setting[:3]
            x = K_layers.Conv2DTranspose(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding='same',
                name='decoder_conv_t_' + str(i)
            )(x)
            if i < last_index:
                x = K_layers.LeakyReLU()(x)
            else:
                # Final layer: sigmoid instead of LeakyReLU.
                x = K_layers.Activation('sigmoid')(x)
        decoder_output = x

        self.decoder = K_models.Model(decoder_input, decoder_output)
        self.decoder.summary()

        # Full Autoencoder
        self.model = K_models.Model(encoder_input, self.decoder(encoder_output))
        return self.model

    def compile(self, learning_rate):
        """Compile the full model with Adam and a per-sample MSE loss.

        Args:
            learning_rate: Learning rate passed to the Adam optimizer.

        Raises:
            RuntimeError: If ``build()`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError('Autoencoder.compile() called before build(); no model to compile.')

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer = K_optimizers.Adam(learning_rate=learning_rate)

        def r_loss(y_true, y_pred):
            # Mean squared error over axes 1, 2 and 3, leaving one value per sample.
            return K_backend.mean(K_backend.square(y_true - y_pred), axis=[1, 2, 3])

        self.model.compile(optimizer=optimizer, loss=r_loss)

    def train(self, X_train, batch_size=32, epochs=10):
        """Fit the full model to reproduce its own input.

        Args:
            X_train: Training samples; used as both input and target.
            batch_size: Batch size passed to ``fit``.
            epochs: Number of epochs passed to ``fit`` (defaults to 10).

        Returns:
            Whatever ``self.model.fit`` returns.

        Raises:
            RuntimeError: If ``build()`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError('Autoencoder.train() called before build(); no model to fit.')

        return self.model.fit(
            X_train, X_train,
            batch_size=batch_size,
            shuffle=True,
            epochs=epochs
        )
    

# Each layer spec is [filters, kernel_size, strides].
encoder_layer_specs = [[32, 3, 1], [64, 3, 2], [64, 3, 2], [64, 3, 1]]
decoder_layer_specs = [[64, 3, 1], [64, 3, 2], [32, 3, 2], [1, 3, 1]]

# 28x28 single-channel input, 2-unit bottleneck.
AE = Autoencoder(
    input_dim=[28, 28, 1],
    encoder_conv_settings=encoder_layer_specs,
    decoder_conv_settings=decoder_layer_specs,
    z_dim=2,
)

# build() must run before compile(): it creates the model that compile() configures.
model = AE.build()
AE.compile(learning_rate=0.0005)

Model: "model_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_input (InputLayer)   [(None, 28, 28, 1)]       0         
_________________________________________________________________
encoder_conv_0 (Conv2D)      (None, 28, 28, 32)        320       
_________________________________________________________________
leaky_re_lu_58 (LeakyReLU)   (None, 28, 28, 32)        0         
_________________________________________________________________
encoder_conv_1 (Conv2D)      (None, 14, 14, 64)        18496     
_________________________________________________________________
leaky_re_lu_59 (LeakyReLU)   (None, 14, 14, 64)        0         
_________________________________________________________________
encoder_conv_2 (Conv2D)      (None, 7, 7, 64)          36928     
_________________________________________________________________
leaky_re_lu_60 (LeakyReLU)   (None, 7, 7, 64)          0  

In [32]:
def load_mnist():
    """Load MNIST through Keras and prepare the image arrays.

    Both image arrays are cast to float32, divided by 255 and given a
    trailing axis of length 1. The label arrays are returned unchanged.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``
    """
    def _prepare(images):
        # Cast first so the division is carried out in float32.
        scaled = images.astype('float32') / 255.
        # Append a length-1 axis: (..., H, W) -> (..., H, W, 1).
        return scaled[..., None]

    train_split, test_split = K_datasets.mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    return (_prepare(train_images), train_labels), (_prepare(test_images), test_labels)
# Load the data once at cell level and echo the four array shapes as a sanity check.
(X_train, y_train), (X_test, y_test) = load_mnist()
print(*[arr.shape for arr in (X_train, y_train, X_test, y_test)])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28, 1) (60000,) (10000, 28, 28, 1) (10000,)


In [37]:
# Fit the autoencoder; train() uses the images as both input and target.
AE.train(X_train, batch_size=32)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
