In [1]:
import tensorflow as tf
from keras import backend as K
from keras import layers, Model, Input
from keras.applications import VGG19
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact

%matplotlib inline
print(tf.__version__)

2.9.0


In [2]:
##
# GPU setup:
#
physical_devices  =  tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        print(e)

##
# Tensor setup:
#
tf.experimental.numpy.experimental_enable_numpy_behavior()

In [3]:
##
# Define perceptual loss:
#
input_shape = (32,32,3)
selected_layers = ['block1_conv2', 'block2_conv2',"block3_conv3" ,'block4_conv3','block5_conv4']
selected_layer_weights = [1.0, 2.0 , 4.0 , 8.0 , 16.0]
selected_layer_sizes = [input_shape[0], input_shape[0] // 2, input_shape[0] // 4, input_shape[0] // 8, input_shape[0] // 16]
selected_layer_filters = [64, 128, 256, 512, 512]

vgg = VGG19(weights='imagenet', include_top = False, input_shape = (32,32,3))
vgg.trainable = False
outputs = [vgg.get_layer(l).output for l in selected_layers]

feature_extractor = Model(vgg.input, outputs = outputs, name = 'VGG19_feature_extractor')
feature_extractor.summary()

@tf.function
def perceptual_loss(input_image , reconstruct_image):
    h1_list = feature_extractor(input_image)
    h2_list = feature_extractor(reconstruct_image)

    rc_loss = 0.0
    for h1, h2, weight, size, n_filters in zip(h1_list, h2_list, 
                            selected_layer_weights, selected_layer_sizes, selected_layer_filters):
        h1 = K.batch_flatten(h1)
        h2 = K.batch_flatten(h2)
        mse = weight * K.sum(K.square(h1 - h2), axis = -1) / (size * 2 * n_filters)
        rc_loss = rc_loss + mse

    return rc_loss

Model: "VGG19_feature_extractor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 32, 32, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 32, 32, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 16, 16, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 16, 16, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 16, 16, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 8, 8, 1

In [4]:
##
# Define convolutional autoencoder model:
#
inputs = Input(shape = (32,32,3), name = 'encoder_input')
x = layers.Rescaling(scale = 1/255.0)(inputs)
### Encoder ###
x = layers.Conv2D(filters = 64, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block1_conv1')(x)
x = layers.Conv2D(filters = 64, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block1_conv2')(x)
x = layers.MaxPooling2D(pool_size = (2,2), padding = 'same', name = 'block1_pool')(x)
x = layers.BatchNormalization(name = 'block1_batchnorm')(x)
x = layers.Conv2D(filters = 128, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block2_conv1')(inputs)
x = layers.Conv2D(filters = 128, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block2_conv2')(x)
x = layers.MaxPooling2D(pool_size = (2,2), padding = 'same', name = 'block2_pool')(x)
x = layers.BatchNormalization(name = 'block2_batchnorm')(x)
x = layers.Conv2D(filters = 256, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block3_conv1')(inputs)
x = layers.Conv2D(filters = 256, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block3_conv2')(x)
x = layers.MaxPooling2D(pool_size = (2,2), padding = 'same', name = 'block3_pool')(x)
x = layers.BatchNormalization(name = 'block3_batchnorm')(x)
x = layers.Conv2D(filters = 512, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block4_conv1')(inputs)
x = layers.Conv2D(filters = 512, kernel_size = 3, strides = (3,3), padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block4_conv2')(x)
x = layers.MaxPooling2D(pool_size = (2,2), padding = 'same', name = 'block4_pool')(x)
x = layers.BatchNormalization(name = 'block4_batchnorm')(x)
volumeSize = K.int_shape(x)
### Bottle-neck ###
x = layers.Flatten(name = 'latent_flatten')(x)
x = layers.Dense(1024, activation = None, name = 'latent_dense1')(x)
### Decoder ###
x = layers.Dense(volumeSize[1] * volumeSize[2] * volumeSize[3], name = 'block1_dense1')(x)
x = layers.Reshape((volumeSize[1], volumeSize[2], volumeSize[3]), name = 'block1_dense2')(x)
x = layers.Conv2DTranspose(filters = 512, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block1_convtranspose')(x)
x = layers.Conv2DTranspose(filters = 256, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block2_convtranspose')(x)
x = layers.Conv2DTranspose(filters = 128, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block3_convtranspose')(x)
x = layers.Conv2DTranspose(filters = 64, kernel_size = 3, strides = 2, padding = 'same', activation = 'relu', kernel_initializer = 'he_normal', name = 'block4_convtranspose')(x)
outputs = layers.Conv2D(filters = 3, kernel_size = 3, strides = (1,1), padding = 'same', activation = None, kernel_initializer = 'he_normal', name = 'decoder_output')(x)
model = Model(inputs = inputs, outputs = outputs, name = 'VGG11_autoencoder')
model.summary()
model.compile(optimizer = 'adamax', loss = perceptual_loss)

Model: "VGG11_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 32, 32, 3)]       0         
                                                                 
 block4_conv1 (Conv2D)       (None, 11, 11, 512)       14336     
                                                                 
 block4_conv2 (Conv2D)       (None, 4, 4, 512)         2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, 2, 2, 512)         0         
                                                                 
 block4_batchnorm (BatchNorm  (None, 2, 2, 512)        2048      
 alization)                                                      
                                                                 
 latent_flatten (Flatten)    (None, 2048)              0         
                                                       

In [5]:
##
# Load Fashion MNIST dataset:
#
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32')
x_train = tf.image.resize(x_train, [32,32])
x_train = np.stack((x_train[:,:,:,0],) * 3, axis = -1)

x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32')
x_test = tf.image.resize(x_test, [32,32])
x_test = np.stack((x_test[:,:,:,0],) * 3, axis = -1)

print(x_train.shape)
print(x_test.shape)

@interact(n = (0, len(x_train) - 1))
def display_image(n = 0):
    plt.imshow(x_train[n, :, :, 0], cmap = 'gray')
    plt.show()
    return None

(60000, 32, 32, 3)
(10000, 32, 32, 3)


interactive(children=(IntSlider(value=0, description='n', max=59999), Output()), _dom_classes=('widget-interac…

In [6]:
date = '20220525'
model_type = 'VGG11_autoencoder'
filepath = f'weights/{date}_{model_type}_{32}x{32}.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor = 'val_loss', verbose = 1, save_best_only = True, mode = 'min')
callbacks_list = [checkpoint]
history = model.fit(x = x_train, y = x_train, epochs = 100, batch_size = 32, validation_data = [x_test, x_test], callbacks = callbacks_list, verbose = 1, shuffle = True)

Epoch 1/100
Epoch 1: val_loss improved from inf to 17189982208.00000, saving model to weights\20220525_vgg11_cae_32x32.hdf5
Epoch 2/100
Epoch 2: val_loss improved from 17189982208.00000 to 9857013760.00000, saving model to weights\20220525_vgg11_cae_32x32.hdf5
Epoch 3/100
Epoch 3: val_loss improved from 9857013760.00000 to 8798000128.00000, saving model to weights\20220525_vgg11_cae_32x32.hdf5
Epoch 4/100
Epoch 4: val_loss improved from 8798000128.00000 to 7095642624.00000, saving model to weights\20220525_vgg11_cae_32x32.hdf5
Epoch 5/100
Epoch 5: val_loss improved from 7095642624.00000 to 6806304768.00000, saving model to weights\20220525_vgg11_cae_32x32.hdf5
Epoch 6/100
Epoch 6: val_loss improved from 6806304768.00000 to 6040520192.00000, saving model to weights\20220525_vgg11_cae_32x32.hdf5
Epoch 7/100
Epoch 7: val_loss did not improve from 6040520192.00000
Epoch 8/100
Epoch 8: val_loss improved from 6040520192.00000 to 5420839936.00000, saving model to weights\20220525_vgg11_cae_32