In [1]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Input, Flatten, Reshape 
from tensorflow.keras.models import Model, Sequential
from matplotlib import pyplot as plt
from IPython import display 
import numpy as np

## MNIST Autoencoder

In this exercise we will use an autoencoder to first compress hand-written digit images from the MNIST dataset down to lower-dimensional representations and then expand them back to the original images.<br>
![AE](fig/AE.jpg)
<br>
To keep things simple we will use dense layers, so no convolutions here.
<hr>

First we load in the MNIST dataset.

In [2]:
# Fetch the MNIST train/test splits (images plus their digit labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale both image arrays by 1/255 in a single step.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Report the dimensions of a single image.
print(f'image shape: {x_train[0].shape}')

# Display the first training image in grayscale as a sanity check.
plt.imshow(x_train[0], cmap="gray")
plt.show()

image shape: (28, 28)


Now we create the first part of the autoencoder: the encoder model<br>
The encoder model compresses the input image down to a lower dimensional latent space.

In [3]:
# pick a size for the latent dimension
# how low can you go and still get good results?
# keep in mind the original image is 28x28 = 784 pixels
# 32 might be a good first value to try
# your code here
LATENT_SIZE = 32

# Note how sequential models can also be passed a list of layers
# This can be more concise than using add()
encoder = Sequential([
    Flatten(input_shape = (28, 28)), # we need to flatten the 2D image before the Dense layer
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    # specify the size of the latent dimension
    # your code here
    # Use LATENT_SIZE (not a literal) so the bottleneck width always matches
    # the decoder's input_shape = (LATENT_SIZE,) when the constant is changed.
    Dense(LATENT_SIZE, activation='relu'),
])

encoder.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
Total params: 110,816
Trainable params: 110,816
Non-trainable params: 0
_________________________________________________________________


Next we create the 2nd half of the autoencoder: the decoder<br>
The decoder expands an image representation in the latent space back to the full dimensions of the original input image.

In [4]:
# The decoder mirrors the encoder: it widens the latent vector back out to
# 784 values and reshapes them into a 28x28 image.
decoder = Sequential([
    Dense(64, input_shape = (LATENT_SIZE,), activation='relu'),
    Dense(128, activation='relu'),
    # specify a reasonable output activation
    # your code here
    # The targets are pixel intensities scaled to [0, 1], so a sigmoid output
    # keeps every reconstructed pixel in that range. A relu output is unbounded
    # above and an output unit stuck at zero stops receiving gradient.
    Dense(784, activation='sigmoid'),
    Reshape((28, 28)) # note the reshape to make the output 2D
])

decoder.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_4 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_5 (Dense)              (None, 784)               101136    
_________________________________________________________________
reshape (Reshape)            (None, 28, 28)            0         
Total params: 111,568
Trainable params: 111,568
Non-trainable params: 0
_________________________________________________________________


Finally, we combine the encoder and decoder into the autoencoder.<br>
The autoencoder shrinks the image down to the latent space representation and then expands it again to the original dimensions.

In [5]:
# Chain the two halves symbolically: image -> latent vector -> reconstruction.
img = Input(shape=(28, 28))       # input
latent_vector = encoder(img)      # latent space
output = decoder(latent_vector)   # output

# here we use the alternative Model constructor where we specify the model's input and output
# your code here
autoencoder = Model(img, output)

# your code here
# choose a sensible loss function for 'reconstruction error'
# Mean squared error between the input image and its reconstruction, optimized with Nadam.
autoencoder.compile(optimizer="nadam", loss="mse")

autoencoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 28, 28)]          0         
_________________________________________________________________
sequential (Sequential)      (None, 32)                110816    
_________________________________________________________________
sequential_1 (Sequential)    (None, 28, 28)            111568    
Total params: 222,384
Trainable params: 222,384
Non-trainable params: 0
_________________________________________________________________


Even a simple neural network like this can take a minute or two to train.<br>
For this reason you've been provided with some code that visualizes the model predictions on the test set after each epoch.<br>
You need to specify
1. the number of epochs you'd like to train for and
2. the predictor and target data used for training and validation

**Hint:** *Autoencoders are 'self-supervised' and we are trying to minimize the reconstruction loss.*

In [None]:
# choose a number of epochs to train for
# your code here
EPOCHS = 100

# Each figure shows 4 rows of 2 (test image, reconstruction) pairs.
N_ROWS, N_PAIRS = 4, 2

# Note: epoch 0 is before any fitting
# The loop runs EPOCHS+1 times so that the untrained model (epoch 0) and the
# model after the final epoch are both visualized; fitting itself happens
# exactly EPOCHS times (it is skipped on the last pass).
for epoch in range(EPOCHS+1):
    fig, axs = plt.subplots(N_ROWS, 2 * N_PAIRS)

    # Draw exactly as many random test images as will be displayed, and
    # reconstruct them in a single batched predict call.
    sample_idx = np.random.randint(0, len(x_test), N_ROWS * N_PAIRS)
    originals = x_test[sample_idx]
    reconstructions = autoencoder.predict(originals, verbose=0)

    for i in range(N_ROWS):
        for j in range(0, 2 * N_PAIRS, 2):
            k = i * N_PAIRS + j // 2  # flat index of this (test, pred) pair
            axs[i, j].imshow(originals[k], cmap = "gray")
            axs[i, j].axis("off")
            axs[i, j+1].imshow(reconstructions[k], cmap = "gray")
            axs[i, j+1].axis("off")
            if i == 0:
                axs[i, j].set_title('test')
                axs[i, j+1].set_title('pred')

    plt.subplots_adjust(wspace = 0, hspace = 0)
    plt.show()
    plt.close(fig)  # release the figure so many epochs do not accumulate open figures
    print("-----------", "EPOCH", epoch, "-----------")
    # specify predictors and targets for train and validation
    # your code here
    # An autoencoder is self-supervised: the target is the input image itself,
    # not the digit label, for both the training and the validation data.
    if epoch < EPOCHS:
        autoencoder.fit(x=x_train,
                        y=x_train,
                        validation_data=(x_test, x_test))

How did you do? You can experiment with:
* different latent size
* more/larger layers
* regularization