## Imports

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

import matplotlib.pyplot as plt
import numpy as np

import os
import zipfile
import urllib.request
import random
from IPython import display

## Parameters

In [None]:
# Seed every random number generator the notebook uses: Python's `random`
# (path shuffling), NumPy (index sampling) and TensorFlow (latent vectors,
# weight initialization). Seeding NumPy alone leaves the others unseeded.
SEED = 51
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# parameters for building the model and training
BATCH_SIZE=2000   # number of images per batch
LATENT_DIM=512    # size of the latent vector produced by the encoder
IMAGE_SIZE=64     # images are resized to IMAGE_SIZE x IMAGE_SIZE pixels

## Download the Dataset


In [None]:
# location of the zipped dataset and where it is stored/extracted locally
data_url = "https://storage.googleapis.com/learning-datasets/Resources/anime-faces.zip"
data_file_name = "animefaces.zip"
download_dir = '/tmp/anime/'

# make the data directory (no error if it already exists)
os.makedirs(download_dir, exist_ok=True)

# download the zipped dataset INTO the data directory; a previously downloaded
# archive is reused as long as it is still a readable zip file
data_file_path = os.path.join(download_dir, data_file_name)
if not zipfile.is_zipfile(data_file_path):
  urllib.request.urlretrieve(data_url, data_file_path)

# extract the zip file; the context manager closes it even if extraction fails
with zipfile.ZipFile(data_file_path, 'r') as zip_ref:
  zip_ref.extractall(download_dir)

## Prepare the Dataset

In [None]:
# Data Preparation Utilities

def get_dataset_slice_paths(image_dir):
  '''Builds the path of every entry found directly inside `image_dir`.

  Entries are returned in the order `os.listdir` reports them, each one
  joined with `image_dir`.
  '''
  image_paths = []
  for entry_name in os.listdir(image_dir):
    image_paths.append(os.path.join(image_dir, entry_name))
  return image_paths


def map_image(image_filename):
  '''Loads one JPEG file and turns it into a normalized image tensor.

  Args:
    image_filename: path of the JPEG file to read.

  Returns:
    A float32 tensor of shape (IMAGE_SIZE, IMAGE_SIZE, 3) whose pixel
    values have been divided by 255.
  '''
  img_raw = tf.io.read_file(image_filename)
  # Always decode to 3 channels: without `channels=3` a grayscale JPEG yields
  # a single channel and the 3-channel reshape below fails.
  image = tf.image.decode_jpeg(img_raw, channels=3)

  image = tf.cast(image, dtype=tf.float32)
  # resize: changes the spatial dimensions (width and height) of an image.
  # Example: resizing a 100x100 image to 200x200 scales it up and fills the
  # new pixels with interpolated values.
  image = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
  image = image / 255.0
  # reshape: changes the shape of the tensor without altering the data.
  # Example: reshaping (100, 100, 3) to (50, 200, 3) keeps the same number of
  # elements (100 * 100 * 3) but arranges them differently. Here the data
  # already has the target layout, so this only pins the tensor's shape.
  image = tf.reshape(image, shape=(IMAGE_SIZE, IMAGE_SIZE, 3,))

  return image

In [None]:
# collect every image path, then randomize their order
paths = get_dataset_slice_paths("/tmp/anime/images/")
random.shuffle(paths)

# the first 80% of the shuffled paths form the training set,
# the remaining 20% form the validation set
paths_len = len(paths)
train_paths_len = int(paths_len * 0.8)
train_paths, val_paths = paths[:train_paths_len], paths[train_paths_len:]

# training pipeline: paths -> preprocessed images -> shuffle (buffer of 1000) -> batches
training_dataset = (
    tf.data.Dataset.from_tensor_slices((train_paths))
    .map(map_image)
    .shuffle(1000)
    .batch(BATCH_SIZE)
)

# validation pipeline: same preprocessing, batched without shuffling
validation_dataset = (
    tf.data.Dataset.from_tensor_slices((val_paths))
    .map(map_image)
    .batch(BATCH_SIZE)
)

print(f'number of batches in the training set: {len(training_dataset)}')
print(f'number of batches in the validation set: {len(validation_dataset)}')

## Display Utilities


In [None]:
def display_faces(dataset, size=9):
  '''Takes a sample from a dataset batch and plots it in a grid.

  Args:
    dataset: batched dataset of images; it is unbatched and its first
      `size` images are plotted.
    size: number of images to plot (3 images per row).
  '''
  dataset = dataset.unbatch().take(size)
  n_cols = 3
  # Ceiling division: exactly as many rows as needed. `size//n_cols + 1`
  # reserved an extra, empty row whenever `size` was a multiple of n_cols.
  n_rows = -(-size // n_cols)
  plt.figure(figsize=(5, 5))
  for i, image in enumerate(dataset, start=1):
    # use the notebook-wide IMAGE_SIZE instead of a hard-coded 64
    disp_img = np.reshape(image, (IMAGE_SIZE, IMAGE_SIZE, 3))
    plt.subplot(n_rows, n_cols, i)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(disp_img)


def display_one_row(disp_images, offset, shape=(28, 28)):
  '''Plots images into consecutive cells of a 3x10 subplot grid.

  The first image goes into cell `offset + 1`; every image is reshaped to
  `shape` before being shown, and axis ticks are hidden.
  '''
  for cell_number, raw_image in enumerate(disp_images, start=offset + 1):
    plt.subplot(3, 10, cell_number)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(np.reshape(raw_image, shape))


def display_results(disp_input_images, disp_predicted):
  '''Shows the input images and the predicted images in one figure.'''
  plt.figure(figsize=(15, 5))
  image_shape = (IMAGE_SIZE,IMAGE_SIZE,3)
  # inputs start at cell 1 of the 3x10 grid, predictions at cell 21
  row_specs = ((disp_input_images, 0), (disp_predicted, 20))
  for row_images, row_offset in row_specs:
    display_one_row(row_images, row_offset, shape=image_shape)


In [None]:
# preview the first 12 images of the validation dataset
display_faces(validation_dataset, size=12)

## Build the Model

<img src="https://drive.google.com/uc?export=view&id=1YAZAeMGEJ1KgieYk1ju-S9DoshpMREeC" width="60%" height="60%"/>

### Sampling Class



In [None]:
class Sampling(tf.keras.layers.Layer):
  '''Layer that draws a latent vector from the (mu, sigma) pair it receives.'''

  def call(self, inputs):
    '''Returns mu + exp(0.5 * sigma) * epsilon.

    `inputs` is a (mu, sigma) pair; epsilon is random normal noise with the
    same (batch, dim) shape as mu.
    '''
    mu, sigma = inputs

    # read the batch size and latent dimension from mu at run time
    mu_shape = tf.shape(mu)
    noise_shape = (mu_shape[0], mu_shape[1])

    # draw the random noise tensor
    noise = tf.keras.backend.random_normal(shape = noise_shape)

    # shift and scale the noise with the encoder outputs
    return mu + tf.exp(0.5 * sigma) * noise

### Encoder Layers


In [None]:
def encoder_layers(inputs, latent_dim):
  '''Builds the encoder layers on top of `inputs`.

  Returns a tuple (mu, sigma, conv_shape): two Dense outputs with
  `latent_dim` units each, and the shape of the last batch-normalized
  convolution output.
  '''
  # three stride-2 convolution blocks (32, 64, 128 filters), each followed
  # by batch normalization
  x = inputs
  for n_filters in (32, 64, 128):
    x = tf.keras.layers.Conv2D(n_filters, 3, strides = 2, padding = 'same', activation = 'relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)
  last_conv_output = x

  # flatten the feature maps and pass them through a Dense block
  hidden = tf.keras.layers.Flatten()(last_conv_output)
  hidden = tf.keras.layers.Dense(1024, activation = 'relu')(hidden)
  hidden = tf.keras.layers.BatchNormalization()(hidden)

  # two linear heads, each with latent_dim units
  mu = tf.keras.layers.Dense(latent_dim)(hidden)
  sigma = tf.keras.layers.Dense(latent_dim)(hidden)

  return mu, sigma, last_conv_output.shape

### Encoder Model


In [None]:
def encoder_model(latent_dim, input_shape):
  '''Creates the encoder model and prints its summary.

  Returns (model, conv_shape): the model outputs [mu, sigma, z], and
  conv_shape is the shape reported by `encoder_layers`.
  '''
  encoder_input = tf.keras.layers.Input(shape = input_shape)
  mu, sigma, conv_shape = encoder_layers(encoder_input, latent_dim = latent_dim)

  # sample the latent vector from mu and sigma
  latent_sample = Sampling()((mu, sigma))

  model = tf.keras.Model(inputs = encoder_input, outputs = [mu, sigma, latent_sample])
  model.summary()

  return model, conv_shape

### Decoder Layers


In [None]:
def decoder_layers(inputs, conv_shape):
  '''Builds the decoder layers on top of `inputs`.

  `conv_shape` is indexed at positions 1, 2 and 3 to size the first Dense
  layer and the Reshape that follows it. Returns the output of the final
  sigmoid Conv2DTranspose, which has 3 filters.
  '''
  height, width, channels = conv_shape[1], conv_shape[2], conv_shape[3]

  # Dense layer with one unit per element of the conv_shape feature map
  x = tf.keras.layers.Dense(height * width * channels, activation = 'relu')(inputs)
  x = tf.keras.layers.BatchNormalization()(x)

  # restore the feature-map layout
  x = tf.keras.layers.Reshape((height, width, channels))(x)

  # three stride-2 transposed convolutions (128, 64, 32 filters), each
  # followed by batch normalization
  for n_filters in (128, 64, 32):
    x = tf.keras.layers.Conv2DTranspose(n_filters, 3, strides = 2, padding = 'same', activation = 'relu')(x)
    x = tf.keras.layers.BatchNormalization()(x)

  # output layer: 3 filters (one per image channel), kernel size 3, stride 1
  return tf.keras.layers.Conv2DTranspose(3, 3, strides = 1, padding = 'same', activation = 'sigmoid')(x)

### Decoder Model


In [None]:
def decoder_model(latent_dim, conv_shape):
  '''Creates the decoder model and prints its summary.

  The model takes a vector of `latent_dim` values and returns the output of
  `decoder_layers`.
  '''
  # the decoder input is a latent vector
  latent_input = tf.keras.layers.Input(shape = (latent_dim,))

  # wire the decoder layers between the input and the model output
  model = tf.keras.Model(latent_input, decoder_layers(latent_input, conv_shape))
  model.summary()

  return model

### Kullback–Leibler Divergence



In [None]:
def kl_reconstruction_loss(mu, sigma):
  '''Computes -0.5 * mean(1 + sigma - mu^2 - exp(sigma)) over all elements.'''
  per_element_term = 1 + sigma - tf.square(mu) - tf.math.exp(sigma)
  mean_term = tf.reduce_mean(per_element_term)
  return mean_term * -0.5

In [None]:
def vae_model(encoder, decoder, input_shape):
  '''Chains `encoder` and `decoder` into one model and attaches the KL loss.

  The returned model maps an input of `input_shape` to the decoder's
  reconstruction of the encoder's sampled latent vector z.
  '''
  vae_input = tf.keras.layers.Input(shape = input_shape)

  # the encoder returns mu, sigma and the sampled latent vector z
  mu, sigma, z = encoder(vae_input)

  # the model output is the decoder's reconstruction of z
  model = tf.keras.Model(vae_input, decoder(z))

  # register the KL term so it shows up in model.losses
  model.add_loss(kl_reconstruction_loss(mu, sigma))

  return model

In [None]:
def get_models(input_shape, latent_dim):
  """Builds and returns the (encoder, decoder, vae) models."""
  # the encoder also reports the conv shape the decoder has to start from
  encoder, conv_shape = encoder_model(latent_dim = latent_dim, input_shape = input_shape)
  decoder = decoder_model(latent_dim = latent_dim, conv_shape = conv_shape)
  vae = vae_model(encoder = encoder, decoder = decoder, input_shape = input_shape)

  return encoder, decoder, vae

In [None]:
# build the models for IMAGE_SIZE x IMAGE_SIZE RGB images, the same size the
# preprocessing produces, instead of a hard-coded 64
encoder, decoder, vae = get_models(input_shape=(IMAGE_SIZE,IMAGE_SIZE,3,), latent_dim=LATENT_DIM)

## Train the Model



In [None]:
# Adam optimizer with a learning rate of 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# running mean, used to track the loss values it is called with
loss_metric = tf.keras.metrics.Mean()
# loss objects available for the reconstruction term
# NOTE(review): bce_loss is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed
mse_loss = tf.keras.losses.MeanSquaredError()
bce_loss = tf.keras.losses.BinaryCrossentropy()

In [None]:
def generate_and_save_images(model, epoch, step, test_input):
  '''Plots the model's predictions for `test_input` in a 4x4 grid, saves and shows it.

  Args:
    model: model whose `predict` output is indexed as [i, :, :, :] images.
    epoch: epoch number, used in the figure title and the file name.
    step: step number, used in the figure title and the file name.
    test_input: input passed to `model.predict`; the 4x4 grid has room for
      at most 16 predictions.

  The figure is written to 'image_at_epoch_{epoch}_step{step}.png' in the
  current working directory.
  '''
  predictions = model.predict(test_input)

  fig = plt.figure(figsize=(4,4))

  for i in range(predictions.shape[0]):
    plt.subplot(4, 4, i+1)
    # scale the predictions to the 0-255 integer range before drawing
    img = predictions[i, :, :, :] * 255
    img = img.astype('int32')
    plt.imshow(img)
    plt.axis('off')

  fig.suptitle("epoch: {}, step: {}".format(epoch, step))
  plt.savefig('image_at_epoch_{:04d}_step{:04d}.png'.format(epoch, step))
  plt.show()
  # This function is called repeatedly during training; close the figure so
  # figures do not pile up in memory.
  plt.close(fig)

In [None]:
# Training loop. Displays generated images every 10 steps.

epochs = 100

# fixed latent vectors, reused for every preview so the generated images can
# be compared from one step to the next
random_vector_for_generation = tf.random.normal(shape=[16, LATENT_DIM])
generate_and_save_images(decoder, 0, 0, random_vector_for_generation)

for epoch in range(epochs):
  print('Start of epoch %d' % (epoch,))

  # Iterate over the batches of the dataset.
  for step, x_batch_train in enumerate(training_dataset):
    with tf.GradientTape() as tape:
      # Feed a batch to the VAE model. training=True is required so the
      # BatchNormalization layers normalize with batch statistics and update
      # their moving averages; without it they run in inference mode.
      reconstructed = vae(x_batch_train, training=True)

      # compute the reconstruction loss: mean squared error scaled by the
      # number of values per image
      flattened_inputs = tf.reshape(x_batch_train,shape = [-1])
      flattened_outputs = tf.reshape(reconstructed,shape = [-1])
      loss = mse_loss(flattened_inputs,flattened_outputs) * IMAGE_SIZE * IMAGE_SIZE * 3

      # add the KL loss registered on the model
      loss += sum(vae.losses)

    grads = tape.gradient(loss,vae.trainable_weights)
    optimizer.apply_gradients(zip(grads,vae.trainable_weights))

    # loss_metric is never reset, so the printed value is the running mean
    # over every step since training started
    loss_metric(loss)

    if step % 10 == 0:
      display.clear_output(wait=False)
      generate_and_save_images(decoder, epoch, step, random_vector_for_generation)
    print('Epoch: %s step: %s mean loss = %s' % (epoch, step, loss_metric.result().numpy()))

## Plot Reconstructed Images


In [None]:
# take a single batch from the validation set
# (the original line was missing the dataset name and did not parse)
test_dataset = validation_dataset.take(1)
output_samples = []

# convert that batch to a NumPy array
for input_image in tfds.as_numpy(test_dataset):
  output_samples = input_image

# pick 10 random indices that are valid for the batch actually loaded,
# instead of assuming it holds at least 64 images
idxs = np.random.choice(len(output_samples), size=10)

# reconstruct the batch and show the selected inputs next to their reconstructions
vae_predicted = vae.predict(test_dataset)
display_results(output_samples[idxs], vae_predicted[idxs])

## Plot Generated Images


In [None]:
def plot_images(rows, cols, images, title):
    '''Displays images in a grid.

    Args:
      rows: number of grid rows.
      cols: number of grid columns.
      images: indexable collection of at least rows*cols images that all
        share one (height, width, channels) shape.
      title: title shown above the grid.
    '''
    # Take the tile size from the images themselves rather than assuming
    # 64x64x3; fall back to the notebook's image size when `images` is empty.
    if len(images) > 0:
        tile_h, tile_w, channels = np.shape(images[0])
    else:
        tile_h, tile_w, channels = IMAGE_SIZE, IMAGE_SIZE, 3

    grid = np.zeros(shape=(rows*tile_h, cols*tile_w, channels))
    for row in range(rows):
        for col in range(cols):
            # images are laid out row by row
            grid[row*tile_h:(row+1)*tile_h, col*tile_w:(col+1)*tile_w, :] = images[row*cols + col]

    plt.figure(figsize=(12,12))
    plt.imshow(grid)
    plt.title(title)
    plt.show()

# draw 64 random latent vectors, one per cell of the 8x8 grid
test_vector_for_generation = tf.random.normal(shape=[64, LATENT_DIM])

# decode the latent vectors into images
predictions = decoder.predict(test_vector_for_generation)

# show the decoded images as an 8x8 grid
plot_images(rows=8, cols=8, images=predictions, title='Generated Images')

## Save the Model



In [None]:
# write the full VAE model to "anime.h5" in the current working directory
vae.save("anime.h5")

In [None]:
# You can use this cell as a shortcut for downloading your model
# google.colab is only importable inside Colab, so the import is guarded and
# the cell no longer fails when the notebook runs elsewhere.
try:
  from google.colab import files
except ImportError:
  print("google.colab is not available; the saved model is in anime.h5")
else:
  files.download("anime.h5")