# Image Classification

In [1]:
import pandas
import sklearn
import tensorflow as tf
from sklearn.model_selection import KFold
import matplotlib
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import SimpleRNN, LSTM, Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from tensorflow.keras.metrics import categorical_crossentropy, sparse_categorical_crossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.layers import BatchNormalization, Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.client import device_lib
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from functools import partial

In [2]:
import torch
# NOTE(review): this clears PyTorch's CUDA cache; it presumably has no effect on
# GPU memory held by TensorFlow — confirm the torch dependency is still needed.
torch.cuda.empty_cache()
def get_available_devices():
    """Return the names of all local devices reported by `device_lib.list_local_devices()`."""
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos]
import tensorflow as tf
# Record the TensorFlow version and the visible devices in the cell output.
print(tf.__version__)
print(get_available_devices())

2.9.3
['/device:CPU:0', '/device:GPU:0']


In [3]:
# Prepare NumPy image arrays and integer labels (e.g. from tf.keras.datasets) for training
def prep_pixels2(train, test, target_train, target_test, num_classes=None):
    """Reshape, scale and one-hot encode image arrays and their labels.

    Args:
        train: training images; reshaped to (n, 28, 28, 1).
        test: test images; reshaped to (n, 28, 28, 1).
        target_train: integer class labels for `train`.
        target_test: integer class labels for `test`.
        num_classes: width of the one-hot encoding. When None it is inferred
            from the largest label found in either label array, so both
            encodings always have the same number of columns.

    Returns:
        Tuple `(train_norm, test_norm, target_train, target_test)`: the images
        as float32 divided by 255.0, and the one-hot encoded labels.
    """
    img_rows = 28
    img_cols = 28
    if num_classes is None:
        # Encoding each split on its own would yield arrays of different
        # widths whenever one split lacks the highest class index.
        num_classes = int(max(np.max(target_train), np.max(target_test))) + 1
    train_norm = train.reshape(train.shape[0], img_rows, img_cols, 1).astype('float32') / 255.0
    test_norm = test.reshape(test.shape[0], img_rows, img_cols, 1).astype('float32') / 255.0
    target_train = to_categorical(target_train, num_classes=num_classes)
    target_test = to_categorical(target_test, num_classes=num_classes)
    return train_norm, test_norm, target_train, target_test

In [4]:
#import tensorflow as tf
from tensorflow.keras.utils import to_categorical
# Prepare one (image, label) example from a tfds dataset: scale, resize and one-hot encode
def prep_pixels(image, label, depth=10, target_size=(32, 32)):
    """Scale an image to [0, 1], resize it, and one-hot encode its label.

    Intended to be passed to `Dataset.map` (optionally through
    `functools.partial` to fix `depth`).

    Args:
        image: image tensor; cast to float32 and divided by 255.
        label: integer class label.
        depth: number of classes for the one-hot encoding.
        target_size: `(height, width)` the image is resized to. Defaults to
            (32, 32), the size the model builders in this notebook expect.

    Returns:
        Tuple `(resized_image, one_hot_label)`.
    """
    image = tf.cast(image, tf.float32)
    image = tf.divide(image, 255)
    train_norm = tf.image.resize(image, target_size)
    target = tf.one_hot(label, depth=depth)
    return train_norm, target

In [5]:
# CNN model
def val_cnn_model(n_channels=1):
    """Build and compile a small CNN classifier for 32x32 images with 10 classes.

    Architecture: one 3x3 convolution (32 filters) -> 2x2 max-pool -> dropout
    -> dense(320) -> dropout -> softmax(10).

    Args:
        n_channels: number of channels of the input images.

    Returns:
        A compiled `Sequential` model using Adadelta and categorical
        cross-entropy, so labels must be one-hot encoded.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(32, 32, n_channels)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(320, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.25))
    model.add(Dense(10, activation='softmax'))
    # The previously constructed (and never used) SGD optimizer was removed;
    # the model has always been compiled with Adadelta.
    model.compile(loss=categorical_crossentropy, optimizer=Adadelta(), metrics=['accuracy'])
    return model

In [6]:
# CNN optimized for MNIST
def val_cnn_mnist(n_channels=1):
    """Build and compile a two-convolution CNN for 28x28 images with 10 classes.

    Args:
        n_channels: number of channels of the input images.

    Returns:
        A compiled `Sequential` model trained with SGD (lr 0.1, momentum 0.9)
        and sparse categorical cross-entropy, so labels are integer class ids.
    """
    # Shared settings of every hidden layer.
    relu_he = dict(activation='relu', kernel_initializer='he_uniform')
    network = Sequential([
        Conv2D(6, (5, 5), input_shape=(28, 28, n_channels), **relu_he),
        MaxPooling2D((2, 2)),
        Conv2D(16, (5, 5), **relu_he),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(120, **relu_he),
        Dense(84, **relu_he),
        Dense(10, activation='softmax'),
    ])
    network.compile(
        loss=sparse_categorical_crossentropy,
        optimizer=SGD(learning_rate=0.1, momentum=0.9),
        metrics=['accuracy'],
    )
    return network

In [7]:
# CNN Optimized for CIFAR10
def val_cnn_cifar(n_depth, n_channels=3):
    """Build and compile a three-stage CNN for 32x32 images.

    Each stage is two 3x3 'same' convolutions (L2-regularised, ELU, batch
    norm) followed by 2x2 max-pooling and dropout. Filters grow 32 -> 64 -> 128
    and dropout grows 0.2 -> 0.3 -> 0.4 across the stages.

    Args:
        n_depth: number of output classes (size of the softmax layer).
        n_channels: number of channels of the input images.

    Returns:
        A compiled `Sequential` model using RMSprop and categorical
        cross-entropy, so labels must be one-hot encoded.
    """
    weight_decay = 1e-4
    model = Sequential()
    # Only the very first layer declares the input shape.
    input_kwargs = {'input_shape': (32, 32, n_channels)}
    for filters, drop_rate in ((32, 0.2), (64, 0.3), (128, 0.4)):
        for _ in range(2):
            model.add(Conv2D(filters, (3, 3), padding='same',
                             kernel_regularizer=regularizers.l2(weight_decay),
                             **input_kwargs))
            input_kwargs = {}
            model.add(Activation('elu'))
            model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(drop_rate))

    model.add(Flatten())
    model.add(Dense(n_depth, activation='softmax'))
    model.compile(
        optimizer=RMSprop(learning_rate=0.001, decay=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [8]:
# RNN network for images
def val_rnn_model(x_train):
    """Build and compile a single-layer LSTM classifier with 10 softmax outputs.

    Args:
        x_train: training samples; only the shape of the first sample
            (`x_train[0].shape`) is used, as the LSTM input shape.

    Returns:
        A compiled `Sequential` model trained with SGD (lr 0.01, momentum 0.9)
        and sparse categorical cross-entropy, so labels are integer class ids.
    """
    sample_shape = x_train[0].shape
    network = Sequential([
        LSTM(128, input_shape=sample_shape),
        Dense(10, activation='softmax'),
    ])
    network.compile(
        optimizer=SGD(learning_rate=0.01, momentum=0.9),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [9]:
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Model

# Pretrained MobileNet network for image recognition
def val_mn_model(depth, n_channels=3):
    """Build a frozen ImageNet MobileNet backbone with a small trainable head.

    Args:
        depth: number of output classes; also the width of the hidden dense
            layer in the head.
        n_channels: number of channels of the 32x32 input images.

    Returns:
        A compiled `Model` using Adam (lr 0.01) and categorical cross-entropy,
        so labels must be one-hot encoded.
    """
    backbone = MobileNet(weights='imagenet', include_top=False, input_shape=(32, 32, n_channels))
    # Freeze the pretrained weights so only the new head is trained.
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False
    features = Flatten(name='flatten')(backbone.output)
    # NOTE(review): the hidden layer is only `depth` units wide (an earlier
    # variant used 1024) — confirm this bottleneck is intended.
    hidden = Dense(depth, activation='relu')(features)
    class_probs = Dense(depth, activation='softmax')(hidden)
    model = Model(inputs=backbone.inputs, outputs=class_probs)
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [10]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

# Pretrained VGG Network for image recognition
def val_vgg_model(depth, n_channels=3):
    """Build a frozen ImageNet VGG16 backbone with a small trainable head.

    Args:
        depth: number of output classes; also the width of the hidden dense
            layer in the head.
        n_channels: number of channels of the 32x32 input images.

    Returns:
        A compiled `Model` using Adam (lr 0.001) and categorical
        cross-entropy, so labels must be one-hot encoded.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, n_channels))
    # Freeze the pretrained weights so only the new head is trained.
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False
    features = Flatten(name='flatten')(backbone.output)
    # NOTE(review): the hidden layer is only `depth` units wide — confirm
    # this bottleneck is intended.
    hidden = Dense(depth, activation='relu')(features)
    class_probs = Dense(depth, activation='softmax')(hidden)
    model = Model(inputs=backbone.inputs, outputs=class_probs)
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [11]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model

# Pretrained ResNet Network for image recognition
def val_resnet_model(depth, n_channels=3):
    """Build a frozen ImageNet ResNet50 backbone with a small trainable head.

    Args:
        depth: number of output classes; also the width of the hidden dense
            layer in the head.
        n_channels: number of channels of the 32x32 input images.

    Returns:
        A compiled `Model` using Adam (lr 0.01) and categorical cross-entropy,
        so labels must be one-hot encoded.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, n_channels))
    # Freeze the pretrained weights so only the new head is trained.
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False
    features = Flatten(name='flatten')(backbone.output)
    # NOTE(review): the recorded EuroSAT run with this model stays near 11%
    # accuracy; the `depth`-unit ReLU bottleneck, the 0.01 learning rate and
    # the input scaling are the likely suspects — confirm before relying on it.
    hidden = Dense(depth, activation='relu')(features)
    class_probs = Dense(depth, activation='softmax')(hidden)
    model = Model(inputs=backbone.inputs, outputs=class_probs)
    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [12]:
def training_step(inputs, targets, model, loss_fn, optimizer):
    """Run one gradient-descent step on a single batch.

    Args:
        inputs: batch of model inputs.
        targets: batch of targets, in the format `loss_fn` expects.
        model: the Keras model being trained.
        loss_fn: callable invoked as `loss_fn(targets, predictions)`.
        optimizer: optimizer used to apply the gradients.

    Returns:
        The loss value computed for this batch.
    """
    with tf.GradientTape() as tape:
        # training=True makes Dropout / BatchNormalization layers run in
        # training mode; calling the model without it runs them in inference
        # mode, which silently disables them during training.
        predictions = model(inputs, training=True)
        loss_value = loss_fn(targets, predictions)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # Update the accuracy metric.
    # NOTE(review): `accuracy_metric` is a module-level name that is not
    # defined in the visible part of the notebook — confirm it exists before
    # calling this function.
    accuracy_metric.update_state(targets, predictions)

    return loss_value

@tf.function
def train_step(dataset, model, loss_fn, optimizer):
    """Run `training_step` on every batch of `dataset` and summarise the pass.

    Args:
        dataset: iterable of `(inputs, targets)` batches.
            NOTE(review): presumably must be finite — a `.repeat()`-ed dataset
            would never leave the loop; confirm against callers.
        model: model forwarded to `training_step`.
        loss_fn: loss callable forwarded to `training_step`.
        optimizer: optimizer forwarded to `training_step`.

    Returns:
        Tuple `(mean_loss, current_accuracy)`: the batch losses averaged over
        the number of batches, and the value of the module-level
        `accuracy_metric` before it is reset.
    """
    training_loss = tf.constant(0.0)
    num_batches = tf.constant(0)
    
    for batch in dataset:
        inputs, targets = batch
        loss = training_step(inputs, targets, model, loss_fn, optimizer)
        training_loss += loss
        num_batches += 1
    
    # Calculate the mean loss over all batches
    mean_loss = training_loss / tf.cast(num_batches, dtype=tf.float32)
    
    # Get the current accuracy from the accuracy metric
    # NOTE(review): `accuracy_metric` is not defined in the visible notebook — confirm.
    current_accuracy = accuracy_metric.result()
    
    # Reset the accuracy metric for the next epoch
    accuracy_metric.reset_states()
    
    return mean_loss, current_accuracy
  
def evaluate_image_model(train_dataset, val_dataset, num_epochs, n_channels=3, depth=10, model_name='cifar', steps_per_epoch=None):
    """Build the requested model, train it, and return its validation accuracy.

    Args:
        train_dataset: batched training dataset of `(image, one_hot_label)`.
        val_dataset: batched validation dataset in the same format.
        num_epochs: number of epochs passed to `fit`.
        n_channels: number of image channels, forwarded to the model builder.
        depth: number of classes, forwarded to the model builder.
        model_name: 'cifar', 'vgg' or 'resnet'; any other value selects the
            MobileNet model.
        steps_per_epoch: number of batches per epoch. When None the legacy
            value `60000 // 64` is used, which does not match the size of the
            datasets used in this notebook — pass
            `num_train_examples // batch_size` for true epochs.

    Returns:
        The accuracy reported by `model.evaluate` on `val_dataset`.
    """
    batch_size = 64
    if steps_per_epoch is None:
        # Kept for backward compatibility with existing callers.
        steps_per_epoch = 60000 // batch_size
    if model_name == 'cifar':
        # Author's notes: needs 80pct accuracy. EuroSAT gets > 80 at epoch 2
        # and overfits afterwards; CIFAR10 gets > 80 at epoch 5 for train and
        # epoch 6 for validation. Each epoch is 12 min.
        model = val_cnn_cifar(depth, n_channels)
        # summary() prints by itself and returns None, so it is not wrapped in print().
        model.summary()
    elif model_name == 'vgg':
        # Author's note: reached 80pct at epoch 6. 10 minutes per epoch.
        model = val_vgg_model(depth, n_channels)
    elif model_name == 'resnet':
        model = val_resnet_model(depth, n_channels)
    else:
        model = val_mn_model(depth, n_channels)
    model.fit(train_dataset, epochs=num_epochs, steps_per_epoch=steps_per_epoch, validation_data=val_dataset, verbose=2)
    _, acc = model.evaluate(val_dataset, verbose=2)
    return acc


In [14]:
# Train the MNIST CNN with augmentation (the function keeps its historical `_rnn` name)
def evaluate_image_model_rnn(x_train, y_train, x_test, y_test):
    """Train the CNN from `val_cnn_mnist` with light augmentation and return test accuracy.

    Despite its name, this function does not build a recurrent model: it
    trains the network returned by `val_cnn_mnist`.

    Args:
        x_train: training images without a channel axis; one is appended here.
        y_train: integer class labels for `x_train`.
        x_test: test images without a channel axis; one is appended here.
        y_test: integer class labels for `x_test`.

    Returns:
        The accuracy reported by `model.evaluate` on the test data.
    """
    # Expand dimensions to include a channel (grayscale)
    x_train = np.expand_dims(x_train, axis=-1)
    x_test = np.expand_dims(x_test, axis=-1)
    model = val_cnn_mnist()
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    augmenter = ImageDataGenerator(rotation_range=7, width_shift_range=0.05, shear_range=0, height_shift_range=0.07, zoom_range=0.05)
    train_generator = augmenter.flow(x_train, y_train, batch_size=64)
    # The validation data goes through a generator with no augmentation.
    test_generator = ImageDataGenerator().flow(x_test, y_test, batch_size=64)
    model.fit(train_generator, validation_data=test_generator, epochs=5, verbose=2)
    _, acc = model.evaluate(x_test, y_test, verbose=2)
    return acc

In [14]:
#CNN optimized for CIFAR10
batch_size = 64
# The last 25% of the official train split is held out for validation.
train_ds, test_ds = tfds.load('cifar10', split=['train[:75%]', 'train[75%:]'], as_supervised=True)
# The training pipeline repeats indefinitely; evaluate_image_model bounds each epoch with steps_per_epoch.
train = (
    train_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .shuffle(100)
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
    .repeat()
)
test = (
    test_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .batch(batch_size)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
epochs = 10
with tf.device('/device:GPU:0'):
    evaluate_image_model(train, test, epochs, n_channels=3, depth=10, model_name='cifar')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 activation (Activation)     (None, 32, 32, 32)        0         
                                                                 
 batch_normalization (BatchN  (None, 32, 32, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 32, 32, 32)        0         
                                                                 
 batch_normalization_1 (Batc  (None, 32, 32, 32)       128       
 hNormalization)                                        

In [16]:
#Eurosat - VGG16
# The last 25% of the train split is held out for validation.
train_ds, test_ds = tfds.load('eurosat', split=['train[:75%]', 'train[75%:]'], as_supervised=True)
# The training pipeline repeats indefinitely; evaluate_image_model bounds each epoch with steps_per_epoch.
train = (
    train_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .shuffle(100)
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
    .repeat()
)
# Batch first, then prefetch, so whole batches (not single examples) are
# prepared ahead of the model — same order as the CIFAR cells.
test = (
    test_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
epochs = 20
with tf.device('/device:GPU:0'):
    evaluate_image_model(train, test, epochs, n_channels=3, depth=10, model_name='vgg')

Epoch 1/20
937/937 - 17s - loss: 1.1099 - accuracy: 0.6323 - val_loss: 0.7921 - val_accuracy: 0.7391 - 17s/epoch - 18ms/step
Epoch 2/20
937/937 - 14s - loss: 0.7169 - accuracy: 0.7573 - val_loss: 0.6911 - val_accuracy: 0.7711 - 14s/epoch - 15ms/step
Epoch 3/20
937/937 - 15s - loss: 0.6442 - accuracy: 0.7773 - val_loss: 0.6569 - val_accuracy: 0.7747 - 15s/epoch - 16ms/step
Epoch 4/20
937/937 - 15s - loss: 0.6060 - accuracy: 0.7904 - val_loss: 0.6238 - val_accuracy: 0.7868 - 15s/epoch - 16ms/step
Epoch 5/20
937/937 - 16s - loss: 0.5812 - accuracy: 0.7977 - val_loss: 0.6081 - val_accuracy: 0.7910 - 16s/epoch - 17ms/step
Epoch 6/20
937/937 - 18s - loss: 0.5614 - accuracy: 0.8047 - val_loss: 0.6007 - val_accuracy: 0.7942 - 18s/epoch - 19ms/step
Epoch 7/20
937/937 - 18s - loss: 0.5474 - accuracy: 0.8097 - val_loss: 0.5913 - val_accuracy: 0.7970 - 18s/epoch - 19ms/step
Epoch 8/20
937/937 - 17s - loss: 0.5360 - accuracy: 0.8129 - val_loss: 0.5859 - val_accuracy: 0.7984 - 17s/epoch - 18ms/step


In [18]:
#Eurosat - ResNet50
# The last 25% of the train split is held out for validation.
train_ds, test_ds = tfds.load('eurosat', split=['train[:75%]', 'train[75%:]'], as_supervised=True)
# The training pipeline repeats indefinitely; evaluate_image_model bounds each epoch with steps_per_epoch.
train = (
    train_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .shuffle(100)
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
    .repeat()
)
# Batch first, then prefetch, so whole batches (not single examples) are
# prepared ahead of the model — same order as the CIFAR cells.
test = (
    test_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
epochs = 20
with tf.device('/device:GPU:0'):
    evaluate_image_model(train, test, epochs, n_channels=3, depth=10, model_name='resnet')

Epoch 1/20
937/937 - 122s - loss: 2.2969 - accuracy: 0.1107 - val_loss: 2.2951 - val_accuracy: 0.1123 - 122s/epoch - 130ms/step
Epoch 2/20
937/937 - 102s - loss: 2.2960 - accuracy: 0.1101 - val_loss: 2.2947 - val_accuracy: 0.1073 - 102s/epoch - 109ms/step
Epoch 3/20
937/937 - 92s - loss: 2.2960 - accuracy: 0.1099 - val_loss: 2.2950 - val_accuracy: 0.1073 - 92s/epoch - 99ms/step
Epoch 4/20
937/937 - 99s - loss: 2.2960 - accuracy: 0.1103 - val_loss: 2.2956 - val_accuracy: 0.1142 - 99s/epoch - 106ms/step
Epoch 5/20
937/937 - 102s - loss: 2.2959 - accuracy: 0.1096 - val_loss: 2.2959 - val_accuracy: 0.1142 - 102s/epoch - 109ms/step
Epoch 6/20
937/937 - 99s - loss: 2.2959 - accuracy: 0.1100 - val_loss: 2.2954 - val_accuracy: 0.1108 - 99s/epoch - 106ms/step
Epoch 7/20
937/937 - 92s - loss: 2.2959 - accuracy: 0.1105 - val_loss: 2.2958 - val_accuracy: 0.1108 - 92s/epoch - 98ms/step
Epoch 8/20
937/937 - 95s - loss: 2.2960 - accuracy: 0.1102 - val_loss: 2.2966 - val_accuracy: 0.1108 - 95s/epoch -

In [15]:
#Eurosat - MobileNet
# The last 25% of the train split is held out for validation.
train_ds, test_ds = tfds.load('eurosat', split=['train[:75%]', 'train[75%:]'], as_supervised=True)
# The training pipeline repeats indefinitely; evaluate_image_model bounds each epoch with steps_per_epoch.
train = (
    train_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .shuffle(100)
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
    .repeat()
)
# Batch first, then prefetch, so whole batches (not single examples) are
# prepared ahead of the model — same order as the CIFAR cells.
test = (
    test_ds.map(partial(prep_pixels, depth=10))
    .cache()
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
epochs = 20
with tf.device('/device:GPU:0'):
    # Any model_name other than 'cifar', 'vgg' or 'resnet' selects MobileNet.
    evaluate_image_model(train, test, epochs, n_channels=3, depth=10, model_name='mn')





Epoch 1/20
937/937 - 35s - loss: 1.6672 - accuracy: 0.3930 - val_loss: 1.6172 - val_accuracy: 0.4095 - 35s/epoch - 38ms/step
Epoch 2/20
937/937 - 12s - loss: 1.5955 - accuracy: 0.4172 - val_loss: 1.6074 - val_accuracy: 0.4071 - 12s/epoch - 13ms/step
Epoch 3/20
937/937 - 12s - loss: 1.5887 - accuracy: 0.4199 - val_loss: 1.5989 - val_accuracy: 0.4151 - 12s/epoch - 13ms/step
Epoch 4/20
937/937 - 12s - loss: 1.5861 - accuracy: 0.4223 - val_loss: 1.6088 - val_accuracy: 0.4076 - 12s/epoch - 13ms/step
Epoch 5/20
937/937 - 12s - loss: 1.5861 - accuracy: 0.4209 - val_loss: 1.5935 - val_accuracy: 0.4150 - 12s/epoch - 13ms/step
Epoch 6/20
937/937 - 13s - loss: 1.5832 - accuracy: 0.4217 - val_loss: 1.5974 - val_accuracy: 0.4175 - 13s/epoch - 13ms/step
Epoch 7/20
937/937 - 13s - loss: 1.5814 - accuracy: 0.4209 - val_loss: 1.6104 - val_accuracy: 0.4052 - 13s/epoch - 14ms/step
Epoch 8/20
937/937 - 12s - loss: 1.5807 - accuracy: 0.4221 - val_loss: 1.5968 - val_accuracy: 0.4200 - 12s/epoch - 13ms/step


In [16]:
# CIFAR100 with a CNN network optimized for CIFAR10
# The last 25% of the train split is held out for validation.
train_ds, test_ds = tfds.load(
    'cifar100',
    split=['train[:75%]', 'train[75%:]'],
    as_supervised=True,
)
# 100 classes, so labels are one-hot encoded with depth=100.
train = (
    train_ds.map(partial(prep_pixels, depth=100))
    .cache()
    .shuffle(100)
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
    .repeat()
)
test = (
    test_ds.map(partial(prep_pixels, depth=100))
    .cache()
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
epochs = 20
with tf.device('/device:GPU:0'):
    evaluate_image_model(train, test, epochs, n_channels=3, depth=100, model_name='cifar')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 activation (Activation)     (None, 32, 32, 32)        0         
                                                                 
 batch_normalization (BatchN  (None, 32, 32, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 32, 32, 32)        0         
                                                                 
 batch_normalization_1 (Batc  (None, 32, 32, 32)       128       
 hNormalization)                                        

In [17]:
#MNIST dataset loaded with tf.keras.datasets instead of tfds
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Divide pixel values by 255; the labels stay as integer class ids.
x_train, x_test = x_train / 255.0, x_test / 255.0
print(get_available_devices())
with tf.device('/device:GPU:0'):
    evaluate_image_model_rnn(x_train, y_train, x_test, y_test)

['/device:CPU:0', '/device:GPU:0']
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 24, 24, 6)         156       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 12, 12, 6)        0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 8, 8, 16)          2416      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 4, 4, 16)         0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 256)               0         
                                                                 
 dense_3 (Dense)   

In [20]:
import pandas
import sklearn
import tensorflow as tf
from matplotlib import pyplot
from PIL import Image
from numpy import asarray
from mtcnn.mtcnn import MTCNN
from scipy import ndimage
from sklearn.model_selection import KFold
import tensorflow as tf
def decode_image(image_data):
    """Decode JPEG bytes into a 3-channel float32 image scaled to [0, 1].

    Args:
        image_data: JPEG-encoded image bytes.

    Returns:
        The image reshaped to `[*IMAGE_SIZE, 3]`.
        NOTE(review): `IMAGE_SIZE` is a module-level name that is not defined
        in the visible part of the notebook — confirm it exists.
    """
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    # convert image to floats in [0, 1] range
    scaled = tf.cast(pixels, tf.float32) / 255.0
    # explicit size needed for TPU
    return tf.reshape(scaled, [*IMAGE_SIZE, 3])

def read_labeled_tfrecord(example):
    """Parse one serialized TFRecord example into an `(image, label)` pair.

    Args:
        example: a serialized example with an "image" bytestring feature and
            a "class" int64 feature.

    Returns:
        Tuple `(image, label)`: the image decoded by `decode_image` and the
        class cast to int32.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),  # tf.string means bytestring
        "class": tf.io.FixedLenFeature([], tf.int64),   # shape [] means single element
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    decoded_image = decode_image(parsed['image'])
    class_id = tf.cast(parsed['class'], tf.int32)
    return decoded_image, class_id

def val_load_dataset(filenames, labeled=True, ordered=False):
    """Create a parsed dataset from TFRecord files.

    Args:
        filenames: TFRecord file name(s) to read.
        labeled: when True records are parsed with `read_labeled_tfrecord`,
            otherwise with `read_unlabeled_tfrecord`.
            NOTE(review): `read_unlabeled_tfrecord` is not defined in the
            visible part of the notebook — confirm before using labeled=False.
        ordered: when False, deterministic ordering is disabled to increase
            read speed.

    Returns:
        A dataset of parsed records.
    """
    read_options = tf.data.Options()
    if not ordered:
        # disable order, increase speed
        read_options.experimental_deterministic = False

    # Reads from multiple files at once; interleaving is handled by num_parallel_reads.
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=10)
    # Use data as soon as it streams in, rather than in its original order.
    records = records.with_options(read_options)
    parse_fn = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord
    return records.map(parse_fn, num_parallel_calls=10)

# Variational Autoencoder

In [25]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import Model, Input
def val_vae(input_encoder, validation_images=None):
    """Build, compile and train a convolutional autoencoder with a 2-unit bottleneck.

    NOTE(review): despite the name, no sampling layer or KL-divergence term is
    built here, so this looks like a plain autoencoder rather than a
    variational one — confirm the intent.

    Args:
        input_encoder: training images shaped (n, 28, 28, 1); used both as
            the input and as the reconstruction target. The previous
            implementation ignored this argument and read the notebook-level
            `x_train` instead.
        validation_images: images used for validation. When None the
            notebook-level `x_test` is used, as before.

    Returns:
        Tuple `(model, encoder, decoder)`: the end-to-end autoencoder (already
        trained for 10 epochs) and its two halves, which share its weights.
    """
    if validation_images is None:
        # Backward compatibility: older callers rely on the global test set.
        validation_images = x_test

    inputs = Input(shape=(28, 28, 1))

    # Encoder: 28x28 -> 14x14 -> 7x7 feature maps, then a 2-unit dense code.
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = BatchNormalization()(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(32, (2, 2), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2))(x)
    x = BatchNormalization()(x)
    x = Conv2D(16, (2, 2), activation='relu', padding='same')(x)
    x = Conv2D(4, (2, 2), activation='relu', padding='same')(x)
    x = Conv2D(1, (2, 2), activation='relu', padding='same')(x)
    x = Flatten()(x)
    encoded = Dense(2, activation='relu')(x)

    encoder = Model(inputs=inputs, outputs=encoded)

    encoded_inputs = Input(shape=(2,))

    # Decoder: 2-unit code -> 2x2 map -> 14x14 -> 28x28 reconstruction.
    x = Dense(4, activation='relu')(encoded_inputs)
    x = Reshape((2, 2, 1))(x)
    x = Conv2D(4, (2, 2), activation='relu', padding='same')(x)
    x = Conv2D(16, (2, 2), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((7, 7))(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

    decoder = Model(inputs=encoded_inputs, outputs=decoded)

    # Chain the two halves into the trainable end-to-end model.
    x = encoder(inputs)
    x = decoder(x)
    model = Model(inputs=inputs, outputs=x)
    model.compile(optimizer=Adam(0.01), loss='binary_crossentropy', metrics=['accuracy', 'mse'])

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    # Halve the learning rate when the training loss stops improving.
    clr = ReduceLROnPlateau(
        monitor='loss',
        factor=0.5,
        patience=3,
        min_delta=0.01,
        cooldown=0,
        min_lr=1e-7,
        verbose=1)

    model.fit(
        input_encoder,
        input_encoder,
        batch_size=256,
        epochs=10,
        shuffle=True,
        validation_data=(validation_images, validation_images),
        callbacks=[clr])

    return model, encoder, decoder


In [26]:
# Train the autoencoder built by val_vae
def evaluate_image_model_vae(x_train, y_train, x_test, y_test):
    """Build the autoencoder with `val_vae` and train it for 3 further epochs.

    `val_vae` already trains the model for 10 epochs before returning it; the
    `fit` call here continues that training, reconstructing `x_train` and
    validating on `x_test`.

    Args:
        x_train: training images.
        y_train: unused; kept for signature compatibility.
        x_test: validation images.
        y_test: unused; kept for signature compatibility.

    Returns:
        None.
    """
    with tf.device('/device:GPU:0'):
        model, encoder, decoder = val_vae(x_train)
        model.fit(x_train, x_train, validation_data=(x_test, x_test), epochs=3, verbose=2)
    return

In [28]:
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
# These notebook-level names are also read directly by val_vae, so they must keep these exact names.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test, y_train, y_test = prep_pixels2(
    train=x_train,
    test=x_test,
    target_train=y_train,
    target_test=y_test,
)
with tf.device('/device:GPU:0'):
    evaluate_image_model_vae(x_train, y_train, x_test, y_test)

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 model_4 (Functional)        (None, 2)                 34889     
                                                                 
 model_5 (Functional)        (None, 28, 28, 1)         42417     
                                                                 
Total params: 77,306
Trainable params: 77,082
Non-trainable params: 224
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 9/10
Epoch 10/10
Epoch 1/3
1875/1875 - 34s - loss: 0.2672 - accurac