<a href="https://colab.research.google.com/github/valentinocc/Keras_cifar10/blob/master/cifar10_ELUcnn_tpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May 29 10:42:05 2019

@author: val

basic CNN for cifar10 dataset

achieves 0.889 categorical accuracy
"""
import numpy as np
import tensorflow as tf
from tensorflow import keras
import os
from datetime import datetime
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
%load_ext tensorboard.notebook
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import Callback


In [0]:
class LR_adjuster(Callback):
    '''
    Triangular learning-rate schedule followed by a constant low-rate tail.

    At the end of every epoch the callback picks the learning rate for the
    next epoch and writes it to the model's optimizer:

    * while epoch < (epochs - 5) / 2 the rate is raised by a fixed step, so
      it climbs linearly from base_lr to (approximately) max_lr;
    * for the same number of epochs afterwards it is lowered by that step,
      bringing it back down to (approximately) base_lr;
    * for the remaining final epochs it is pinned to min_lr.

    The rate is only written in on_epoch_end, so the very first epoch runs
    with whatever learning rate the optimizer was compiled with.

    The cyclical schedule was introduced by Leslie N. Smith in
    https://arxiv.org/abs/1506.01186
    # Example
        lra = LR_adjuster(15, min_lr = 0.002, max_lr = 0.1, base_lr = 0.04)
        model.fit(x_train, y_train, epochs=15, batch_size=128, callbacks=[lra])
    # Arguments
        epochs: the amount of epochs used to train the neural network; should
                match the `epochs` passed to fit. For epochs <= 5 there is no
                room for a ramp and min_lr is applied after every epoch.
        min_lr: the learning rate to be used for the final epochs
        base_lr: learning rate the linear ramp starts from
        max_lr: the highest learning rate to be used in training, the learning
                rate will decrease after reaching this rate. It should be set
                using the methods discussed in Smith's paper
                https://arxiv.org/pdf/1803.09820.pdf
    # Attributes
        lrs: list of the learning rates chosen at the end of each epoch
    '''

    def __init__(self, epochs, min_lr = 0.0015, base_lr=0.01, max_lr=0.1):
        super().__init__()
        self.base_lr = base_lr
        self.max_lr = max_lr
        # Honour the caller's value (it used to be hard-coded to 0.0015).
        self.min_lr = min_lr
        self.epochs = epochs
        # Epoch index at which the ramp peaks; the last 5 epochs are reserved
        # for the constant min_lr tail.
        self.epochs_max_point = (epochs - 5) / 2
        if self.epochs_max_point > 0:
            self.lr_step_size = (max_lr - base_lr) / self.epochs_max_point
        else:
            # No ramp phase at all: avoid dividing by zero when epochs == 5.
            # Every epoch then falls through to the min_lr branch.
            self.lr_step_size = 0.0
        self.lrs = []
        self.lr = base_lr

    def on_epoch_end(self, epoch, logs=None):
        '''Choose the next learning rate and push it to the optimizer.'''
        if epoch < self.epochs_max_point:
            # Ascending half of the triangle.
            self.lr = self.lr + self.lr_step_size
        elif epoch < self.epochs_max_point * 2:
            # Descending half of the triangle.
            self.lr = self.lr - self.lr_step_size
        else:
            # Constant tail.
            self.lr = self.min_lr

        K.set_value(self.model.optimizer.lr, self.lr)
        self.lrs.append(self.lr)

    def on_train_end(self, logs=None):
        '''Plot and print the learning rates that were recorded.'''
        # Use the number of recorded rates for the x axis so that the plot
        # also works when training stopped before `epochs` epochs were run.
        plt.plot(np.arange(len(self.lrs)), self.lrs)
        plt.show()
        print(self.lrs)

In [0]:
def model(input_shape):
    '''
    Build the convolutional classifier and print its summary.

    The network is five stages of 3x3 'same'-padded convolution blocks
    (64x2, 128x2, 256x3, 512x3, 512x3), each stage followed by 2x2 max
    pooling with stride 2, then a flatten, one 512-unit dense block and a
    10-way softmax output.
    # Arguments
        input_shape: shape of one input sample, passed to the first
                     convolution layer
    # Returns
        the (uncompiled) tf.keras Sequential model
    '''
    # (filters, number of convolution blocks) for each stage, in order.
    stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    kernel = (3, 3)
    pool = (2, 2)

    network = tf.keras.models.Sequential()
    needs_input_layer = True

    for filters, repeats in stages:
        for _ in range(repeats):
            if needs_input_layer:
                # Only the very first convolution declares the input shape.
                convolution_elu_dropout_block(network, filters, kernel, 'same', input_shape = input_shape, first_layer = True)
                needs_input_layer = False
            else:
                convolution_elu_dropout_block(network, filters, kernel, 'same')
        network.add(tf.keras.layers.MaxPooling2D(pool_size = pool, strides = pool))

    network.add(tf.keras.layers.Flatten())
    dense_elu_block(network, 512)
    final_dense_layer(network, 10)

    network.summary()

    return network

  
def convolution_elu_dropout_block(model, filters, filter_shape, padding_setting, input_shape = None, first_layer = False):
    '''
    Append a Conv2D -> ELU -> Dropout(0.1) block to `model` in place.
    # Arguments
        model: model exposing `add`; the three layers are appended to it
        filters: number of convolution filters
        filter_shape: kernel size handed to Conv2D
        padding_setting: padding mode handed to Conv2D
        input_shape: input shape for the convolution; only used when
                     first_layer is true
        first_layer: when true, the convolution is created with
                     data_format = 'channels_last' and the given input_shape
    '''
    layers = tf.keras.layers

    conv_options = {'padding': padding_setting}
    if first_layer:
        conv_options['data_format'] = 'channels_last'
        conv_options['input_shape'] = input_shape

    model.add(layers.Conv2D(filters, filter_shape, **conv_options))
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(0.1))

    
    
def dense_elu_block(model, units):
    '''
    Append a Dense -> BatchNormalization -> ReLU -> Dropout(0.4) block to
    `model` in place.

    NOTE(review): despite the function name, the activation used here is
    'relu', not 'elu' -- confirm which one is intended.
    # Arguments
        model: model exposing `add`; the four layers are appended to it
        units: number of units of the dense layer
    '''
    layers = tf.keras.layers
    block = (
        layers.Dense(units),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.4),
    )
    for layer in block:
        model.add(layer)
    
    
def final_dense_layer(model, classes_amount):
    '''
    Append the output head to `model` in place: a dense layer with one unit
    per class followed by a separate softmax activation layer.
    # Arguments
        model: model exposing `add`; the two layers are appended to it
        classes_amount: number of output units of the dense layer
    '''
    layers = tf.keras.layers
    for head_layer in (layers.Dense(classes_amount), layers.Activation('softmax')):
        model.add(head_layer)
    
    
def configure_TPU_model(model):
    '''
    Convert a Keras model into a TPU model and compile it.

    The TPU worker address is read from the COLAB_TPU_ADDR environment
    variable (a KeyError is raised if it is not set). The converted model is
    compiled with Adam (lr = 0.002), sparse categorical cross-entropy loss and
    the 'sparse_categorical_accuracy' metric.
    # Arguments
        model: the Keras model to convert
    # Returns
        the compiled TPU model
    '''
    worker_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']

    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu = worker_address)
    distribution = tf.contrib.tpu.TPUDistributionStrategy(resolver)
    converted = tf.contrib.tpu.keras_to_tpu_model(model, strategy = distribution)

    converted.compile(
        optimizer = tf.keras.optimizers.Adam(lr = 0.002),
        loss = tf.keras.losses.sparse_categorical_crossentropy,
        metrics = ['sparse_categorical_accuracy'],
    )

    return converted


def configure_augmented_data_generator(x_train):
    '''
    Create the ImageDataGenerator used for training-time augmentation and
    fit it on the training images.

    Augmentations: rotation up to 5, width/height shifts of 0.15, zoom of
    0.13 and horizontal flips.
    # Arguments
        x_train: training images the generator is fitted on
    # Returns
        the fitted ImageDataGenerator
    '''
    augmentation = {
        'rotation_range': 5,
        'width_shift_range': 0.15,
        'height_shift_range': 0.15,
        'zoom_range': 0.13,
        'horizontal_flip': True,
    }
    augmenter = ImageDataGenerator(**augmentation)
    augmenter.fit(x_train)

    return augmenter

In [0]:
# Load CIFAR-10 through tf.keras and unpack the (train, test) image/label pairs.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Hold out 0.006 * 8 = 4.8% of the training data, shuffled, as a validation set.
# NOTE(review): no random_state is passed, so the split differs between runs.
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.006*8, shuffle= True)


In [0]:
# Per-sample shape: everything after the leading (sample) axis of x_train.
input_shape = x_train.shape[1:]
keras_model = model(input_shape)

# Create the TensorBoard callback; each run logs to its own timestamped
# sub-directory of logs/scalars/.
logdir="logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard = keras.callbacks.TensorBoard(log_dir=logdir)

# Convert the Keras model to a compiled TPU model.
tpu_model = configure_TPU_model(keras_model)

In [0]:
EPOCHS = 150
# Global batch size used for training and evaluation.
# NOTE(review): presumably 32 samples for each of 8 TPU cores -- confirm.
BATCH_SIZE = 32 * 8
# fit_generator expects a whole number of batches per epoch; round up so the
# final, partially filled batch is still counted.
STEPS_PER_EPOCH = int(np.ceil(len(x_train) / BATCH_SIZE))

data_generator = configure_augmented_data_generator(x_train)
lra = LR_adjuster(EPOCHS, min_lr = 0.002, max_lr = 0.01, base_lr = 0.001)
# The learning-rate schedule only takes effect if the callback is actually
# handed to fit_generator (it used to be created but never passed in).
tpu_model.fit_generator(
    data_generator.flow(x_train, y_train, batch_size = BATCH_SIZE),
    steps_per_epoch = STEPS_PER_EPOCH,
    epochs = EPOCHS,
    validation_data = (x_valid, y_valid),
    callbacks = [tensorboard, lra],
)
tpu_model.evaluate(x_test, y_test, batch_size = BATCH_SIZE)
tpu_model.save_weights('./tpu_model_weights.h5', overwrite = True)

In [0]:
%tensorboard --logdir logs/scalars