<a href="https://colab.research.google.com/github/valentinocc/Keras_cifar10/blob/master/keras_DenseNet_cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import tensorflow as tf
import keras
import os
from datetime import datetime
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, Activation, add, MaxPooling2D, Dense, Flatten
from keras.layers import Input, BatchNormalization, AveragePooling2D, concatenate, GlobalAveragePooling2D
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau, LearningRateScheduler 
from keras.regularizers import l2
from keras.initializers import he_uniform

# DenseNet-BC-100-12 architecture,
# based on figure 4 of this article: https://towardsdatascience.com/densenet-on-cifar10-d5651294a1a8

# Load the TensorBoard notebook extension so the `%tensorboard` magic used in the last cell is available.
# NOTE(review): newer TensorBoard releases expect `%load_ext tensorboard` - confirm against the installed version.
%load_ext tensorboard.notebook

In [0]:
# --- Preprocessing ---
# When True, the per-pixel mean of the training images is subtracted from every split.
subtract_pixel_mean = True

# --- Classifier head ---
CLASS_AMOUNT = 10        # units in the final softmax layer

# --- DenseNet hyper-parameters ---
GROWTH_RATE = 12         # filters produced by each 3x3 convolution block
LAYERS_PER_BLOCK = 16    # dense layers stacked inside each dense block
WEIGHT_DECAY = 1e-4      # L2 factor applied to the convolution kernels

In [0]:
def initial_convolution(inputs, weight_decay=0.0001, growth_rate=12):
  """Apply the network stem: 3x3 'same' convolution, then ReLU, then batch normalization.

  Args:
    inputs: tensor the stem convolution is applied to.
    weight_decay: L2 factor for the convolution kernel.
    growth_rate: the convolution produces ``2 * growth_rate`` filters.

  Returns:
    The batch-normalized, ReLU-activated convolution output.
  """
  stem_conv = Conv2D(
      filters=2 * growth_rate,
      kernel_size=(3, 3),
      padding='same',
      kernel_initializer='he_uniform',
      kernel_regularizer=l2(weight_decay),
  )
  stem = stem_conv(inputs)
  stem = Activation('relu')(stem)
  return BatchNormalization()(stem)

def bottleneck_function(inputs, weight_decay=0.0001, growth_rate=12, bn_size=4):
  """Apply a 1x1 bottleneck convolution, then ReLU, then batch normalization.

  Args:
    inputs: tensor the 1x1 convolution is applied to.
    weight_decay: L2 factor for the convolution kernel.
    growth_rate: together with ``bn_size`` sets the filter count.
    bn_size: the convolution produces ``bn_size * growth_rate`` filters.

  Returns:
    The batch-normalized, ReLU-activated 1x1 convolution output.
  """
  pointwise_conv = Conv2D(
      filters=bn_size * growth_rate,
      kernel_size=(1, 1),
      kernel_initializer='he_uniform',
      kernel_regularizer=l2(weight_decay),
  )
  squeezed = pointwise_conv(inputs)
  squeezed = Activation('relu')(squeezed)
  return BatchNormalization()(squeezed)

def conv_block(inputs, weight_decay=0.0001, growth_rate=12):
  """Apply a 3x3 'same' convolution with ``growth_rate`` filters, then ReLU, then batch normalization.

  Args:
    inputs: tensor the 3x3 convolution is applied to.
    weight_decay: L2 factor for the convolution kernel.
    growth_rate: number of filters the convolution produces.

  Returns:
    The batch-normalized, ReLU-activated 3x3 convolution output.
  """
  spatial_conv = Conv2D(
      filters=growth_rate,
      kernel_size=(3, 3),
      padding='same',
      kernel_initializer='he_uniform',
      kernel_regularizer=l2(weight_decay),
  )
  new_features = spatial_conv(inputs)
  new_features = Activation('relu')(new_features)
  return BatchNormalization()(new_features)
  

def dense_layer(inputs, weight_decay=0.0001, growth_rate=12):
  """Build one dense layer: 1x1 bottleneck followed by the 3x3 convolution block.

  Args:
    inputs: tensor fed to the bottleneck.
    weight_decay: L2 factor forwarded to both convolutions.
    growth_rate: growth rate forwarded to both convolutions; the returned
      tensor comes from ``conv_block`` and so has ``growth_rate`` filters.

  Returns:
    Output of ``conv_block`` applied to the bottleneck output.
  """
  bottleneck = bottleneck_function(inputs, weight_decay, growth_rate)
  # The 3x3 block must consume the bottleneck output. Feeding it `inputs`
  # again (as the code previously did) leaves the bottleneck disconnected
  # from the result, so it never contributes to the network.
  return conv_block(bottleneck, weight_decay, growth_rate)

def dense_block(inputs, weight_decay=0.0001, growth_rate=12, layers_per_block=16):
  """Stack ``layers_per_block`` dense layers, concatenating each output onto its input.

  Every dense layer receives the concatenation (along axis 3) of the block
  input and the outputs of all dense layers before it.

  Args:
    inputs: tensor entering the block.
    weight_decay: L2 factor forwarded to each dense layer.
    growth_rate: growth rate forwarded to each dense layer.
    layers_per_block: number of dense layers to stack.

  Returns:
    The block input concatenated with the output of every dense layer.
  """
  stacked = inputs
  for _ in range(layers_per_block):
    fresh_maps = dense_layer(stacked, weight_decay, growth_rate)
    stacked = concatenate([stacked, fresh_maps], axis=3)
  return stacked


def transition_layer(inputs, weight_decay=0.0001, compression=0.5):
  """Shrink the feature maps between dense blocks: 1x1 convolution, then 2x2 average pooling.

  Args:
    inputs: tensor whose axis 3 holds the channels (``inputs.shape[3]`` is read).
    weight_decay: L2 factor for the convolution kernel. Defaults to the same
      value as the other layer builders in this notebook.
    compression: fraction of the incoming channels kept by the 1x1
      convolution; the filter count is ``int(channels * compression)``.
      The default of 0.5 halves the channels.

  Returns:
    The average-pooled output of the 1x1 convolution.

  Raises:
    ValueError: if ``compression`` is not in the interval (0, 1].
  """
  if not 0 < compression <= 1:
    raise ValueError("compression must be in the interval (0, 1]")

  incoming_channels = int(inputs.shape[3])
  kept_channels = int(incoming_channels * compression)

  x = Conv2D(filters = kept_channels, kernel_size = (1, 1), kernel_initializer = 'he_uniform', kernel_regularizer=l2(weight_decay))(inputs)
  x = AveragePooling2D(pool_size = (2, 2))(x)
  return x
  
def model(input_shape, weight_decay, growth_rate, layers_per_block, class_amount):
  """Assemble the full network: stem, three dense blocks with two transitions, and a softmax head.

  Args:
    input_shape: shape passed to ``Input``.
    weight_decay: L2 factor forwarded to the stem, dense blocks and transitions.
    growth_rate: growth rate forwarded to the stem and dense blocks.
    layers_per_block: number of dense layers in each dense block.
    class_amount: number of units in the final softmax ``Dense`` layer.

  Returns:
    A ``Model`` mapping the input tensor to the softmax output.
  """
  image_input = Input(input_shape)
  features = initial_convolution(image_input, weight_decay, growth_rate)

  # First two stages: each dense block is followed by a transition layer.
  for _ in range(2):
    features = dense_block(features, weight_decay, growth_rate, layers_per_block)
    features = transition_layer(features, weight_decay)

  # The last dense block feeds global average pooling instead of a transition.
  features = dense_block(features, weight_decay, growth_rate, layers_per_block)
  pooled = GlobalAveragePooling2D()(features)

  class_probabilities = Dense(class_amount, activation='softmax')(pooled)

  return Model(inputs=image_input, outputs=class_probabilities)

In [0]:
def learning_rate_schedule(epoch):
  """Return the learning rate for ``epoch`` from a piecewise-constant schedule.

  The rate starts at 1e-3 and is scaled by 1e-1, 1e-2, 1e-3 and 5e-4 once the
  epoch index exceeds 80, 120, 160 and 180 respectively. The chosen value is
  also printed next to the epoch index.

  Args:
    epoch: epoch index the rate is requested for.

  Returns:
    The learning rate to use for that epoch.
  """
  base_lr = 1e-3
  # (epoch threshold, multiplier) pairs, checked from the latest stage backwards
  # so the first match is the stage the epoch belongs to.
  decay_stages = ((180, 5e-4), (160, 1e-3), (120, 1e-2), (80, 1e-1))

  lr = base_lr
  for threshold, multiplier in decay_stages:
    if epoch > threshold:
      lr = base_lr * multiplier
      break

  print(epoch, ", ",  lr)

  return lr

# Multiply the learning rate by 0.2 after 10 epochs without a val_loss improvement.
# NOTE(review): LearningRateScheduler below also assigns the rate from
# learning_rate_schedule, so the two callbacks may override each other - confirm
# that running both is intended.
ReduceLROnPlateauObject = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=10,
    verbose=0,
    mode='auto',
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
)

# Drive the learning rate from the piecewise-constant schedule defined in this cell.
LRSchedulerObject = LearningRateScheduler(
    learning_rate_schedule,
    verbose=0,
)

# Each execution of this cell gets its own timestamped TensorBoard log directory.
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Order in which the callbacks are handed to training.
callbacks = [
    ReduceLROnPlateauObject,
    LRSchedulerObject,
    tensorboard_callback,
]

In [0]:
# Hold-out split settings. A fixed seed makes the train/validation partition
# identical on every run, so results are comparable across kernel restarts.
VALIDATION_FRACTION = 0.006 * 8
SPLIT_SEED = 42

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train,
    test_size=VALIDATION_FRACTION,
    shuffle=True,
    random_state=SPLIT_SEED,
)

# Normalize data: cast to float32 and scale every split by 1/255.
x_train = x_train.astype('float32') / 255
x_valid = x_valid.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# If subtract pixel mean is enabled, centre every split on the per-pixel mean
# computed from the training images only, so no validation/test statistics
# enter the preprocessing.
if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_valid -= x_train_mean
    x_test -= x_train_mean

In [0]:
INPUT_SHAPE = x_train.shape[1:]
EPOCHS = 200
BATCH_SIZE = 32

# Number of generator batches that make up one pass over the training set.
# Rounded up so a final partial batch is included, and kept as an int.
STEPS_PER_EPOCH = int(np.ceil(x_train.shape[0] / BATCH_SIZE))

# Bind the built network to its own name. Assigning it to `model` would
# overwrite the `model` builder function and make this cell fail when re-run.
densenet = model(INPUT_SHAPE, WEIGHT_DECAY, GROWTH_RATE, LAYERS_PER_BLOCK, CLASS_AMOUNT)

# Labels are integer class ids, hence the sparse categorical loss.
# The initial learning rate is the schedule's value for epoch 0.
densenet.compile(loss='sparse_categorical_crossentropy',
                 optimizer=keras.optimizers.Adam(lr=learning_rate_schedule(0)),
                 metrics=['accuracy'])

datagen = ImageDataGenerator(
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # epsilon for ZCA whitening
        zca_epsilon=1e-06,
        # degree range for random rotations
        rotation_range=5,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # set range for random shear
        shear_range=0.,
        # set range for random zoom
        zoom_range=0.2,
        # set range for random channel shifts
        channel_shift_range=0.,
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        # value used for fill_mode = "constant"
        cval=0.,
        # randomly flip images horizontally
        horizontal_flip=True,
        # randomly flip images vertically
        vertical_flip=False,
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# All of those options are disabled above; the call is kept so they can be
# switched on without further changes.
datagen.fit(x_train)

# Fit the model on the batches generated by datagen.flow().
# Validation uses the split held out from the training data, so the test set
# stays untouched until the final evaluation and cannot influence the
# val_loss-driven callback.
densenet.fit_generator(datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
                       validation_data=(x_valid, y_valid),
                       steps_per_epoch=STEPS_PER_EPOCH,
                       epochs=EPOCHS, verbose=1, callbacks=callbacks)

# Final score on the test set, which was not used during training.
densenet.evaluate(x_test, y_test, batch_size = BATCH_SIZE)

In [0]:
# Open TensorBoard inline on the parent directory of the timestamped run logs written by the TensorBoard callback.
%tensorboard --logdir logs/scalars