# GoogLeNet (Inception)

In [4]:
import numpy as np
import tensorflow as tf
import keras
from keras.layers import Input, Conv2D, MaxPool2D, Concatenate, AveragePooling2D, Dropout, Dense, Flatten
from keras.models import Model

## The Model

In [5]:
# This is an inception unit, used several times in the GoogLeNet architecture.
# It keeps the model computationally inexpensive by using cheap 1x1
# convolutions to reduce the channel dimensionality before applying
# the more expensive 3x3 and 5x5 convolutions.

def Inception(X_input, filters_1, filters_3_reduce, filters_3, filters_5_reduce, filters_5, max_pool):
    """Build one Inception module on top of `X_input` and return its output.

    Four branches read `X_input` in parallel and their outputs are
    concatenated along the last axis:

    * a 1x1 convolution with `filters_1` filters;
    * a 1x1 convolution (`filters_3_reduce`) followed by a 3x3 convolution
      (`filters_3`);
    * a 1x1 convolution (`filters_5_reduce`) followed by a 5x5 convolution
      (`filters_5`);
    * a 3x3, stride-1 max pooling followed by a 1x1 convolution (`max_pool`).

    Every convolution uses a ReLU activation. The 3x3/5x5 convolutions and
    the pooling use "same" padding, so all branches keep the spatial size of
    `X_input` and can be concatenated.
    """
    # Branch 1: plain 1x1 convolution.
    branch_1x1 = Conv2D(filters_1, (1, 1), activation = 'relu')(X_input)

    # Branch 2: 1x1 channel reduction, then 3x3 convolution.
    reduced_3x3 = Conv2D(filters_3_reduce, (1, 1), activation = 'relu')(X_input)
    branch_3x3 = Conv2D(filters_3, (3, 3), padding = "same", activation = 'relu')(reduced_3x3)

    # Branch 3: 1x1 channel reduction, then 5x5 convolution.
    reduced_5x5 = Conv2D(filters_5_reduce, (1, 1), activation = 'relu')(X_input)
    branch_5x5 = Conv2D(filters_5, (5, 5), padding = "same", activation = 'relu')(reduced_5x5)

    # Branch 4: max pooling that keeps the resolution, then 1x1 projection.
    pooled = MaxPool2D(pool_size = (3,3), strides = (1,1), padding = "same")(X_input)
    branch_pool = Conv2D(max_pool, (1, 1), activation = 'relu')(pooled)

    branches = [branch_1x1, branch_3x3, branch_5x5, branch_pool]
    return Concatenate(axis = -1)(branches)

In [6]:
# --- Stem: strided convolution and pooling take 224x224 down to 28x28 ---
X_input = Input(shape = (224, 224, 3))
X = Conv2D(filters = 64, kernel_size = (7,7), strides = (2,2), activation = 'relu', padding = "same")(X_input)
X = MaxPool2D(pool_size = (3,3), strides = (2,2), padding = "same")(X)
# NOTE(review): the GoogLeNet paper lists a 1x1 "reduce" convolution (64 filters)
# in front of this 3x3 convolution - confirm whether leaving it out is intentional.
X = Conv2D(filters = 192, kernel_size = (3,3), activation = 'relu', padding = "same")(X)
X = MaxPool2D(pool_size = (3,3), strides = (2,2), padding = "same")(X)

# --- Inception 3a, 3b ---
X = Inception(X, 64, 96, 128, 16, 32, 32)
X = Inception(X, 128, 128, 192, 32, 96, 64)
X = MaxPool2D(pool_size = (3,3), strides = (2,2), padding = 'same')(X)

# --- Inception 4a - 4e ---
X = Inception(X, 192, 96, 208, 16, 48, 64)
X = Inception(X, 160, 112, 224, 24, 64, 64)
X = Inception(X, 128, 128, 256, 24, 64, 64)
# 4d: the 3x3 branch has 288 filters in the published architecture
# (the previous value, 228, was a digit typo).
X = Inception(X, 112, 144, 288, 32, 64, 64)
X = Inception(X, 256, 160, 320, 32, 128, 128)
X = MaxPool2D(pool_size = (3,3), strides = (2,2), padding = 'same')(X)

# --- Inception 5a, 5b ---
X = Inception(X, 256, 160, 320, 32, 128, 128)
X = Inception(X, 384, 192, 384, 48, 128, 128)
# The feature map is 7x7 here, so a 7x7 average pool collapses it to 1x1.
X = AveragePooling2D(pool_size = (7,7), strides = (1,1))(X)

# --- Classifier head: 1000-way softmax ---
X = Flatten()(X)
X = Dropout(rate = 0.4)(X)
X = Dense(units = 1000, activation = 'softmax')(X)

googlenet = Model(inputs = X_input, outputs = X)
googlenet.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv2d_56 (Conv2D)              (None, 112, 112, 64) 9472        input_2[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_13 (MaxPooling2D) (None, 56, 56, 64)   0           conv2d_56[0][0]                  
__________________________________________________________________________________________________
conv2d_57 (Conv2D)              (None, 56, 56, 192)  110784      max_pooling2d_13[0][0]           
_______________________________________________________________________________________

In [7]:
# Write the freshly built network to disk. At this point in the notebook the
# model has not been compiled or trained yet, so the weights are the initial ones.
googlenet.save("../working/GoogLeNet.keras")

## Data Preparation

In [8]:
def random_crop(img, random_crop_size):
    """Return a randomly positioned crop of a 3-channel image.

    Args:
        img: array indexed as (row, column, channel); the third axis must
            have length 3.
        random_crop_size: `(crop_height, crop_width)` pair.

    Returns:
        The `crop_height` x `crop_width` window of `img` (all channels),
        whose top-left corner is drawn uniformly from the positions at which
        the window fits inside the image.
    """
    assert img.shape[2] == 3
    img_h, img_w = img.shape[:2]
    crop_h, crop_w = random_crop_size
    # The horizontal offset is drawn before the vertical one.
    left = np.random.randint(0, img_w - crop_w + 1)
    top = np.random.randint(0, img_h - crop_h + 1)
    bottom, right = top + crop_h, left + crop_w
    return img[top:bottom, left:right, :]

def crop_generator(batches, crop_length):
    """Yield `(crops, labels)` batches with every image randomly cropped to a square.

    Args:
        batches: iterable yielding `(batch_x, batch_y)` pairs, where `batch_x`
            is an array indexed as (image, row, column, channel).
        crop_length: side length, in pixels, of the square crops.

    Yields:
        `(batch_crops, batch_y)`: `batch_crops` holds one
        `crop_length` x `crop_length` crop per image of `batch_x` and keeps
        the dtype of `batch_x`; `batch_y` is passed through unchanged.
    """
    crop_size = (crop_length, crop_length)
    # Iterating with `for` (instead of calling next() inside `while True`) lets
    # a finite source end this generator cleanly; a StopIteration escaping from
    # next() inside a generator body is turned into RuntimeError (PEP 479).
    for batch_x, batch_y in batches:
        # Allocate with the incoming dtype and channel count instead of the
        # float64 default, which would double the memory of float32 batches.
        batch_crops = np.empty(
            (batch_x.shape[0], crop_length, crop_length, batch_x.shape[-1]),
            dtype = batch_x.dtype,
        )
        for i, img in enumerate(batch_x):
            batch_crops[i] = random_crop(img, crop_size)
        yield batch_crops, batch_y

In [9]:
from keras.preprocessing.image import ImageDataGenerator

TRAIN_DIR = "../input/imagenetmini-1000/imagenet-mini/train/"
VAL_DIR = "../input/imagenetmini-1000/imagenet-mini/val/"
batch_size = 64

# Random horizontal flips are a training-time augmentation.
train_generator = ImageDataGenerator(horizontal_flip = True)
# Validation images are not flipped: augmenting them would make the
# validation metrics depend on random transformations.
val_generator = ImageDataGenerator()

# Images are loaded at 256x256 and labels are one-hot encoded ('categorical').
train_gen = train_generator.flow_from_directory(TRAIN_DIR, target_size=(256, 256), batch_size=batch_size, class_mode='categorical')
val_gen = val_generator.flow_from_directory(VAL_DIR, target_size=(256, 256), batch_size=batch_size, class_mode='categorical')

# Random 224x224 crops bring the batches to the network's input size.
# NOTE(review): validation batches are still cropped at random positions;
# a fixed (e.g. centre) crop would make validation fully deterministic.
train_batches = crop_generator(train_gen, 224)
val_batches = crop_generator(val_gen, 224)

Found 34745 images belonging to 1000 classes.
Found 3923 images belonging to 1000 classes.


## Training the Model

In [10]:
# Configure training: Adam optimizer, categorical cross-entropy loss and
# accuracy as the reported metric.
googlenet.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
from livelossplot import PlotLossesKeras

epochs = 5

# The wrapped generators never stop on their own, so Keras must be told how
# many batches make up one epoch. The image counts are read from the directory
# iterators rather than hard-coded, so they stay correct if the dataset changes.
steps_per_epoch = train_gen.samples // batch_size
validation_steps = val_gen.samples // batch_size

# `Model.fit` accepts Python generators directly; `fit_generator` is deprecated.
history = googlenet.fit(train_batches,
                        epochs = epochs,
                        steps_per_epoch = steps_per_epoch,
                        validation_data = val_batches,
                        validation_steps = validation_steps,
                        callbacks = [PlotLossesKeras()],
                        verbose = 1)

Epoch 1/5
  6/542 [..............................] - ETA: 4:20 - loss: 6.9047 - accuracy: 0.0000e+00