# Multi GPU Training



In [1]:
import keras
print (keras.__version__)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


2.2.0


In [2]:
import os
import sys
import tensorflow as tf
from keras.utils import multi_gpu_model
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator

import numpy as np
import math

# fixed random seed to have consistent results
np.random.seed(123)

train_dir = "data/train"
val_dir = "data/test"
epochs = 5
nb_GPU = 4
batch_size = 30 * nb_GPU
nb_train_samples = 3000
nb_validation_samples = 300
img_width, img_height = 299, 299 # fixed size for InceptionV3

The idea here is simple. Load one high quality pre-trained network and remove last layer that is supposed to do final prediction. Replace that layer with a final layer doing the prediction for our two classes and train only the weights for the added layer. As long as you are using a network trained for similar problem like recognising animals or birds, the first layers before prediction are already trained to understand features and representations that would probably apply well to dogs and cats.

In [3]:
# data prep
train_datagen =  ImageDataGenerator(
  preprocessing_function=preprocess_input
)

test_datagen = ImageDataGenerator(
  preprocessing_function=preprocess_input
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size, class_mode='binary'
)

validation_generator = test_datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size, class_mode='binary'
)

Found 3000 images belonging to 2 classes.
Found 300 images belonging to 2 classes.


In the next step, we will grab Inception V3 network with its pre-trained weights simply remove the last layer and add our own last `Dense` layer.

In [4]:
with tf.device('/cpu:0'):
    # setup model
    base_model = InceptionV3(weights='imagenet', include_top=False) # include_top=False excludes final fully connected layer

    x = base_model.output
    x = GlobalAveragePooling2D()(x) # this layer prevents overfitting and generally recommended between conv layers and dense ones
    x = Dense(1024, activation='relu')(x) #new FC layer
    prediction = Dense(1, activation='sigmoid')(x) # new sigmoid layer
    model = Model(inputs=base_model.input, outputs=prediction)

    for layer in base_model.layers:
        layer.trainable = False

In [5]:
# make the model parallel
multi_gpu_model = multi_gpu_model(model, gpus = nb_GPU)

In [6]:
multi_gpu_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [7]:
# Let us do the training
multi_gpu_model.fit_generator(
    generator = train_generator,
    epochs = epochs,
    steps_per_epoch = math.ceil(nb_train_samples / batch_size),    
    validation_data = validation_generator,
    validation_steps = math.ceil(nb_validation_samples/batch_size),
    workers=24, use_multiprocessing=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7ff55a375ef0>

In [8]:
test_datagen1 = ImageDataGenerator(
  preprocessing_function=preprocess_input 
)

test_generator1 = test_datagen1.flow_from_directory(val_dir, 
                                                    target_size=(img_width, img_height), 
                                                    batch_size=batch_size,  class_mode='binary')

test_loss, test_acc = multi_gpu_model.evaluate_generator(test_generator1, steps=50)
print('test acc:', test_acc)
print('test loss:', test_loss)

Found 300 images belonging to 2 classes.
test acc: 0.9632936460631234
test loss: 0.10322590043679589
