# Carvana exploration notebook

Import libraries

In [9]:
# import libraries
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
import keras.backend as K
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Activation, UpSampling2D, BatchNormalization, Conv2DTranspose
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

import os
import itertools
import matplotlib.pyplot as plt

Get the file name IDs and split into train and validation sets

In [10]:
# define directories
dataset_dir = '/home/ubuntu/carvana/input/data/'
image_dir = dataset_dir + 'images'
mask_dir = dataset_dir + 'masks'

In [11]:
img_width = 128
img_height = 128
batch_size = 16
epochs = 80

In [12]:
# Define data augmentations for training set
data_gen_args = dict(rescale=1./255,
                     shear_range=0.1,
                     rotation_range=4,
                     zoom_range=0.1,
                     horizontal_flip=True,
                     width_shift_range=0.1,
                     height_shift_range=0.1,
                     validation_split=0.2) # 20% validation set

image_datagen = ImageDataGenerator(**data_gen_args)
mask_datagen = ImageDataGenerator(**data_gen_args)

seed = 42

# Create generator for training images
train_image_generator = image_datagen.flow_from_directory(
    image_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    seed=seed,
    subset='training')

# Create generator for training masks
train_mask_generator = mask_datagen.flow_from_directory(
    mask_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    seed=seed,
    subset='training')

# Create generator for validation images
val_image_generator = image_datagen.flow_from_directory(
    image_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    seed=seed,
    subset='validation')

# Create generator for validation masks
val_mask_generator = mask_datagen.flow_from_directory(
    mask_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    seed=seed,
    subset='validation')

num_samples_train = train_image_generator.n
num_samples_val = val_image_generator.n

# Combine generators into single training and validation generators for model training
train_generator = zip(train_image_generator, train_mask_generator)
validation_generator = zip(val_image_generator, val_mask_generator)

Found 4071 images belonging to 1 classes.
Found 4071 images belonging to 1 classes.
Found 1017 images belonging to 1 classes.
Found 1017 images belonging to 1 classes.


In [13]:
def dice_coeff(true, pred):
    smooth = 1.
    true_flat = K.flatten(true)
    pred_flat = K.flatten(pred)
    intersection = K.sum(true_flat * pred_flat)
    score = (2. * intersection + smooth) / (K.sum(true_flat) + K.sum(pred_flat) + smooth)
    return score

def dice_loss(true, pred):
    loss = 1 - dice_coeff(true, pred)
    return loss

In [48]:
def unet_base(input_shape=(128,128,3),
              num_classes=3, 
              first_filters=64):
    
    inputs = Input(shape=input_shape)
    
    down1 = Conv2D(first_filters, (3,3), activation='relu', padding='same')(inputs)
    down1 = BatchNormalization()(down1)
    down1 = Conv2D(first_filters, (3,3), activation='relu', padding='same')(down1)
    down1 = BatchNormalization()(down1)
    down1_pool = MaxPooling2D((2,2), strides=(2,2))(down1)
    print(down1.shape)
    
    down2 = Conv2D(first_filters*2, (3,3), activation='relu', padding='same')(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = Conv2D(first_filters*2, (3,3), activation='relu', padding='same')(down2)
    down2 = BatchNormalization()(down2)
    down2_pool = MaxPooling2D((2,2), strides=(2,2))(down2)
    print(down2.shape)
    
    down3 = Conv2D(first_filters*4, (3,3), activation='relu', padding='same')(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = Conv2D(first_filters*4, (3,3), activation='relu', padding='same')(down3)
    down3 = BatchNormalization()(down3)
    down3_pool = MaxPooling2D((2,2), strides=(2,2))(down3)
    print(down3.shape)
    
    center = Conv2D(first_filters*8, (3,3), activation='relu', padding='same')(down3_pool)
    center = BatchNormalization()(center)
    center = Conv2D(first_filters*8, (3,3), activation='relu', padding='same')(center)
    center = BatchNormalization()(center)
    print(center.shape)
    
    # up3 = UpSampling2D((2,2))(center)
    up3 = Conv2DTranspose(first_filters*4, (2,2), activation='relu', strides=(2,2), padding='same')(center)
    up3 = concatenate([up3, down3], axis=3)
    up3 = Conv2D(first_filters*4, (3,3), activation='relu', padding='same')(up3)
    up3 = BatchNormalization()(up3)
    up3 = Conv2D(first_filters*4, (3,3), activation='relu', padding='same')(up3)
    up3 = BatchNormalization()(up3)
    
    # up2 = UpSampling2D((2,2))(up3)
    up2 = Conv2DTranspose(first_filters*2, (2,2), activation='relu', strides=(2,2), padding='same')(up3)
    up2 = concatenate([up2, down2], axis=3)
    up2 = Conv2D(first_filters*2, (3,3), activation='relu', padding='same')(up2)
    up2 = BatchNormalization()(up2)
    up2 = Conv2D(first_filters*2, (3,3), activation='relu', padding='same')(up2)
    up2 = BatchNormalization()(up2)
    
    # up1 = UpSampling2D((2,2))(up2)
    up1 = Conv2DTranspose(first_filters, (2,2), activation='relu', strides=(2,2), padding='same')(up2)
    up1 = concatenate([up1, down1], axis=3)
    up1 = Conv2D(first_filters, (3,3), activation='relu', padding='same')(up1)
    up1 = BatchNormalization()(up1)
    up1 = Conv2D(first_filters, (3,3), activation='relu', padding='same')(up1)
    up1 = BatchNormalization()(up1)
    
    classify = Conv2D(num_classes, (1, 1), activation='sigmoid')(up1)
    
    model = Model(inputs=inputs, outputs=classify)
    
    model.compile(optimizer=RMSprop(lr=0.0001), loss=dice_loss, metrics=[dice_coeff])
    
    return model

In [49]:
callbacks = [EarlyStopping(monitor='val_loss',
                           patience=8,
                           verbose=1,
                           min_delta=1e-4),
             ReduceLROnPlateau(monitor='val_loss',
                               factor=0.1,
                               patience=4,
                               verbose=1,
                               epsilon=1e-4),
             ModelCheckpoint(monitor='val_loss',
                             filepath='weights/best_weights.hdf5',
                             save_best_only=True,
                             save_weights_only=True),
             TensorBoard(log_dir='logs')]



In [50]:
model = unet_base()

model.fit_generator(generator=train_generator,
                    steps_per_epoch=np.ceil(num_samples_train/batch_size),
                    epochs=epochs,
                    verbose=2,
                    callbacks=callbacks,
                    validation_data=validation_generator,
                    validation_steps=np.ceil(num_samples_val/batch_size))

(?, 128, 128, 64)
(?, 64, 64, 128)
(?, 32, 32, 256)
(?, 16, 16, 512)
Epoch 1/80
 - 199s - loss: 0.3184 - dice_coeff: 0.6816 - val_loss: 0.1746 - val_dice_coeff: 0.8254


OSError: Unable to create file (unable to open file: name = 'weights/best_weights.hdf5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)