In [1]:
%load_ext autoreload
%autoreload 2
import os

from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, LearningRateScheduler
from keras.optimizers import SGD, RMSprop

from data_generator import WrapperImageDataGenerator, custom_generator, get_weights

import tensorflow as tf
# Ask TensorFlow to grow its GPU memory allocation on demand instead of
# reserving the whole device when the session starts.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
# Hand the configured session to Keras so that model building and training
# run in it; without this call Keras lazily creates its own default session.
K.set_session(session)

Using TensorFlow backend.


In [2]:
# --- Training configuration -------------------------------------------------
EPOCHS = 100
# Image counts used to size an epoch (steps_per_epoch / validation_steps in
# the training cells). Only a quarter of each count is used per epoch.
IMAGES_ON_TRAIN = 44183 // 4
IMAGES_ON_VALIDATION = 10966 // 4

# Dataset locations, relative to the notebook's working directory. These are
# the paths the generators were actually reading; the former absolute,
# machine-specific values of TRAIN_DATA_DIR / VALID_DATA_DIR were never used.
TRAIN_DATA_DIR = 'data/trainset/data_to_fit_with_weight_train/'
VALID_DATA_DIR = 'data/trainset/data_to_fit_with_weight_valid/'
WEIGHTS_DATA_DIR = 'data/trainset/data_to_fit_with_weight'
# The two entries become the first two dimensions of the network input shape
# (presumably height, width for the channels-last TensorFlow backend).
IMAGE_SHAPE = (139, 221)
BATCH_SIZE = 64

# Batch generators and class weights come from the project-local
# data_generator module.
train_generator = custom_generator(TRAIN_DATA_DIR, IMAGE_SHAPE, BATCH_SIZE)
valid_generator = custom_generator(VALID_DATA_DIR, IMAGE_SHAPE, BATCH_SIZE)
weights = get_weights(WEIGHTS_DATA_DIR)

In [3]:
MODEL_DATA_DIR = 'data/models'
LR = 0.045

# Checkpoints and the CSV log are both written below MODEL_DATA_DIR, so make
# sure the directory exists before training starts.
if not os.path.isdir(MODEL_DATA_DIR):
    os.makedirs(MODEL_DATA_DIR)

# --- Callbacks --------------------------------------------------------------
# Save the model after every epoch; epoch number and validation loss are
# encoded in the file name.
checkpoint_path = 'checkpoints_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
checkpoint_path = os.path.join(MODEL_DATA_DIR, checkpoint_path)
checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1, period=1)

# Stop when the monitored quantity (Keras default: val_loss) has not improved
# by at least 0.001 for 7 consecutive epochs.
stoper = EarlyStopping(min_delta=0.001, patience=7)

# Step schedule: multiply the base learning rate by 0.94 every two epochs.
reducer = LearningRateScheduler(lambda e: LR * 0.94 ** (e // 2))

# Append per-epoch metrics to a CSV file next to the checkpoints.
log_path = 'log.csv'
log_path = os.path.join(MODEL_DATA_DIR, log_path)
logger = CSVLogger(filename=log_path, append=True)

# The logger was previously constructed but left out of this list, so no log
# file was ever produced.
callbacks = [checkpointer, stoper, reducer, logger]

In [4]:
# Backbone: InceptionV3 pre-trained on ImageNet, without its classification
# top, taking 3-channel inputs whose first two dimensions come from IMAGE_SHAPE.
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=tuple(IMAGE_SHAPE[:2]) + (3,),
)

# Head: global spatial average pooling followed by 10 sigmoid outputs.
# An optional intermediate Dense(1024, activation='relu') layer is left out.
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(10, activation='sigmoid')(x)

# The full network that gets trained in the cells below.
model = Model(inputs=base_model.input, outputs=predictions)
# Uncomment to inspect the architecture or to resume from a saved checkpoint:
# model.summary()
# model.load_weights('data/models/finetuned_checkpoints_weights.02-0.11.hdf5')

In [5]:
# Stage 1: train only the newly added head. Every InceptionV3 backbone layer
# is frozen so that the pre-trained convolutional weights stay untouched.
for layer in base_model.layers:
    layer.trainable = False

# RMSprop with moving-average coefficient rho=0.9 and epsilon=1.
# Note: the Keras `decay` argument is a per-update learning-rate decay
# (lr / (1 + decay * iterations)), not the RMSprop averaging factor; passing
# decay=0.9 shrinks the learning rate to almost nothing after a few batches.
optimizer = RMSprop(lr=LR, rho=0.9, epsilon=1)

# Compile only after the trainable flags are set, so they take effect.
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['MSE'])

model.fit_generator(train_generator,
                    epochs=EPOCHS,
                    steps_per_epoch=IMAGES_ON_TRAIN // BATCH_SIZE,
                    verbose=1,
                    validation_data=valid_generator,
                    callbacks=callbacks,
                    # Only a tenth of the validation batches are evaluated per epoch.
                    validation_steps=int(0.1 * IMAGES_ON_VALIDATION / BATCH_SIZE),
                    class_weight=weights,
                    initial_epoch=0)

Epoch 1/100


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
 32/172 [====>.........................] - ETA: 274s - loss: 0.3661 - mean_squared_error: 0.1049

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import argparse
import os
import tqdm
import skvideo.io
import skimage.io
import skimage.transform
import numpy as np
from keras.applications.inception_v3 import preprocess_input

from test import predict_labels

# Short codes and human-readable names of the annotated events.
labels = ["z", "c", "m", "t", "d", "b", "e", "x", "f"]
meaning_labels = ["zebra", "zebra sign", "bridge", "under bridge", "wiper", "bump", "city enter", "city exit", "bump sign"]

# Run the current model over one training video and plot each output column
# across the processed images.
video_path = 'data/trainset/video/akn.031.029.left.avi'
Y = predict_labels(video_path, model, IMAGE_SHAPE)
c_images, c_class = Y.shape
for i in range(c_class):
    y = Y[:, i]
    # Assumes column i corresponds to meaning_labels[i] -- TODO confirm against
    # the label order used when the training targets were built. The model has
    # more outputs than there are names here, so fall back to a generic title.
    title = meaning_labels[i] if i < len(meaning_labels) else 'class {}'.format(i)
    plt.figure(figsize=(12, 2))
    plt.plot(range(c_images), y, 'bs')
    plt.title(title)
    plt.ylim((0, 1))
    plt.show()

In [None]:
FINETUNE_EPOCHS = 100
FINETUNE_LR = 0.0001

# List layer indices and names to help choose the freeze boundary below.
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Stage 2: fine-tune the top of the network. The first 249 layers stay frozen
# and everything after them becomes trainable.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# Fine-tuning gets its own checkpoint files and its own learning-rate schedule.
# Reusing the stage-1 scheduler would reset the rate from LR (0.045) every
# epoch and override FINETUNE_LR. All other stage-1 callbacks are reused as is.
finetune_checkpoint_path = os.path.join(
    MODEL_DATA_DIR, 'finetuned_checkpoints_weights.{epoch:02d}-{val_loss:.2f}.hdf5')
finetune_checkpointer = ModelCheckpoint(filepath=finetune_checkpoint_path, verbose=1, period=1)
finetune_scheduler = LearningRateScheduler(lambda e: FINETUNE_LR * 0.94 ** (e // 2))
finetune_callbacks = [cb for cb in callbacks if cb is not reducer and cb is not checkpointer]
finetune_callbacks += [finetune_checkpointer, finetune_scheduler]

# RMSprop at the low fine-tuning rate. rho (not `decay`) is the moving-average
# coefficient; `decay` in Keras is a per-update learning-rate decay.
optimizer = RMSprop(lr=FINETUNE_LR, rho=0.9, epsilon=1)

# Recompile so that the changed trainable flags take effect.
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['MSE'])

# NOTE(review): stage 1 passes class_weight=weights while this stage passes
# None -- confirm that dropping the class weights here is intentional.
model.fit_generator(train_generator,
                    epochs=FINETUNE_EPOCHS,
                    steps_per_epoch=IMAGES_ON_TRAIN // BATCH_SIZE,
                    verbose=1,
                    validation_data=valid_generator,
                    callbacks=finetune_callbacks,
                    validation_steps=int(0.1 * IMAGES_ON_VALIDATION / BATCH_SIZE),
                    class_weight=None,
                    initial_epoch=0)