In [1]:
import numpy as np
import pandas as pd
import bcolz
import time
import logging
import datetime

import sys
sys.path.append('..')

from bcolzutils import *
from util import *

import keras.backend as K
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, LearningRateScheduler
from keras import optimizers
from keras import regularizers

from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input

from keras.applications.mobilenet import MobileNet
from keras.applications.mobilenet import preprocess_input as mobile_preprocess_input

import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config = config)

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
arch = "vgg19"

percent = 0.25
#percent = 1
epochs=15
num_classes = 133
batch_size = 48
lr=1e-3
momentum=0.9
weight_decay = 1e-5
test_prefix=""

def lr_schedule(epoch):
    """ divides the lr by 10 every 5 epochs"""
    n = epoch // 5
    return lr / (10 ** n)

if percent < 1:
    test_prefix = "test"
    
model_path = f'../saved_models/weights.best.fc_layers.{arch}_{test_prefix}.hdf5'
loss_history_csv_name = f'fc_layers.{arch}_loss_history_{test_prefix}.csv'

d = datetime.datetime.today()

logging.basicConfig(level='DEBUG',
                    handlers=[
                              logging.StreamHandler()])
log = logging.getLogger(__name__)

basedir="/home/tutysara/src/myprojects/dog-project/dogImages"

train_name = basedir + '/pp_train_data'
valid_name = basedir + '/pp_valid_data'
test_name = basedir + '/pp_test_data'

In [3]:
%ls -l {basedir}

total 556
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:00 [0m[01;34mbottleneck_features_vgg19_test_data.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:00 [01;34mbottleneck_features_vgg19_test_labels.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:12 [01;34mbottleneck_features_vgg19_test_y_pred.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:12 [01;34mbottleneck_features_vgg19_test_y_true.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:00 [01;34mbottleneck_features_vgg19_train_data.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:00 [01;34mbottleneck_features_vgg19_train_labels.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:00 [01;34mbottleneck_features_vgg19_valid_data.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  2 20:00 [01;34mbottleneck_features_vgg19_valid_labels.bclz[0m/
drwxrwxr-x   4 tutysara tutysara   4096 Mar  6 18:57 [01;34mpp_test_data_data.bclz[0m/
drwxrwxr-x   4 tutysara tut

In [4]:
# read bcolz data
bclz_valid_data = bcolz.carray(rootdir= valid_name+'_data.bclz', mode='r')
bclz_test_data = bcolz.carray(rootdir= test_name + '_data.bclz', mode='r')
bclz_train_data = bcolz.carray(rootdir= train_name+ '_data.bclz', mode='r')


bclz_valid_labels = bcolz.carray(rootdir= valid_name+'_labels.bclz', mode='r')
bclz_test_labels = bcolz.carray(rootdir= test_name + '_labels.bclz', mode='r')
bclz_train_labels = bcolz.carray(rootdir= train_name+ '_labels.bclz', mode='r')

print(bclz_valid_data.shape, bclz_valid_labels.shape)
print(bclz_test_data.shape, bclz_test_labels.shape) 
print(bclz_train_data.shape, bclz_train_labels.shape) 

(835, 224, 224, 3) (835, 133)
(836, 224, 224, 3) (836, 133)
(6680, 224, 224, 3) (6680, 133)


In [5]:
# take percentage of data if required
bclz_valid_data3 = bclz_valid_data
bclz_test_data3 = bclz_test_data
bclz_train_data3 = bclz_train_data

bclz_valid_labels3 = bclz_valid_labels
bclz_test_labels3 = bclz_test_labels
bclz_train_labels3 = bclz_train_labels
    
if percent < 1:
    valid_len = int(len(bclz_valid_data) * percent)
    test_len = int(len(bclz_test_data) * percent)
    train_len = int(len(bclz_train_data) * percent)

    bclz_valid_data3 = bclz_valid_data[:valid_len]
    bclz_test_data3 = bclz_test_data[:test_len]
    bclz_train_data3 = bclz_train_data[:train_len]

    bclz_valid_labels3 = bclz_valid_labels[:valid_len]
    bclz_test_labels3 = bclz_test_labels[:test_len]
    bclz_train_labels3 = bclz_train_labels[:train_len]
    
print(bclz_valid_data3.shape, bclz_valid_labels3.shape)
print(bclz_test_data3.shape, bclz_test_labels3.shape)
print(bclz_train_data3.shape, bclz_train_labels3.shape)

(208, 224, 224, 3) (208, 133)
(209, 224, 224, 3) (209, 133)
(1670, 224, 224, 3) (1670, 133)


In [6]:
valid_gen =bcolz_data_generator(bclz_valid_data3, bclz_valid_labels3, batch_size=batch_size, shuffle=True)
test_gen =bcolz_data_generator(bclz_test_data3, bclz_test_labels3, batch_size=batch_size, shuffle=True)
train_gen =bcolz_data_generator(bclz_train_data3, bclz_train_labels3, batch_size=batch_size, shuffle=True)

In [7]:
# Generate a model with all layers (with top)
vgg19 = VGG19(weights='imagenet', include_top=True)

#Add a layer where input is the output of the  second last layer 
x = Dense(num_classes, activation='softmax', name='my_predictions')(vgg19.layers[-2].output)

for layer in vgg19.layers:
    layer.trainable = False
    
#Then create the corresponding model 
my_model = Model(input=vgg19.input, output=x)
my_model.layers[-3].trainable = True
my_model.layers[-2].trainable = True
my_model.layers[-1].trainable = True
#my_model.summary()

  # This is added back by InteractiveShellApp.init_path()


In [8]:
for layer in my_model.layers:
    if hasattr(layer, 'kernel_regularizer'):
        layer.kernel_regularizer= regularizers.l2(weight_decay)

In [9]:
for layer in my_model.layers:
    if hasattr(layer, 'kernel_regularizer'):
        print(layer.name, layer.trainable,  layer.kernel_regularizer)
    else:
         print(layer.name, layer.trainable)

input_1 False
block1_conv1 False <keras.regularizers.L1L2 object at 0x7f37b844a198>
block1_conv2 False <keras.regularizers.L1L2 object at 0x7f37b844a1d0>
block1_pool False
block2_conv1 False <keras.regularizers.L1L2 object at 0x7f37b844a0f0>
block2_conv2 False <keras.regularizers.L1L2 object at 0x7f37b844a128>
block2_pool False
block3_conv1 False <keras.regularizers.L1L2 object at 0x7f37b844a048>
block3_conv2 False <keras.regularizers.L1L2 object at 0x7f37b844a208>
block3_conv3 False <keras.regularizers.L1L2 object at 0x7f37b844a2e8>
block3_conv4 False <keras.regularizers.L1L2 object at 0x7f37b844a390>
block3_pool False
block4_conv1 False <keras.regularizers.L1L2 object at 0x7f37b844a320>
block4_conv2 False <keras.regularizers.L1L2 object at 0x7f37b844a358>
block4_conv3 False <keras.regularizers.L1L2 object at 0x7f37b844a278>
block4_conv4 False <keras.regularizers.L1L2 object at 0x7f37b844a2b0>
block4_pool False
block5_conv1 False <keras.regularizers.L1L2 object at 0x7f37b844a240>
bloc

In [12]:
checkpointer = ModelCheckpoint(filepath=model_path, verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
csv_logger = CSVLogger(loss_history_csv_name, append=True, separator=',')
lrscheduler = LearningRateScheduler(schedule=lr_schedule)

my_model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=lr, momentum=momentum),
              metrics=['accuracy'])

my_model.fit_generator(
#my_model.fit(
          train_gen,
          steps_per_epoch= (1 + int(train_len // batch_size)),
          #bclz_train_data3, bclz_train_labels3,
          epochs=epochs,
          validation_data=valid_gen,
          validation_steps= (1 + int(valid_len // batch_size)),
          #validation_data=(bclz_valid_data3, bclz_valid_labels3),
          callbacks=[lrscheduler] )

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
 5/35 [===>..........................] - ETA: 10s - loss: 4.8547e-04 - acc: 1.0000

KeyboardInterrupt: 

In [14]:
for layer in my_model.layers:
    layer.trainable = True
    print(layer.name, layer.trainable)

input_1 True
block1_conv1 True
block1_conv2 True
block1_pool True
block2_conv1 True
block2_conv2 True
block2_pool True
block3_conv1 True
block3_conv2 True
block3_conv3 True
block3_conv4 True
block3_pool True
block4_conv1 True
block4_conv2 True
block4_conv3 True
block4_conv4 True
block4_pool True
block5_conv1 True
block5_conv2 True
block5_conv3 True
block5_conv4 True
block5_pool True
flatten True
fc1 True
fc2 True
my_predictions True


In [15]:
my_model.fit_generator(train_gen,
          steps_per_epoch= (1 + int(bclz_train_data3.shape[0] // batch_size)),
          epochs=epochs,
          validation_data=valid_gen,
          validation_steps= (1 + int(bclz_valid_data3.shape[0] // batch_size)),
          callbacks=[early_stopping, lrscheduler] )

  'Discrepancy between trainable weights and collected trainable'


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 00007: early stopping


<keras.callbacks.History at 0x7fda4846c1d0>

In [14]:
checkpointer = ModelCheckpoint(filepath=model_path, verbose=1, save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
csv_logger = CSVLogger(loss_history_csv_name, append=True, separator=',')
lrscheduler = LearningRateScheduler(schedule=lr_schedule)

my_model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=lr, momentum=momentum),
              metrics=['accuracy'])
my_model.fit(bclz_train_data3, bclz_train_labels3,
          epochs=epochs,
          validation_data=(bclz_valid_data3, bclz_valid_labels3),
          callbacks=[early_stopping, lrscheduler])

Train on 1670 samples, validate on 208 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 00004: early stopping


<keras.callbacks.History at 0x7fb1168e9f28>