In [7]:
                                                                                                                                                                                                                                                                                                    from __future__ import division
import random
import pprint
import sys
import time
import numpy as np
from optparse import OptionParser
import pickle

from keras import backend as K
from keras.optimizers import Adam, SGD, RMSprop
from keras.layers import Input
from keras.models import Model
from keras.utils import generic_utils
import os

from frcnn_lib import config, data_generators
from frcnn_lib import losses as losses
import frcnn_lib.roi_helpers as roi_helpers
from frcnn_lib.simple_parser import get_data

import tensorflow as tf
# used VGG model 
from frcnn_lib import vgg as nn_model

#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [8]:
# Training configuration
#
# Start from the library defaults and override the settings used for this run.
C = config.Config()

# Augmentation switches: horizontal flips, vertical flips and 90-degree rotations.
C.use_horizontal_flips = True
C.use_vertical_flips = True
C.rot_90 = True

# Destination file for the best weights saved during training.
C.model_path = "./model_frcnn.hdf5"

# Number of ROIs handed to the classifier per training step (default value is 32).
C.num_rois = 32

# Backbone selection and the pre-trained weight file reported by that backbone module.
C.network = "vgg"
C.base_net_weights = nn_model.get_weight_path()

# File in which the training meta data (the config object) is stored.
config_output_filename = "config.pickle"

In [9]:
# Read the training annotations.
# get_data returns three values: the list of per-image records (each carries an
# 'imageset' key that is used for the train/val split further down), a dict of
# per-class counts, and the class-name -> index mapping.
all_imgs, classes_count, class_mapping = get_data('./data/train/train.txt')

Parsing annotation files


In [10]:
# Make sure a background ('bg') class is present. When it is missing it gets
# a zero count and the next free class index (the current mapping size).
if 'bg' not in classes_count:
    class_mapping['bg'] = len(class_mapping)
    classes_count['bg'] = 0

# Keep the mapping on the config object so it is pickled together with it.
C.class_mapping = class_mapping

# Reverse lookup: class index -> class name.
inv_map = dict((idx, name) for name, idx in class_mapping.items())

print('Training images per class:')
pprint.pprint(classes_count)
num_classes_with_bg = len(classes_count)
print('Num classes (including bg) = {}'.format(num_classes_with_bg))

Training images per class:
{'bg': 0, 'cow': 1375, 'dog': 1147}
Num classes (including bg) = 3


In [11]:
# Persist the training configuration so that the same settings can be
# reloaded at test time.
with open(config_output_filename, 'wb') as config_f:
    pickle.dump(C, config_f)

# Reported only after the dump succeeded and the file has been closed.
print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))

Config has been written to config.pickle, and can be loaded when testing to ensure correct results


In [14]:
# Image preprocessing

# Shuffle all images in place.
# NOTE(review): no random seed is set in this notebook, so the order differs between runs.
random.shuffle(all_imgs)
num_imgs = len(all_imgs)

# Split by the 'imageset' tag of each record: 'trainval' records are used for
# training, 'test' records for validation.
train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

print('Num train samples {}'.format(len(train_imgs)))
print('Num val samples {}'.format(len(val_imgs)))

# Data generators yielding the anchor ground truth for the training and validation sets.
data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C, nn_model.get_img_output_length, K.image_dim_ordering(), mode='train')
data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C, nn_model.get_img_output_length, K.image_dim_ordering(), mode='val')

# The channel axis comes first for 'th' ordering and last otherwise.
# Both branches must bind the same name, because `input_shape_img` is read
# immediately below when the image input is created.
if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

# Image input (variable spatial size) and ROI input (variable number of 4-value boxes).
img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(None, 4))

Num train samples 2038
Num val samples 51


In [15]:
# Define the base network (VGG, see the nn_model import) on top of the image input.
# Its output is shared by the RPN and the classifier head built in the next cell.
# trainable=True is passed through to nn_base.
shared_layers = nn_model.nn_base(img_input, trainable=True)

In [17]:
# Region proposal network, built on the shared base layers.
# One anchor per (scale, ratio) combination configured in C.
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn_model.rpn(shared_layers, num_anchors)

# Classifier head operating on the shared layers and the ROI input.
classifier = nn_model.classifier(
    shared_layers,
    roi_input,
    C.num_rois,
    nb_classes=len(classes_count),
    trainable=True,
)

# Only the first two RPN outputs are exposed as model outputs.
model_rpn = Model(img_input, rpn[:2])
model_classifier = Model([img_input, roi_input], classifier)

# Combined model holding both the RPN and the classifier; it is used to
# load/save the weights of both models at once.
model_all = Model([img_input, roi_input], rpn[:2] + classifier)

In [18]:
# Load the pre-trained base network weights into both models.
# The path comes from C.base_net_weights, set in the configuration cell.
# NOTE(review): by_name=True makes Keras match weights to layers by layer name;
# layers without a counterpart in the file presumably keep their initial
# values - confirm against the Keras version in use.
# NOTE(review): a missing weight file is not handled here and will raise.
print('loading weights from {}'.format(C.base_net_weights))
model_rpn.load_weights(C.base_net_weights, by_name=True)
model_classifier.load_weights(C.base_net_weights, by_name=True)

loading weights from vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [19]:
# Compile the models.
# The same learning rate is used for the RPN and the classifier optimizers.
lr = 1e-5

# RPN: one loss per output (classification, regression).
model_rpn.compile(
    optimizer=Adam(lr),
    loss=[
        losses.rpn_loss_cls(num_anchors),
        losses.rpn_loss_regr(num_anchors),
    ],
)

# Classifier: classification loss plus a regression loss built for
# len(classes_count) - 1 classes; accuracy is tracked on the output whose
# name is 'dense_class_<number of classes>'.
model_classifier.compile(
    optimizer=Adam(lr),
    loss=[
        losses.class_loss_cls,
        losses.class_loss_regr(len(classes_count) - 1),
    ],
    metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'},
)

# NOTE(review): model_all is only used for saving weights in this notebook,
# so this optimizer/loss pair looks like a placeholder - confirm.
model_all.compile(optimizer='sgd', loss='mae')

In [20]:
def _select_roi_samples(pos_samples, neg_samples, num_rois):
    """Pick the ROI indices used for one classifier training step.

    Args:
        pos_samples: 1-D integer array with the indices of the positive ROIs.
        neg_samples: 1-D integer array with the indices of the negative ROIs.
        num_rois: number of ROIs to select.

    Returns:
        For num_rois > 1 a list of indices: at most num_rois // 2 positives
        followed by enough negatives to reach num_rois. For num_rois == 1 a
        single index drawn from either pool.

    Raises:
        ValueError: if negatives have to be drawn but neg_samples is empty.
    """
    if num_rois > 1:
        if len(pos_samples) < num_rois // 2:
            # Too few positives to fill half of the batch: take all of them.
            selected_pos_samples = pos_samples.tolist()
        else:
            selected_pos_samples = np.random.choice(pos_samples, num_rois // 2, replace=False).tolist()

        num_neg = num_rois - len(selected_pos_samples)
        try:
            selected_neg_samples = np.random.choice(neg_samples, num_neg, replace=False).tolist()
        except ValueError:
            # Fewer negatives than requested: fall back to sampling with replacement.
            selected_neg_samples = np.random.choice(neg_samples, num_neg, replace=True).tolist()

        return selected_pos_samples + selected_neg_samples

    # In the extreme case where num_rois = 1, we pick a random pos or neg sample.
    # NOTE(review): this yields a scalar index, so the fancy indexing done by
    # the caller drops the ROI axis - confirm the classifier accepts that shape.
    if np.random.randint(0, 2):
        return random.choice(neg_samples)
    return random.choice(pos_samples)


def start_training(num_epochs, epoch_length, class_mapping, C, K, data_gen_train, model_rpn, model_classifier, model_all):
    """Alternately train the RPN and the classifier for a number of epochs.

    Every iteration trains the RPN on one batch, converts its predictions to
    ROIs, samples C.num_rois of them and trains the classifier on that sample.
    After `epoch_length` successful iterations the averaged losses are
    reported (when C.verbose is set) and the weights of `model_all` are saved
    to C.model_path whenever the summed loss improved.

    Args:
        num_epochs: number of epochs to run.
        epoch_length: number of successful iterations that make up one epoch.
        class_mapping: class-name -> index mapping, forwarded to calc_iou.
        C: config object; verbose, num_rois and model_path are read here.
        K: Keras backend module, used for image_dim_ordering().
        data_gen_train: iterator yielding (X, Y, img_data) triples.
        model_rpn: compiled RPN model.
        model_classifier: compiled classifier model.
        model_all: combined model, used only for saving weights.

    Returns:
        None.
    """
    iter_num = 0
    # Per-iteration history for the current epoch. Columns: RPN classification
    # loss, RPN regression loss, detector classification loss, detector
    # regression loss, detector accuracy.
    loss_history = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    # np.inf instead of the np.Inf alias, which was removed in NumPy 2.0.
    best_loss = np.inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.')

                try:
                    X, Y, img_data = next(data_gen_train)
                except StopIteration:
                    # An exhausted generator can never recover; without this
                    # guard the blanket handler below would print and retry forever.
                    print('Training data generator is exhausted, stopping training.')
                    return

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

                if X2 is None:
                    # No usable ROIs for this image: count it as zero overlaps and move on.
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # ROIs whose last class column is 1 are the negatives, all others the
                # positives (presumably the last column is the background class - confirm
                # against calc_iou). np.where returns a 1-tuple here, hence the [0].
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                sel_samples = _select_roi_samples(pos_samples, neg_samples, C.num_rois)

                loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]], [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                # Entry 0 of each result is skipped (presumably the combined loss - confirm).
                loss_history[iter_num, 0] = loss_rpn[1]
                loss_history[iter_num, 1] = loss_rpn[2]

                loss_history[iter_num, 2] = loss_class[1]
                loss_history[iter_num, 3] = loss_class[2]
                loss_history[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(iter_num, [('rpn_cls', np.mean(loss_history[:iter_num, 0])), ('rpn_regr', np.mean(loss_history[:iter_num, 1])),
                                          ('detector_cls', np.mean(loss_history[:iter_num, 2])), ('detector_regr', np.mean(loss_history[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(loss_history[:, 0])
                    loss_rpn_regr = np.mean(loss_history[:, 1])
                    loss_class_cls = np.mean(loss_history[:, 2])
                    loss_class_regr = np.mean(loss_history[:, 3])
                    class_acc = np.mean(loss_history[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))
                        # NOTE(review): the per-epoch checkpoint is only written when
                        # C.verbose is set - confirm that this coupling is intended.
                        model_all.save_weights("epoch"+str(epoch_num)+"_model_frcnn.hdf5")

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss,curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)

                    break

            except Exception as e:
                # Best effort: report the failure and retry with the next batch.
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')

In [None]:
# Start training
#
# One epoch consists of `epoch_length` successful training iterations.
epoch_length = 200
num_epochs = 100

start_training(
    num_epochs=num_epochs,
    epoch_length=epoch_length,
    class_mapping=class_mapping,
    C=C,
    K=K,
    data_gen_train=data_gen_train,
    model_rpn=model_rpn,
    model_classifier=model_classifier,
    model_all=model_all,
)

Starting training
Epoch 1/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 3.140909090909091
Classifier accuracy for bounding boxes from RPN: 0.8940625
Loss RPN classifier: 4.237836672208325
Loss RPN regression: 0.17595094898715616
Loss Detector classifier: 0.37179569419140934
Loss Detector regression: 0.2626566331833601
Elapsed time: 251.2884180545807
Total loss decreased from inf to 5.0482399485702505, saving weights
Epoch 2/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 4.5048076923076925
Classifier accuracy for bounding boxes from RPN: 0.86203125
Loss RPN classifier: 3.452480220797872
Loss RPN regression: 0.1535650036390871
Loss Detector classifier: 0.39364021404791855
Loss Detector regression: 0.2753786526620388
Elapsed time: 229.40715098381042
Total loss decreased from 5.0482399485702505 to 4.275064091146916, saving weights
Epoch 3/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 6.679802955665025
Cla

Mean number of bounding boxes from RPN overlapping ground truth boxes: 9.648514851485148
Classifier accuracy for bounding boxes from RPN: 0.78140625
Loss RPN classifier: 1.9180448158526557
Loss RPN regression: 0.11345537960878574
Loss Detector classifier: 0.4737733021937311
Loss Detector regression: 0.3064168343320489
Elapsed time: 218.56632471084595
Total loss decreased from 2.8544882428778373 to 2.8116903319872213, saving weights
Epoch 12/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 9.223880597014926
Classifier accuracy for bounding boxes from RPN: 0.78421875
Loss RPN classifier: 1.9113121342960324
Loss RPN regression: 0.10816463206894696
Loss Detector classifier: 0.46324184355333275
Loss Detector regression: 0.3002480474114418
Elapsed time: 228.12019205093384
Total loss decreased from 2.8116903319872213 to 2.7829666573297542, saving weights
Epoch 13/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 8.768472906403941
Classifier a

Total loss decreased from 2.2202015509888624 to 2.1364755827150486, saving weights
Epoch 22/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 10.43
Classifier accuracy for bounding boxes from RPN: 0.80921875
Loss RPN classifier: 1.3370854190108894
Loss RPN regression: 0.10186806134763174
Loss Detector classifier: 0.4295893381116912
Loss Detector regression: 0.2705529150087386
Elapsed time: 220.99688935279846
Epoch 23/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 10.782178217821782
Classifier accuracy for bounding boxes from RPN: 0.80484375
Loss RPN classifier: 1.3465508085153988
Loss RPN regression: 0.10831288642482832
Loss Detector classifier: 0.4189660514215939
Loss Detector regression: 0.26480225307866934
Elapsed time: 219.60520935058594
Epoch 24/100
Mean number of bounding boxes from RPN overlapping ground truth boxes: 10.56
Classifier accuracy for bounding boxes from RPN: 0.81
Loss RPN classifier: 1.619009727215781
Loss RPN reg

In [None]:
# Testing 
