SSD: Run anchor creation only once, instead of on each step
joaqo authored and nagitsu committed Mar 20, 2018
1 parent 0cbd647 commit d52eb0b
Showing 3 changed files with 40 additions and 43 deletions.
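The gist of the change: SSD anchors depend only on the model configuration and the input size, so they can be computed once with NumPy while the graph is being built and handed to TensorFlow as a single tensor, rather than being recomputed by TensorFlow ops on every training step. The snippet below is a rough standalone sketch of that pattern, not Luminoth's actual code; the names build_anchors and feature_map_shapes, and the anchor geometry, are made up for illustration.

import numpy as np
import tensorflow as tf


def build_anchors(feature_map_shapes):
    """Build dummy (x1, y1, x2, y2) anchors for each feature map and stack them."""
    all_anchors = []
    for height, width in feature_map_shapes:
        # One unit-sized anchor centered on every feature-map cell
        # (placeholder geometry, just to show the flow).
        xs, ys = np.meshgrid(np.arange(width), np.arange(height))
        boxes = np.stack([xs, ys, xs + 1, ys + 1], axis=-1).reshape(-1, 4)
        all_anchors.append(boxes.astype(np.float32))
    return np.concatenate(all_anchors, axis=0)


# Computed once, in Python, while the graph is being defined...
anchors_np = build_anchors([(38, 38), (19, 19), (10, 10)])
# ...then exposed to the graph as one tensor, so no per-step anchor ops remain.
anchors = tf.convert_to_tensor(anchors_np, dtype=tf.float32)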
7 changes: 7 additions & 0 deletions luminoth/models/ssd/base_config.yml
@@ -58,6 +58,13 @@ train:
   # Number of epochs (complete dataset batches) to run
   num_epochs: 10000
 
+  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
+  image_vis: debug
+
+eval:
+  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
+  image_vis: eval
+
 dataset:
   type: tfrecord
   # From which directory to read the dataset
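For reference, the relevant part of the default config after this change reads roughly as follows (only the keys visible in the hunk above; the rest of the file is elided):

train:
  # Number of epochs (complete dataset batches) to run
  num_epochs: 10000
  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
  image_vis: debug

eval:
  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
  image_vis: eval

dataset:
  type: tfrecord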
52 changes: 22 additions & 30 deletions luminoth/models/ssd/ssd.py
@@ -12,7 +12,7 @@
 from luminoth.utils.losses import smooth_l1_loss
 from luminoth.utils.vars import get_saver
 
-from luminoth.utils.bbox_transform_tf import clip_boxes
+from luminoth.utils.bbox_transform import clip_boxes
 
 
 DEFAULT_ENDPOINTS = {
@@ -128,25 +128,22 @@ def _build(self, image, gt_boxes=None, is_training=True):
         class_probabilities = slim.softmax(class_scores)
 
         # Generate anchors
-        self.anchors = self.generate_all_anchors(feature_maps)
+        raw_anchors_per_featmap = self.generate_raw_anchors(feature_maps)
         all_anchors_list = []
         for i, (feat_map_name, feat_map) in enumerate(feature_maps.items()):
             # TODO: Anchor generation should be simpler. We should create
             #       them in image scale from the start instead of scaling
             #       them to their feature map size.
-            feat_map_shape = feat_map.get_shape().as_list()[1:3]
-            adjusted_bboxes = adjust_bboxes(
-                self.anchors[feat_map_name],
-                tf.cast(feat_map_shape[0], tf.float32),
-                tf.cast(feat_map_shape[1], tf.float32),
-                tf.cast(tf.shape(image)[1], tf.float32),
-                tf.cast(tf.shape(image)[2], tf.float32)
+            feat_map_shape = feat_map.shape.as_list()[1:3]
+            scaled_bboxes = adjust_bboxes(
+                raw_anchors_per_featmap[feat_map_name], feat_map_shape[0],
+                feat_map_shape[1], self.image_shape[0], self.image_shape[1]
             )
-            # Clip anchors to the image.
-            adjusted_bboxes = clip_boxes(
-                adjusted_bboxes, tf.cast(tf.shape(image)[1:3], tf.int32))
-            all_anchors_list.append(adjusted_bboxes)
-        all_anchors = tf.concat(all_anchors_list, axis=0)
+            clipped_bboxes = clip_boxes(scaled_bboxes, self.image_shape)
+            all_anchors_list.append(clipped_bboxes)
+        all_anchors = np.concatenate(all_anchors_list, axis=0)
+        # They were using float64, is all this precision necesary?
+        all_anchors = tf.convert_to_tensor(all_anchors, dtype=tf.float64)
 
         prediction_dict = {}
         if gt_boxes is not None:
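Note the import swap above (bbox_transform instead of bbox_transform_tf): since anchors are plain NumPy arrays at this point, clipping can happen outside the graph as well. Below is a minimal sketch of what such a clipping step does, using a simplified stand-in rather than the real signature of luminoth.utils.bbox_transform.clip_boxes.

import numpy as np

def clip_boxes_np(boxes, image_shape):
    """Clamp (x1, y1, x2, y2) boxes to the image bounds.

    A simplified stand-in, not Luminoth's actual implementation.
    """
    height, width = image_shape
    boxes = boxes.copy()
    boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width - 1)   # x coordinates
    boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height - 1)  # y coordinates
    return boxes

# An anchor spilling over a 300x300 input gets clamped to its borders.
print(clip_boxes_np(np.array([[-10.0, 20.0, 310.0, 280.0]]), (300, 300)))
# -> [[  0.  20. 299. 280.]]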
@@ -205,7 +202,6 @@ def _build(self, image, gt_boxes=None, is_training=True):
             # TODO add variable summaries
 
             if self._debug:
-                prediction_dict['anchors'] = self.anchors
                 prediction_dict['all_anchors'] = all_anchors
                 prediction_dict['all_anchors_target'] = all_anchors
                 prediction_dict['cls_prob'] = class_probabilities
@@ -321,7 +317,7 @@ def loss(self, prediction_dict):

         return total_loss
 
-    def generate_all_anchors(self, feature_maps):
+    def generate_raw_anchors(self, feature_maps):
         """
         Returns a dictionary containing the anchors per feature map.
@@ -367,32 +363,28 @@ def _generate_anchors(self, feature_map_shape, anchor_reference):
         using the (x1, y1, x2, y2) convention.
         """
         with tf.variable_scope('generate_anchors'):
-            shift_x = tf.range(feature_map_shape[1])
-            shift_y = tf.range(feature_map_shape[0])
-            shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
+            shift_x = np.arange(feature_map_shape[1])
+            shift_y = np.arange(feature_map_shape[0])
+            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
 
-            shift_x = tf.reshape(shift_x, [-1])
-            shift_y = tf.reshape(shift_y, [-1])
+            shift_x = np.reshape(shift_x, [-1])
+            shift_y = np.reshape(shift_y, [-1])
 
-            shifts = tf.stack(
+            shifts = np.stack(
                 [shift_x, shift_y, shift_x, shift_y],
                 axis=0
             )
 
-            shifts = tf.transpose(shifts)
+            shifts = np.transpose(shifts)
             # Shifts now is a (H x W, 4) Tensor
 
             # Expand dims to use broadcasting sum.
             all_anchors = (
-                tf.expand_dims(anchor_reference, axis=0) +
-                tf.cast(tf.expand_dims(shifts, axis=1), tf.float64)
+                np.expand_dims(anchor_reference, axis=0) +
+                np.expand_dims(shifts, axis=1)
             )
 
-            # Flatten
-            all_anchors = tf.reshape(
-                all_anchors, (-1, 4)
-            )
-            return all_anchors
+            return np.reshape(all_anchors, (-1, 4))
 
     @property
     def summary(self):
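The broadcasting sum in _generate_anchors above can be checked in isolation: adding a (1, num_anchors, 4) anchor reference to a (H * W, 1, 4) grid of shifts replicates every reference anchor at every feature-map position. A small self-contained NumPy check, with arbitrary toy values:

import numpy as np

# Toy anchor references: two anchors per location, as (x1, y1, x2, y2) offsets.
anchor_reference = np.array([
    [-8.0, -8.0, 8.0, 8.0],
    [-16.0, -4.0, 16.0, 4.0],
])

feature_map_shape = (3, 4)  # (height, width), arbitrary for the example

# Same shift construction as in _generate_anchors above.
shift_x, shift_y = np.meshgrid(
    np.arange(feature_map_shape[1]), np.arange(feature_map_shape[0]))
shifts = np.stack(
    [shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()],
    axis=0).T  # shape (H * W, 4)

# Broadcasting sum: (1, 2, 4) + (12, 1, 4) -> (12, 2, 4), i.e. every anchor
# reference replicated at every feature-map position, then flattened.
all_anchors = (
    np.expand_dims(anchor_reference, axis=0) +
    np.expand_dims(shifts, axis=1)
).reshape(-1, 4)

print(all_anchors.shape)  # (24, 4) == H * W * num_anchors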
24 changes: 11 additions & 13 deletions luminoth/models/ssd/ssd_utils.py
@@ -14,23 +14,21 @@ def adjust_bboxes(bboxes, old_height, old_width, new_height, new_width):
     Returns:
         Tensor with shape (num_bboxes, 4), with the adjusted bboxes.
     """
-    # We normalize bounding boxes points.
-    bboxes_float = tf.to_float(bboxes)
-    x_min, y_min, x_max, y_max = tf.unstack(bboxes_float, axis=1)
-
-    x_min = x_min / old_width
-    y_min = y_min / old_height
-    x_max = x_max / old_width
-    y_max = y_max / old_height
+    # x_min, y_min, x_max, y_max = np.split(bboxes, 4, axis=1)
+    # import ipdb; ipdb.set_trace()
+    x_min = bboxes[:, 0] / old_width
+    y_min = bboxes[:, 1] / old_height
+    x_max = bboxes[:, 2] / old_width
+    y_max = bboxes[:, 3] / old_height
 
     # Use new size to scale back the bboxes points to absolute values.
-    x_min = tf.to_int32(x_min * new_width)
-    y_min = tf.to_int32(y_min * new_height)
-    x_max = tf.to_int32(x_max * new_width)
-    y_max = tf.to_int32(y_max * new_height)
+    x_min = x_min * new_width
+    y_min = y_min * new_height
+    x_max = x_max * new_width
+    y_max = y_max * new_height
 
     # Concat points and label to return a [num_bboxes, 4] tensor.
-    return tf.stack([x_min, y_min, x_max, y_max], axis=1)
+    return np.stack([x_min, y_min, x_max, y_max], axis=1)
 
 
 def generate_anchors_reference(ratios, scales, num_anchors, feature_map_shape):
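A quick worked example of what the rewritten adjust_bboxes computes: boxes given in feature-map coordinates are normalized by the old size and rescaled to the new (image) size, now keeping float precision instead of truncating with tf.to_int32. Values below are arbitrary:

import numpy as np

# One box covering the top-left cell of an 8x8 feature map,
# rescaled to a 300x300 input image.
bboxes = np.array([[0.0, 0.0, 1.0, 1.0]])
old_height, old_width = 8, 8
new_height, new_width = 300, 300

x_min = bboxes[:, 0] / old_width * new_width
y_min = bboxes[:, 1] / old_height * new_height
x_max = bboxes[:, 2] / old_width * new_width
y_max = bboxes[:, 3] / old_height * new_height

print(np.stack([x_min, y_min, x_max, y_max], axis=1))
# -> [[ 0.   0.  37.5 37.5]]  (previously these values would have been truncated)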
