SSD: Run anchor creation only once, instead of on each step
joaqo authored and nagitsu committed Mar 20, 2018
1 parent 0cbd647 commit d52eb0b
Showing 3 changed files with 40 additions and 43 deletions.
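The gist of the change: SSD anchors depend only on the model configuration and the input size, so they can be computed once with NumPy while the graph is being built and handed to TensorFlow as a single tensor, rather than being recomputed by TensorFlow ops on every training step. The snippet below is a rough standalone sketch of that pattern, not Luminoth's actual code; the names build_anchors and feature_map_shapes, and the anchor geometry, are made up for illustration.

import numpy as np
import tensorflow as tf


def build_anchors(feature_map_shapes):
    """Build dummy (x1, y1, x2, y2) anchors for each feature map and stack them."""
    all_anchors = []
    for height, width in feature_map_shapes:
        # One unit-sized anchor centered on every feature-map cell
        # (placeholder geometry, just to show the flow).
        xs, ys = np.meshgrid(np.arange(width), np.arange(height))
        boxes = np.stack([xs, ys, xs + 1, ys + 1], axis=-1).reshape(-1, 4)
        all_anchors.append(boxes.astype(np.float32))
    return np.concatenate(all_anchors, axis=0)


# Computed once, in Python, while the graph is being defined...
anchors_np = build_anchors([(38, 38), (19, 19), (10, 10)])
# ...then exposed to the graph as one tensor, so no per-step anchor ops remain.
anchors = tf.convert_to_tensor(anchors_np, dtype=tf.float32)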
7 changes: 7 additions & 0 deletions luminoth/models/ssd/base_config.yml
@@ -58,6 +58,13 @@ train:
   # Number of epochs (complete dataset batches) to run
   num_epochs: 10000
 
+  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
+  image_vis: debug
+
+eval:
+  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
+  image_vis: eval
+
 dataset:
   type: tfrecord
   # From which directory to read the dataset
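For reference, the relevant part of the default config after this change reads roughly as follows (only the keys visible in the hunk above; the rest of the file is elided):

train:
  # Number of epochs (complete dataset batches) to run
  num_epochs: 10000
  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
  image_vis: debug

eval:
  # Image visualization mode, options = train, eval, debug, (empty). Default=(empty)
  image_vis: eval

dataset:
  type: tfrecord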
52 changes: 22 additions & 30 deletions luminoth/models/ssd/ssd.py
@@ -12,7 +12,7 @@
 from luminoth.utils.losses import smooth_l1_loss
 from luminoth.utils.vars import get_saver
 
-from luminoth.utils.bbox_transform_tf import clip_boxes
+from luminoth.utils.bbox_transform import clip_boxes
 
 
 DEFAULT_ENDPOINTS = {
@@ -128,25 +128,22 @@ def _build(self, image, gt_boxes=None, is_training=True):
         class_probabilities = slim.softmax(class_scores)
 
         # Generate anchors
-        self.anchors = self.generate_all_anchors(feature_maps)
+        raw_anchors_per_featmap = self.generate_raw_anchors(feature_maps)
         all_anchors_list = []
         for i, (feat_map_name, feat_map) in enumerate(feature_maps.items()):
             # TODO: Anchor generation should be simpler. We should create
             #       them in image scale from the start instead of scaling
             #       them to their feature map size.
-            feat_map_shape = feat_map.get_shape().as_list()[1:3]
-            adjusted_bboxes = adjust_bboxes(
-                self.anchors[feat_map_name],
-                tf.cast(feat_map_shape[0], tf.float32),
-                tf.cast(feat_map_shape[1], tf.float32),
-                tf.cast(tf.shape(image)[1], tf.float32),
-                tf.cast(tf.shape(image)[2], tf.float32)
+            feat_map_shape = feat_map.shape.as_list()[1:3]
+            scaled_bboxes = adjust_bboxes(
+                raw_anchors_per_featmap[feat_map_name], feat_map_shape[0],
+                feat_map_shape[1], self.image_shape[0], self.image_shape[1]
             )
-            # Clip anchors to the image.
-            adjusted_bboxes = clip_boxes(
-                adjusted_bboxes, tf.cast(tf.shape(image)[1:3], tf.int32))
-            all_anchors_list.append(adjusted_bboxes)
-        all_anchors = tf.concat(all_anchors_list, axis=0)
+            clipped_bboxes = clip_boxes(scaled_bboxes, self.image_shape)
+            all_anchors_list.append(clipped_bboxes)
+        all_anchors = np.concatenate(all_anchors_list, axis=0)
+        # They were using float64, is all this precision necesary?
+        all_anchors = tf.convert_to_tensor(all_anchors, dtype=tf.float64)
 
         prediction_dict = {}
         if gt_boxes is not None:
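Note the import swap above (bbox_transform instead of bbox_transform_tf): since anchors are plain NumPy arrays at this point, clipping can happen outside the graph as well. Below is a minimal sketch of what such a clipping step does, using a simplified stand-in rather than the real signature of luminoth.utils.bbox_transform.clip_boxes.

import numpy as np

def clip_boxes_np(boxes, image_shape):
    """Clamp (x1, y1, x2, y2) boxes to the image bounds.

    A simplified stand-in, not Luminoth's actual implementation.
    """
    height, width = image_shape
    boxes = boxes.copy()
    boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, width - 1)   # x coordinates
    boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, height - 1)  # y coordinates
    return boxes

# An anchor spilling over a 300x300 input gets clamped to its borders.
print(clip_boxes_np(np.array([[-10.0, 20.0, 310.0, 280.0]]), (300, 300)))
# -> [[  0.  20. 299. 280.]]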
@@ -205,7 +202,6 @@ def _build(self, image, gt_boxes=None, is_training=True):
             # TODO add variable summaries
 
             if self._debug:
-                prediction_dict['anchors'] = self.anchors
                 prediction_dict['all_anchors'] = all_anchors
                 prediction_dict['all_anchors_target'] = all_anchors
                 prediction_dict['cls_prob'] = class_probabilities
@@ -321,7 +317,7 @@ def loss(self, prediction_dict):

         return total_loss
 
-    def generate_all_anchors(self, feature_maps):
+    def generate_raw_anchors(self, feature_maps):
         """
         Returns a dictionary containing the anchors per feature map.
@@ -367,32 +363,28 @@ def _generate_anchors(self, feature_map_shape, anchor_reference):
         using the (x1, y1, x2, y2) convention.
         """
         with tf.variable_scope('generate_anchors'):
-            shift_x = tf.range(feature_map_shape[1])
-            shift_y = tf.range(feature_map_shape[0])
-            shift_x, shift_y = tf.meshgrid(shift_x, shift_y)
+            shift_x = np.arange(feature_map_shape[1])
+            shift_y = np.arange(feature_map_shape[0])
+            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
 
-            shift_x = tf.reshape(shift_x, [-1])
-            shift_y = tf.reshape(shift_y, [-1])
+            shift_x = np.reshape(shift_x, [-1])
+            shift_y = np.reshape(shift_y, [-1])
 
-            shifts = tf.stack(
+            shifts = np.stack(
                 [shift_x, shift_y, shift_x, shift_y],
                 axis=0
             )
 
-            shifts = tf.transpose(shifts)
+            shifts = np.transpose(shifts)
             # Shifts now is a (H x W, 4) Tensor
 
             # Expand dims to use broadcasting sum.
             all_anchors = (
-                tf.expand_dims(anchor_reference, axis=0) +
-                tf.cast(tf.expand_dims(shifts, axis=1), tf.float64)
+                np.expand_dims(anchor_reference, axis=0) +
+                np.expand_dims(shifts, axis=1)
             )
 
-            # Flatten
-            all_anchors = tf.reshape(
-                all_anchors, (-1, 4)
-            )
-            return all_anchors
+            return np.reshape(all_anchors, (-1, 4))
 
     @property
     def summary(self):
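The broadcasting sum in _generate_anchors above can be checked in isolation: adding a (1, num_anchors, 4) anchor reference to a (H * W, 1, 4) grid of shifts replicates every reference anchor at every feature-map position. A small self-contained NumPy check, with arbitrary toy values:

import numpy as np

# Toy anchor references: two anchors per location, as (x1, y1, x2, y2) offsets.
anchor_reference = np.array([
    [-8.0, -8.0, 8.0, 8.0],
    [-16.0, -4.0, 16.0, 4.0],
])

feature_map_shape = (3, 4)  # (height, width), arbitrary for the example

# Same shift construction as in _generate_anchors above.
shift_x, shift_y = np.meshgrid(
    np.arange(feature_map_shape[1]), np.arange(feature_map_shape[0]))
shifts = np.stack(
    [shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()],
    axis=0).T  # shape (H * W, 4)

# Broadcasting sum: (1, 2, 4) + (12, 1, 4) -> (12, 2, 4), i.e. every anchor
# reference replicated at every feature-map position, then flattened.
all_anchors = (
    np.expand_dims(anchor_reference, axis=0) +
    np.expand_dims(shifts, axis=1)
).reshape(-1, 4)

print(all_anchors.shape)  # (24, 4) == H * W * num_anchors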
24 changes: 11 additions & 13 deletions luminoth/models/ssd/ssd_utils.py
@@ -14,23 +14,21 @@ def adjust_bboxes(bboxes, old_height, old_width, new_height, new_width):
     Returns:
         Tensor with shape (num_bboxes, 4), with the adjusted bboxes.
     """
-    # We normalize bounding boxes points.
-    bboxes_float = tf.to_float(bboxes)
-    x_min, y_min, x_max, y_max = tf.unstack(bboxes_float, axis=1)
-
-    x_min = x_min / old_width
-    y_min = y_min / old_height
-    x_max = x_max / old_width
-    y_max = y_max / old_height
+    # x_min, y_min, x_max, y_max = np.split(bboxes, 4, axis=1)
+    # import ipdb; ipdb.set_trace()
+    x_min = bboxes[:, 0] / old_width
+    y_min = bboxes[:, 1] / old_height
+    x_max = bboxes[:, 2] / old_width
+    y_max = bboxes[:, 3] / old_height
 
     # Use new size to scale back the bboxes points to absolute values.
-    x_min = tf.to_int32(x_min * new_width)
-    y_min = tf.to_int32(y_min * new_height)
-    x_max = tf.to_int32(x_max * new_width)
-    y_max = tf.to_int32(y_max * new_height)
+    x_min = x_min * new_width
+    y_min = y_min * new_height
+    x_max = x_max * new_width
+    y_max = y_max * new_height
 
     # Concat points and label to return a [num_bboxes, 4] tensor.
-    return tf.stack([x_min, y_min, x_max, y_max], axis=1)
+    return np.stack([x_min, y_min, x_max, y_max], axis=1)
 
 
 def generate_anchors_reference(ratios, scales, num_anchors, feature_map_shape):
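A quick worked example of what the rewritten adjust_bboxes computes: boxes given in feature-map coordinates are normalized by the old size and rescaled to the new (image) size, now keeping float precision instead of truncating with tf.to_int32. Values below are arbitrary:

import numpy as np

# One box covering the top-left cell of an 8x8 feature map,
# rescaled to a 300x300 input image.
bboxes = np.array([[0.0, 0.0, 1.0, 1.0]])
old_height, old_width = 8, 8
new_height, new_width = 300, 300

x_min = bboxes[:, 0] / old_width * new_width
y_min = bboxes[:, 1] / old_height * new_height
x_max = bboxes[:, 2] / old_width * new_width
y_max = bboxes[:, 3] / old_height * new_height

print(np.stack([x_min, y_min, x_max, y_max], axis=1))
# -> [[ 0.   0.  37.5 37.5]]  (previously these values would have been truncated)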
