In [62]:
import tensorflow as tf
import numpy as np
import cv2

from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
# tf.enable_eager_execution()

In [178]:
img = cv2.imread('dog-facts-cat-facts.jpg')
img = cv2.resize(img, (4, 3))

In [179]:
h, w = img.shape[:2]
print(h, w)
# X = (size=(1, 3, h, w))  # Construct input data
X = tf.convert_to_tensor(img.reshape(-1, h, w, 3))
print(X.shape[-3:-1])

3 4
(3, 4)


In [180]:
def multibox_prior(X, sizes, ratios):
    in_height, in_width = X.shape[-3:-1]
    num_sizes, num_ratios = len(sizes), len(ratios)
    boxes_per_pixel = (num_sizes + num_ratios - 1)
    size_tensor = tf.Variable(sizes)
    ratio_tensor = tf.Variable(ratios)
    # Offsets are required to move the anchor to the center of a pixel. Since
    # a pixel has height=1 and width=1, we choose to offset our centers by 0.5
    offset_h, offset_w = 0.5, 0.5
    steps_h = 1.0 / in_height  # Scaled steps in y axis
    steps_w = 1.0 / in_width  # Scaled steps in x axis
    # print("steps_h, steps_w", steps_h, steps_w)

    # Generate all center points for the anchor boxes
    center_h = (tf.range(in_height, dtype=tf.float32) + offset_h) * steps_h
    center_w = (tf.range(in_width, dtype=tf.float32) + offset_w) * steps_w
    shift_y, shift_x = tf.meshgrid(center_h, center_w, indexing='ij')
    shift_y, shift_x = shift_y.reshape(-1), shift_x.reshape(-1)
    # print("center_h, center_w", center_h, center_w)
    # print("shift_y, shift_x", shift_y, shift_x)

    # Generate `boxes_per_pixel` number of heights and widths that are later
    # used to create anchor box corner coordinates (xmin, xmax, ymin, ymax)
    w = tf.concat((size_tensor * tf.sqrt(ratio_tensor[0]), sizes[0] * tf.sqrt(ratio_tensor[1:])), axis=0) * in_height / in_width  # Handle rectangular inputs
    h = tf.concat((size_tensor / tf.sqrt(ratio_tensor[0]), sizes[0] / tf.sqrt(ratio_tensor[1:])), axis=0)
    # print(w, h)
    # Divide by 2 to get half height and half width
    # print(type(tf.stack((-w, -h, w, h)).T.numpy()))
    # return
    anchor_manipulations = np.tile(tf.stack((-w, -h, w, h), axis=0).numpy(), in_height * in_width) / 2
    anchor_manipulations = anchor_manipulations.T
    # print('anchor_manipulations: ', anchor_manipulations.shape)
    # print(anchor_manipulations)
    # Each center point will have `boxes_per_pixel` number of anchor boxes, so
    # generate a grid of all anchor box centers with `boxes_per_pixel` repeats
    # print('outgrid: ', tf.stack([shift_x, shift_y, shift_x, shift_y], axis=1).numpy().repeat(boxes_per_pixel, 0))
    out_grid = tf.stack([shift_x, shift_y, shift_x, shift_y], axis=1).numpy().repeat(boxes_per_pixel, 0)
    output = out_grid + anchor_manipulations
    print('output: ', output.shape)
    # print(output)
    return tf.expand_dims(output, 0)

In [181]:
Y = multibox_prior(X, sizes=[0.75, 0.5, 0.25], ratios=[1, 2, 0.5])
Y.shape

output:  (60, 4)


TensorShape([1, 60, 4])

In [182]:
boxes = Y.reshape(h, w, 5, 4)
print(boxes.shape)
boxes[2, 2, 0, :]

(3, 4, 5, 4)


<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.34375   , 0.45833337, 0.90625   , 1.2083334 ], dtype=float32)>

In [208]:
## TODO: check for working

def box_iou(boxes1, boxes2):
    """Compute pairwise IoU across two lists of anchor or bounding boxes."""
    box_area = lambda boxes: ((boxes[:, 2] - boxes[:, 0]) *
                              (boxes[:, 3] - boxes[:, 1]))
    # Shape of `boxes1`, `boxes2`, `areas1`, `areas2`: (no. of boxes1, 4),
    # (no. of boxes2, 4), (no. of boxes1,), (no. of boxes2,)
    areas1 = box_area(boxes1)
    areas2 = box_area(boxes2)
    # Shape of `inter_upperlefts`, `inter_lowerrights`, `inters`: (no. of
    # boxes1, no. of boxes2, 2)
    inter_upperlefts = tf.math.maximum(boxes1[:, None, :2], boxes2[:, :2])
    inter_lowerrights = tf.math.minimum(boxes1[:, None, 2:], boxes2[:, 2:])
    inters = (inter_lowerrights - inter_upperlefts).numpy().clip(min=0)
    # print(inters)
    # Shape of `inter_areas` and `union_areas`: (no. of boxes1, no. of boxes2)
    inter_areas = inters[:, :, 0] * inters[:, :, 1]
    union_areas = areas1[:, None] + areas2 - inter_areas
    return inter_areas / union_areas

In [337]:
def assign_anchor_to_bbox(ground_truth, anchors, iou_threshold=0.4):
    """Assign closest ground-truth bounding boxes to anchor boxes."""
    num_anchors, num_gt_boxes = anchors.shape[0], ground_truth.shape[0]
    # Element x_ij in the i-th row and j-th column is the IoU of the anchor
    # box i and the ground-truth bounding box j
    jaccard = box_iou(anchors, ground_truth)
    print('jaccard: ', jaccard)
    # Initialize the tensor to hold the assigned ground-truth bounding box for
    # each anchor
    anchors_bbox_map = tf.fill((num_anchors,), -1)
    print('anchors_bbox_map: ', anchors_bbox_map)
    # Assign ground-truth bounding boxes according to the threshold
    # print(tf.math.reduce_max(jaccard, axis=1))
    
    max_ious = tf.math.reduce_max(jaccard, axis=1)
    indices = tf.math.argmax(jaccard, axis=1)
    print('max_ious, indices: ', max_ious, indices)
    anc_i = tf.where(max_ious >= iou_threshold).reshape(-1)
    print('anc_i: ', anc_i)
    box_j = indices[max_ious >= iou_threshold]
    print('box_j: ', box_j)
    anchors_bbox_map_np = np.array(anchors_bbox_map)
    anchors_bbox_map_np[np.array(anc_i[:])] = box_j
    print('anchors_bbox_map: ', anchors_bbox_map_np)
    col_discard = tf.fill((num_anchors,), -1)
    row_discard = tf.fill((num_gt_boxes,), -1)
    print('row_discard, col_discard: ', row_discard, col_discard)
    jaccard = np.array(jaccard)
    for i in range(num_gt_boxes):
        max_idx = tf.math.argmax(jaccard[:, i])  # Find the largest IoU
        # print('max_idx: ', max_idx)
        box_idx = i
        anc_idx = max_idx
        print('box_idx, anc_idx: ', box_idx, anc_idx)
        anchors_bbox_map_np[anc_idx] = box_idx
        jaccard[:, box_idx] = col_discard
        jaccard[anc_idx, :] = row_discard
    # print('jaccard: ', jaccard)
    return tf.convert_to_tensor(anchors_bbox_map_np)

In [304]:
ground_truth = tf.convert_to_tensor([[0, 0.51, 0.35, 0.76, 0.8],
                         [1, 0.34, 0.42, 0.48, 0.78],
                        [1, 0.3, 0.42, 0.4, 0.7]])
anchors = tf.convert_to_tensor([[0, 0.1, 0.2, 0.3], [0.51, 0.37, 0.7, 0.84],
                    [0.34, 0.40, 0.5, 0.72], [0.3, 0.42, 0.4, 0.7],
                    [0.3, 0.42, 0.48, 0.78]])

In [300]:
jaccard = box_iou(anchors, ground_truth[:, 1:])
jaccard

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[0.        , 0.        , 0.        ],
       [0.68026644, 0.        , 0.        ],
       [0.        , 0.7046982 , 0.26923078],
       [0.27667972, 0.        , 0.        ],
       [0.        , 0.77777773, 0.43209884]], dtype=float32)>

In [424]:
def box_corner_to_center(cords):
    try:
        ret = cords.numpy()
    except:
        ret = np.copy(cords)
    ret[:, 0] = cords[:, 0] + (cords[:, 2] - cords[:, 0]) / 2
    ret[:, 1] = cords[:, 1] + (cords[:, 3] - cords[:, 1]) / 2
    ret[:, 2] = cords[:, 2] - cords[:, 0]
    ret[:, 3] = cords[:, 3] - cords[:, 1]
    return tf.convert_to_tensor(ret)

In [468]:
def box_center_to_corner(cords):
    try:
        ret = cords.numpy()
    except:
        ret = np.copy(cords)
    ret[:, 0] = cords[:, 0] - cords[:, 2] / 2
    ret[:, 1] = cords[:, 1] - cords[:, 3] / 2
    ret[:, 2] = ret[:, 2] + ret[:, 0]
    ret[:, 3] = ret[:, 3] + ret[:, 1]
    return tf.convert_to_tensor(ret)

In [435]:
def offset_boxes(anchors, assigned_bb, eps=1e-6):
    print("offset_boxes", anchors.shape, assigned_bb.shape)
    print(assigned_bb, assigned_bb)
    """Transform for anchor box offsets."""
    # change anchors to yolo form (cx, cy, width, height)
    c_anc = box_corner_to_center(anchors)
    print('c_anc: ', c_anc)
    # change bbox to yolo form (cx, cy, width, height)
    c_assigned_bb = box_corner_to_center(assigned_bb)
    print('c_assigned_bb: ', c_assigned_bb)
    offset_xy = 10 * (c_assigned_bb[:, :2] - c_anc[:, :2]) / c_anc[:, 2:]
    offset_wh = 5 * tf.math.log(eps + c_assigned_bb[:, 2:] / c_anc[:, 2:])
    offset = tf.concat([offset_xy, offset_wh], axis=1)
    return offset

In [440]:
def multibox_target(anchors, labels):
    """Label anchor boxes using ground-truth bounding boxes."""
    batch_size, anchors = labels.shape[0], anchors[0]
    batch_offset, batch_mask, batch_class_labels = [], [], []
    device, num_anchors = anchors.device, anchors.shape[0]
    for i in range(batch_size):
        label = labels[i, :, :]
        anchors_bbox_map = assign_anchor_to_bbox(label[:, 1:], anchors)
        print('anchors_bbox_map: ', anchors_bbox_map)
        bbox_mask = (anchors_bbox_map >= 0).numpy().repeat(4, 0).reshape(-1, 4).astype('float16')
        print('bbox_mask: ', bbox_mask)
        # Initialize class labels and assigned bounding box coordinates with
        # zeros
        class_labels = tf.zeros(num_anchors)
        assigned_bb = tf.zeros((num_anchors, 4))
        print('class_labels, assigned_bb: ', class_labels, assigned_bb)
        # Label classes of anchor boxes using their assigned ground-truth
        # bounding boxes. If an anchor box is not assigned any, we label its
        # class as background (the value remains zero)
        indices_true = tf.where(anchors_bbox_map >= 0)
        print('indices_true: ', indices_true)
        bb_idx = anchors_bbox_map[indices_true]
        print('bb_idx: ', bb_idx)
        # print(' class_labels: ', label[bb_idx, 0] + 1)
        class_labels = class_labels.numpy()
        class_labels[np.array(indices_true)] = label[bb_idx, 0] + 1
        assigned_bb = assigned_bb.numpy()
        assigned_bb[np.array(indices_true)] = label[bb_idx, 1:]
        print('class_labels, assigned_bb after: ', class_labels, assigned_bb)
        # Offset transformation
        offset = offset_boxes(anchors, assigned_bb) * bbox_mask
        print('offset: ', offset)
        batch_offset.append(offset.reshape(-1))
        batch_mask.append(bbox_mask.reshape(-1))
        batch_class_labels.append(class_labels)
    bbox_offset = tf.stack(batch_offset)
    bbox_mask = tf.stack(batch_mask)
    class_labels = tf.stack(batch_class_labels)
    return (bbox_offset, bbox_mask, class_labels)

In [441]:
labels = multibox_target(tf.expand_dims(anchors, 0), tf.expand_dims(ground_truth, 0))

jaccard:  tf.Tensor(
[[0.         0.         0.        ]
 [0.68026644 0.         0.        ]
 [0.         0.7046982  0.26923078]
 [0.         0.27272734 1.        ]
 [0.         0.77777773 0.43209884]], shape=(5, 3), dtype=float32)
anchors_bbox_map:  tf.Tensor([-1 -1 -1 -1 -1], shape=(5,), dtype=int32)
max_ious, indices:  tf.Tensor([0.         0.68026644 0.7046982  1.         0.77777773], shape=(5,), dtype=float32) tf.Tensor([0 0 1 2 1], shape=(5,), dtype=int64)
anc_i:  tf.Tensor([1 2 3 4], shape=(4,), dtype=int64)
box_j:  tf.Tensor([0 1 2 1], shape=(4,), dtype=int64)
anchors_bbox_map:  [-1  0  1  2  1]
row_discard, col_discard:  tf.Tensor([-1 -1 -1], shape=(3,), dtype=int32) tf.Tensor([-1 -1 -1 -1 -1], shape=(5,), dtype=int32)
box_idx, anc_idx:  0 tf.Tensor(1, shape=(), dtype=int64)
box_idx, anc_idx:  1 tf.Tensor(4, shape=(), dtype=int64)
box_idx, anc_idx:  2 tf.Tensor(3, shape=(), dtype=int64)
anchors_bbox_map:  tf.Tensor([-1  0  1  2  1], shape=(5,), dtype=int32)
bbox_mask:  [[0. 0.

In [443]:
labels[2]

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=array([[0., 1., 2., 2., 2.]], dtype=float32)>

In [444]:
labels[1]

<tf.Tensor: shape=(1, 20), dtype=float16, numpy=
array([[0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1.]], dtype=float16)>

In [446]:
labels[0]

<tf.Tensor: shape=(1, 20), dtype=float32, numpy=
array([[-0.0000000e+00, -0.0000000e+00, -0.0000000e+00, -0.0000000e+00,
         1.5789459e+00, -6.3829857e-01,  1.3721877e+00, -2.1741962e-01,
        -6.2500125e-01,  1.2499987e+00, -6.6765153e-01,  5.8891886e-01,
         0.0000000e+00,  0.0000000e+00,  4.7683693e-06,  4.7683693e-06,
         1.1111119e+00,  0.0000000e+00, -1.2565656e+00,  4.7683693e-06]],
      dtype=float32)>

In [462]:
def offset_inverse(anchors, offset_preds):
    """Predict bounding boxes based on anchor boxes with predicted offsets."""
    anc = box_corner_to_center(anchors)
    pred_bbox_xy = (offset_preds[:, :2] * anc[:, 2:] / 10) + anc[:, :2]
    pred_bbox_wh = tf.math.exp(offset_preds[:, 2:] / 5) * anc[:, 2:]
    pred_bbox = tf.concat((pred_bbox_xy, pred_bbox_wh), axis=1)
    predicted_bbox = box_center_to_corner(pred_bbox)
    return predicted_bbox

In [487]:

def nms(boxes, scores, iou_threshold):
    """Sort confidence scores of predicted bounding boxes."""
    B = tf.argsort(scores, axis=-1, direction='DESCENDING')
    keep = []  # Indices of predicted bounding boxes that will be kept
    while B.numpy().size > 0:
        i = B[0]
        keep.append(i)
        if B.numpy().size == 1: break
        iou = box_iou(boxes[i, :].reshape(-1, 4),
                      boxes[B[1:], :].reshape(-1, 4)).reshape(-1)
        inds = tf.where(iou <= iou_threshold).reshape(-1)
        B = B[inds + 1]
    return tf.convert_to_tensor(keep)

In [545]:

def multibox_detection(cls_probs, offset_preds, anchors, nms_threshold=0.5,
                       pos_threshold=0.009999999):
    """Predict bounding boxes using non-maximum suppression."""
    device, batch_size = cls_probs.device, cls_probs.shape[0]
    anchors = anchors[0]
    num_classes, num_anchors = cls_probs.shape[1], cls_probs.shape[2]
    out = []
    for i in range(batch_size):
        cls_prob, offset_pred = cls_probs[i], offset_preds[i].reshape(-1, 4)
        # conf, class_id = torch.max(cls_prob[1:], 0)
        conf = tf.math.reduce_max(cls_prob[1:], axis=0)
        class_id = tf.math.argmax(cls_prob[1:], axis=0)
        print('conf, class_id: ', conf, class_id)
        predicted_bb = offset_inverse(anchors, offset_pred)
        print('predicted_bb: ', predicted_bb)
        keep = nms(predicted_bb, conf, nms_threshold)
        print('keep: ', keep)
        # Find all non-`keep` indices and set the class to background
        all_idx = tf.range(num_anchors)
        combined = tf.concat((keep, all_idx), axis=0)
        print('combined: ', combined)
        uniques, counts = np.unique(combined, return_counts=True)
        non_keep = uniques[counts == 1]
        all_id_sorted = tf.concat((keep, non_keep), axis=0)
        print('all_id_sorted: ', all_id_sorted)
        class_id_np = class_id.numpy()
        class_id_np[non_keep] = -1
        print('class_id_np: ', class_id_np)
        class_id_np = class_id_np[all_id_sorted.numpy()]
        conf, predicted_bb = conf[all_id_sorted], predicted_bb[all_id_sorted]
        # Here `pos_threshold` is a threshold for positive (non-background)
        # predictions
        
        below_min_idx = (conf < pos_threshold)
        class_id_np[below_min_idx.numpy()] = -1
        
        conf = conf.numpy()
        conf[below_min_idx.numpy()] = 1 - conf[below_min_idx.numpy()]
        
        conf = tf.convert_to_tensor(conf, dtype=tf.float64)
        class_id_np = tf.convert_to_tensor(class_id_np, dtype=tf.float64)
        
        print(class_id_np, conf, predicted_bb)
        pred_info = tf.concat((tf.expand_dims(class_id_np, 1), tf.expand_dims(conf, 1), predicted_bb), axis=1)
        out.append(pred_info)
    return tf.stack(out)

In [502]:
ground_truth = tf.convert_to_tensor([[0, 0.51, 0.35, 0.76, 0.8],
                         [1, 0.34, 0.42, 0.48, 0.78],
                        [1, 0.3, 0.42, 0.4, 0.7]])
anchors = tf.convert_to_tensor([[0, 0.1, 0.2, 0.3], [0.51, 0.37, 0.7, 0.84],
                    [0.34, 0.40, 0.5, 0.72], [0.3, 0.42, 0.4, 0.7],
                    [0.3, 0.42, 0.48, 0.78]])

# offset_preds = tf.convert_to_tensor([0] * (len(anchors.numpy()) * len(anchors.numpy()[0])))
offset_preds = tf.convert_to_tensor([0] * anchors.numpy().size)
# print(offset_preds)

cls_probs = tf.convert_to_tensor([[0] * 5,  # Predicted background likelihood
                      [0.9, 0.65, 0.7, 0.1, 0.02],  # Predicted dog likelihood
                      [0.1, 0.2, 0.3, 0.9, 0.3]])  # Predicted cat likelihood

In [554]:
%%time
output = multibox_detection(tf.expand_dims(cls_probs, 0), tf.expand_dims(offset_preds, 0), tf.expand_dims(anchors, 0), nms_threshold=0.5)
output

conf, class_id:  tf.Tensor([0.9  0.65 0.7  0.9  0.3 ], shape=(5,), dtype=float32) tf.Tensor([0 0 0 1 1], shape=(5,), dtype=int64)
predicted_bb:  tf.Tensor(
[[0.         0.10000001 0.2        0.30000003]
 [0.51000002 0.37000003 0.70000002 0.84      ]
 [0.34000002 0.39999999 0.50000001 0.72000001]
 [0.30000003 0.42       0.40000002 0.7       ]
 [0.3        0.41999997 0.47999997 0.77999996]], shape=(5, 4), dtype=float64)
keep:  tf.Tensor([0 3 2 1], shape=(4,), dtype=int32)
combined:  tf.Tensor([0 3 2 1 0 1 2 3 4], shape=(9,), dtype=int32)
all_id_sorted:  tf.Tensor([0 3 2 1 4], shape=(5,), dtype=int32)
class_id_np:  [ 0  0  0  1 -1]
tf.Tensor([ 0.  1.  0.  0. -1.], shape=(5,), dtype=float64) tf.Tensor([0.89999998 0.89999998 0.69999999 0.64999998 0.30000001], shape=(5,), dtype=float64) tf.Tensor(
[[0.         0.10000001 0.2        0.30000003]
 [0.30000003 0.42       0.40000002 0.7       ]
 [0.34000002 0.39999999 0.50000001 0.72000001]
 [0.51000002 0.37000003 0.70000002 0.84      ]
 [0.3    

<tf.Tensor: shape=(1, 5, 6), dtype=float64, numpy=
array([[[ 0.        ,  0.89999998,  0.        ,  0.10000001,
          0.2       ,  0.30000003],
        [ 1.        ,  0.89999998,  0.30000003,  0.42      ,
          0.40000002,  0.7       ],
        [ 0.        ,  0.69999999,  0.34000002,  0.39999999,
          0.50000001,  0.72000001],
        [ 0.        ,  0.64999998,  0.51000002,  0.37000003,
          0.70000002,  0.84      ],
        [-1.        ,  0.30000001,  0.3       ,  0.41999997,
          0.47999997,  0.77999996]]])>