In [1]:
import tensorflow as tf 
import os 
import numpy as np
import glob 
from pathlib import Path
from PIL import Image
import re 

In [2]:
# Create a mirrored distribution strategy and report how many replicas
# (devices) it will keep in sync during training.
strategy = tf.distribute.MirroredStrategy()
print(f'Number of devices: {strategy.num_replicas_in_sync}')

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3')
Number of devices: 4


In [3]:
class YOLODataReader(object):
    """Reads (image, label) samples from a YOLO-style directory layout.

    Images are the ``*.jpg`` files found directly inside ``img_dir``; the
    label of an image is the ``.txt`` file with the same relative name inside
    ``label_dir``. Each non-empty line of a label file holds five numbers,
    ``cls x y w h``.

    Args:
        img_dir: directory containing the ``.jpg`` images.
        label_dir: directory containing the matching ``.txt`` label files.
    """

    def __init__(self, img_dir, label_dir):
        self.img_dir = img_dir
        self.label_dir = label_dir
        # glob returns files in arbitrary order; sort so the sample order is
        # reproducible from run to run.
        self.imgs_path = sorted(glob.glob(f"{self.img_dir}/*.jpg"))
        self.idx = len(self.imgs_path)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair for the image at ``index``."""
        img_path = self.imgs_path[index]
        label_path = self.find_label(img_path)
        img = self.load_img(img_path)
        label = self.load_label(label_path)
        return img, label

    def __len__(self):
        """Number of images discovered in ``img_dir``."""
        return len(self.imgs_path)

    def iter(self):
        """Generator over the first ``self.idx`` samples, in index order."""
        for i in range(self.idx):
            yield self[i]

    def load_img(self, img_path):
        """Read and decode an image as 3-channel float32, resized to 640x640."""
        img = tf.io.read_file(img_path)
        img = tf.image.decode_image(img, channels=3, dtype=tf.float32)
        img = tf.image.resize(img, (640, 640))
        return img

    def load_label(self, label_path):
        """Load a label file as a float32 array of rows ``[x, y, w, h, cls]``.

        Args:
            label_path: path to the label file, or ``None`` when the image has
                no label file (which is what ``find_label`` returns then).

        Returns:
            ``np.ndarray`` of shape ``(n, 5)``. When the path is ``None``, the
            file is missing, or it contains no label lines, a single all-zero
            row of shape ``(1, 5)`` is returned instead.
        """
        no_label = np.zeros((1, 5), np.float32)
        if (label_path is None
                or not os.path.exists(label_path)
                or os.path.getsize(label_path) == 0):
            return no_label

        with open(label_path, "r") as f:
            # Skip blank lines so a trailing newline cannot create a ragged array.
            rows = [list(map(float, line.split())) for line in f if line.strip()]
        if not rows:
            return no_label

        labels = np.array(rows, dtype=np.float32)
        # [cls, x, y, w, h] -> [x, y, w, h, cls]. The roll must be done per row
        # (axis=1): without an axis numpy rolls the flattened array, which
        # moves class ids across rows whenever there is more than one box.
        return np.roll(labels, -1, axis=1)

    def find_label(self, img_path):
        """Map an image path to its label path.

        The image path is taken relative to ``img_dir``, re-rooted under
        ``label_dir`` and given a ``.txt`` extension. Plain path operations
        are used (not regular expressions) so directory names containing
        regex metacharacters are handled literally.

        Returns:
            The label path if that file exists, otherwise ``None``.
        """
        relative = os.path.relpath(img_path, self.img_dir)
        label_path = os.path.join(self.label_dir, os.path.splitext(relative)[0] + ".txt")
        return label_path if os.path.exists(label_path) else None

    def load_img_and_label(self, idx):
        """Return the ``(image, label)`` pair at ``idx`` (same as ``self[idx]``)."""
        return self[idx]

In [4]:
class DataLoader(object):
    '''
    Data pipeline from a data_reader yielding (image, label) to a tf.data.Dataset.

    Args:
        data_reader: object exposing an ``iter`` generator method that yields
            ``(image, label)`` pairs; labels have five columns.
        anchors: anchor sizes, forwarded unchanged to ``AnchorLabeler``.
        stride: per-scale stride; grid sizes are computed as ``img_size / stride``.
        img_size: side length of the (square) input image.
        anchor_assign_method: forwarded to ``AnchorLabeler`` as ``assign_method``.
        anchor_positive_augment: forwarded to ``AnchorLabeler`` as ``extend_offset``.
    '''
    def __init__(self, data_reader, anchors, stride, img_size=640, anchor_assign_method='wh',
                 anchor_positive_augment=True):
        self.data_reader = data_reader
        self.img_size = img_size
        self.anchor_label = AnchorLabeler(anchors,
                                          grids=img_size / stride,
                                          img_size=img_size,
                                          assign_method=anchor_assign_method,
                                          extend_offset=anchor_positive_augment)

    def __call__(self, batch_size=8, anchor_label=True):
        '''
        Build the batched, prefetched dataset.

        Args:
            batch_size: number of samples per batch.
            anchor_label: when True (training) labels are encoded into
                per-scale anchor targets; when False the raw label rows are
                returned.

        Returns:
            A ``tf.data.Dataset`` of ``(images, targets)`` batches.
        '''
        side = int(self.img_size)
        image_shape = [side, side, 3]
        label_shape = [None, 5]
        dataset = tf.data.Dataset.from_generator(self.data_reader.iter,
                                                 output_types=(tf.float32, tf.float32),
                                                 output_shapes=(image_shape, label_shape))

        if anchor_label:  # when train
            dataset = dataset.map(self.transform, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            dataset = dataset.batch(batch_size)
        else:
            # Raw labels have a different number of rows per image, which a
            # plain batch() cannot stack; pad the label rows (with zeros, the
            # default padding value) up to the longest sample in each batch.
            dataset = dataset.padded_batch(batch_size, padded_shapes=(image_shape, label_shape))
        return dataset.prefetch(tf.data.experimental.AUTOTUNE)

    def transform(self, image, label):
        '''Encode one sample's label rows into anchor targets; the image is passed through.'''
        label_encoder = self.anchor_label.encode(label)
        return image, label_encoder
        


In [5]:
class AnchorLabeler(object):
    """Turns annotated boxes into per-scale anchor targets for an anchor-based loss.

    Args:
        anchors: array indexed as ``[scale][anchor] -> (w, h)``. Box sizes are
            compared against it after being multiplied by the grid size, so it
            is expected in grid units (from yaml.anchors to Detect.anchors).
        grids: per-scale grid size (indexable, one entry per scale).
        img_size: input image side length used to rescale the targets.
        assign_method: ``'wh'`` (size-ratio matching) or ``'iou'``.
        extend_offset: when True, matched boxes are also assigned to
            neighbouring grid cells (see ``enrich_pos_by_position``).
        rect_style: neighbour-extension style passed to
            ``enrich_pos_by_position``.
        anchor_match_threshold: must be > 1 for ``'wh'`` (e.g. 4.0) and < 1
            for ``'iou'`` (e.g. 0.3).
    """

    def __init__(self, anchors, grids, img_size=640, assign_method='wh', extend_offset=True, rect_style='rect4', anchor_match_threshold=4.0):  # 4.0 or 0.3
        self.anchors = anchors  # from yaml.anchors to Detect.anchors, w/h based on grid coordinates
        self.grids = grids
        self.img_size = img_size
        self.assign_method = assign_method
        self.extend_offset = extend_offset
        self.rect_style = rect_style
        self.anchor_match_threshold = anchor_match_threshold

    def encode(self, labels):
        ''' Assign every label to the anchors and grid cells responsible for it.

        labels: (n_gt, 5) rows of x/y/w/h/class in normalized image
            coordinates (one sample), or None for "no labels".
        anchors (self.anchors): (n_scale, n_anchor_per_scale, 2) w/h.
        return: tuple with one tensor per scale, each of shape
            (grid, grid, n_anchor_per_scale, 6); the last axis is
            [x, y, w, h, objectness, class] with x/y/w/h rescaled by img_size.
        '''

        self.num_scales = self.anchors.shape[0]
        self.n_anchor_per_scale = self.anchors.shape[1]
        y_anchor_encode = []
        gain = tf.ones(5, tf.float32)

        for i in range(self.num_scales):
            anchor = self.anchors[i]
            grid_size = tf.cast(self.grids[i], tf.int32)
            # 6 (xywh objectness cls)
            y_true = tf.zeros([grid_size, grid_size, self.n_anchor_per_scale, 6], tf.float32)
            # gain multiplies x/y/w/h by the grid size and leaves the class column untouched
            gain = tf.tensor_scatter_nd_update(gain, [[0], [1], [2], [3]], [grid_size] * 4)

            if labels is not None:
                # Scale only after the None check, otherwise the check is dead code.
                scaled_labels = labels * gain  # label coordinates now are in the same units as the anchors
                gt_wh = scaled_labels[..., 2:4]  # n_gt * 2
                if self.assign_method == 'wh':
                    assert self.anchor_match_threshold > 1, 'threshold is totally different for wh and iou assign'
                    matched_matrix = self.assign_criterion_wh(gt_wh, anchor, self.anchor_match_threshold)
                elif self.assign_method == 'iou':
                    assert self.anchor_match_threshold < 1, 'threshold is totally different for wh and iou assign'
                    matched_matrix = self.assign_criterion_iou(gt_wh, anchor, self.anchor_match_threshold)
                else:
                    raise ValueError(
                        "assign_method must be 'wh' or 'iou', got {!r}".format(self.assign_method))

                n_gt = tf.shape(gt_wh)[0]
                # anchor index of every (anchor, gt) pair: n_anchor * n_gt
                assigned_anchor = tf.tile(tf.reshape(tf.range(self.n_anchor_per_scale), (self.n_anchor_per_scale, 1)), (1, n_gt))

                assigned_anchor = tf.expand_dims(assigned_anchor[matched_matrix], 1)  # keep matched pairs only
                assigned_anchor = tf.cast(assigned_anchor, tf.int32)

                assigned_label = tf.tile(tf.expand_dims(scaled_labels, 0), [self.n_anchor_per_scale, 1, 1])
                assigned_label = assigned_label[matched_matrix]

                if self.extend_offset:
                    # Honour the style chosen at construction time instead of
                    # silently falling back to the method's default.
                    assigned_label, assigned_anchor, grid_offset = self.enrich_pos_by_position(
                        assigned_label, assigned_anchor, gain, matched_matrix, rect_style=self.rect_style)
                else:
                    grid_offset = tf.zeros_like(assigned_label[:, 0:2])

                assigned_grid = tf.cast(assigned_label[..., 0:2] - grid_offset, tf.int32)  # n_matched * 2
                assigned_grid = tf.clip_by_value(assigned_grid, clip_value_min=0, clip_value_max=grid_size-1)

                # tensor: grid * grid * 3 * 6, indices (sparse index): ~n_gt * gr * gr * 3, updates: ~n_gt * 6
                # index order is (row = y cell, column = x cell, anchor)
                assigned_indices = tf.concat([assigned_grid[:, 1:2], assigned_grid[:, 0:1], assigned_anchor],
                                             axis=1)

                xy, wh, clss = tf.split(assigned_label, (2, 2, 1), axis=-1)
                # undo the grid scaling and rescale by the image size
                xy = xy / gain[0] * self.img_size
                wh = wh / gain[1] * self.img_size
                obj = tf.ones_like(clss)
                assigned_updates = tf.concat([xy, wh, obj, clss], axis=-1)

                y_true = tf.tensor_scatter_nd_update(y_true, assigned_indices, assigned_updates)
            y_anchor_encode.append(y_true)

        return tuple(y_anchor_encode)  # add a tuple is important here, otherwise raise an error

    def assign_criterion_wh(self, gt_wh, anchors, anchor_threshold):
        """Match by width/height ratio.

        A (anchor, gt) pair matches when, for both w and h, the larger of
        ratio and 1/ratio is below ``anchor_threshold``. Please note that the
        v5 default anchor_threshold is 4.0, related to the positive sample
        augment.

        Returns:
            Boolean tensor of shape n_anchor * n_gt.
        """
        gt_wh = tf.expand_dims(gt_wh, 0)  # => 1 * n_gt * 2
        anchors = tf.expand_dims(anchors, 1)  # => n_anchor * 1 * 2
        ratio = gt_wh / anchors  # => n_anchor * n_gt * 2
        matched_matrix = tf.reduce_max(tf.math.maximum(ratio, 1 / ratio),
                                       axis=2) < anchor_threshold  # => n_anchor * n_gt
        return matched_matrix

    def assign_criterion_iou(self, gt_wh, anchors, anchor_threshold):
        """Match by IoU of the width/height pairs; anchor_threshold < 1.

        The intersection is min(w) * min(h), i.e. both boxes are treated as
        sharing one corner.

        Returns:
            Boolean tensor of shape n_anchor * n_gt.
        """
        box_wh = tf.expand_dims(gt_wh, 0)  # => 1 * n_gt * 2
        box_area = box_wh[..., 0] * box_wh[..., 1]  # => 1 * n_gt

        anchors = tf.cast(anchors, tf.float32)  # => n_anchor * 2
        anchors = tf.expand_dims(anchors, 1)  # => n_anchor * 1 * 2
        anchors_area = anchors[..., 0] * anchors[..., 1]  # => n_anchor * 1

        inter = tf.math.minimum(anchors[..., 0], box_wh[..., 0]) * tf.math.minimum(anchors[..., 1],
                                                                                   box_wh[..., 1])  # n_anchor * n_gt
        iou = inter / (anchors_area + box_area - inter + 1e-9)  # epsilon avoids 0 / 0

        iou = iou > anchor_threshold
        return iou

    def enrich_pos_by_position(self, assigned_label, assigned_anchor, gain, matched_matrix, rect_style='rect4'):
        """Add extra positives in grid cells adjacent to each matched box.

        With ``rect_style='rect4'`` a box whose centre lies in the first half
        of its cell (and is more than one cell from the border) is duplicated
        with an offset towards the left/upper neighbour, and likewise towards
        the right/lower neighbour when it lies in the second half.
        For any other style (including ``'rect2'``) no duplicates are added
        and the returned offsets are all zero.

        Args:
            assigned_label: n_matched * 5 labels in grid units.
            assigned_anchor: n_matched * 1 anchor indices.
            gain: 5-vector whose first two entries are the grid size.
            matched_matrix: unused here; kept for interface compatibility.
            rect_style: extension style, see above.

        Returns:
            (assigned_label, assigned_anchor, grid_offset), where grid_offset
            is subtracted from the label x/y by the caller to select the cell.
        """
        assigned_xy = assigned_label[..., 0:2]  # n_matched * 2
        offset = tf.constant([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], tf.float32)
        grid_offset = tf.zeros_like(assigned_xy)

        if rect_style == 'rect2':
            g = 0.2  # offset
        elif rect_style == 'rect4':
            g = 0.5  # offset
            matched = (assigned_xy % 1. < g) & (assigned_xy > 1.)
            matched_left = matched[:, 0]
            matched_up = matched[:, 1]
            matched = (assigned_xy % 1. > (1 - g)) & (assigned_xy < tf.expand_dims(gain[0:2], 0) - 1.)
            matched_right = matched[:, 0]
            matched_down = matched[:, 1]

            assigned_anchor = tf.concat([assigned_anchor, assigned_anchor[matched_left], assigned_anchor[matched_up],
                                         assigned_anchor[matched_right], assigned_anchor[matched_down]], axis=0)
            assigned_label = tf.concat([assigned_label, assigned_label[matched_left], assigned_label[matched_up],
                                        assigned_label[matched_right], assigned_label[matched_down]], axis=0)

            grid_offset = g * tf.concat(
                [grid_offset, grid_offset[matched_left] + offset[1], grid_offset[matched_up] + offset[2],
                 grid_offset[matched_right] + offset[3], grid_offset[matched_down] + offset[4]], axis=0)

        return assigned_label, assigned_anchor, grid_offset

In [6]:
# Anchor (w, h) sizes in input-image pixels: one row of three anchors per detection scale.
anchors = np.array([[[10,13], [16,30], [33,23]],       # P3/8
           [[30,61], [62,45], [59,119]],      # P4/16
           [[116,90], [156,198], [373,326]]], dtype=np.float32)  # P5/32
# NOTE: despite the name, `grids` holds the stride of each scale (8/16/32), not the
# grid size. The name is kept because the next cell passes it as the `stride` argument.
grids = np.array([8, 16, 32])
img_size = np.array(640)
anchor_assign_method = "wh"
anchor_positive_augment = True
# AnchorLabeler expects grid sizes (img_size / stride = 80/40/20) and compares box sizes
# in grid units, so convert both the strides and the pixel anchors before handing them over.
anchorlabeler = AnchorLabeler((anchors / grids[:, None, None]).astype(np.float32),
                              grids=img_size / grids)

In [7]:
# Locations of the images and of their YOLO-format label files.
img_dir = "/app/data/images/small_set/"
label_dir = "/app/data/labels/small_set/"
yolo_reader = YOLODataReader(img_dir, label_dir)

# The labeler scales ground-truth boxes to grid units before comparing them with the
# anchors, so the pixel-unit anchors must be divided by the stride of their scale
# (`grids` holds the strides 8/16/32). Kept as float32 to match the label dtype.
grid_anchors = (anchors / np.reshape(grids, (-1, 1, 1))).astype(np.float32)

data_loader = DataLoader(yolo_reader,
                         grid_anchors,
                         grids,
                         img_size,
                         anchor_assign_method,
                         anchor_positive_augment)
train_dataset = data_loader(batch_size=1, anchor_label=True)

In [8]:
# Sanity check: for every batch print the image batch shape followed by the
# target shape of each of the three detection scales.
for x, y in train_dataset:
    print(x.shape)
    for scale in range(3):
        print(y[scale].shape)

(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
(1, 640, 640, 3)
(1, 80, 80, 3, 6)
(1, 40, 40, 3, 6)
(1, 20, 20, 3, 6)
