In [51]:
import tensorflow as tf
from tensorflow import keras 
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# import tensorflow.keras.preprocessing.image as img
import cv2
import skvideo
import math

from utils import scale_boxes, read_classes, read_anchors, get_colors_for_classes, draw_boxes, preprocess_image
from keras_yolo import yolo, yolo_head
from IPython.display import Video

tf.random.set_seed(0)

print(tf.__version__)
tf.config.list_physical_devices()

2.7.0


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [3]:
def filter_boxes(boxes, box_confidences, box_class_probs, threshold = 0.6):
   
    # compute box scores as P(each class | box_confidence)
    box_scores = box_confidences * box_class_probs

    # find the most probably class in each box and its probability
    box_classes = tf.math.argmax(box_scores, axis=-1)
    box_class_scores = tf.math.reduce_max(box_scores, axis=-1)

    # select boxes that have class score above threshold
    mask = box_class_scores > threshold

    # apply mask 
    scores = tf.boolean_mask(box_class_scores, mask=mask)
    boxes = tf.boolean_mask(boxes, mask=mask)
    classes = tf.boolean_mask(box_classes, mask=mask)

    return scores, boxes, classes

def nonmax_suppress(scores, boxes, classes, max_boxes=15, iou_threshold=0.6):

    max_boxes_tensor = tf.Variable(max_boxes, dtype='int32')

    non_max_suppressed_indices = tf.image.non_max_suppression(boxes, scores, max_boxes , iou_threshold=iou_threshold)
    
    scores = tf.gather(scores, non_max_suppressed_indices)
    boxes = tf.gather(boxes, non_max_suppressed_indices)
    classes = tf.gather(classes, non_max_suppressed_indices)

    return scores, boxes, classes

In [47]:
def yolo_boxes_to_corners(box_xy, box_wh):
    """Convert YOLO box predictions to bounding box corners."""
    box_mins = box_xy - (box_wh / 2.)
    box_maxes = box_xy + (box_wh / 2.)

    return tf.keras.backend.concatenate([
        box_mins[..., 1:2],  # y_min
        box_mins[..., 0:1],  # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1]  # x_max
    ])

def scale_boxes(boxes, image_shape):
    height = float(image_shape[0])
    width = float(image_shape[1])
    image_dims = keras.backend.stack([height, width, height, width])
    image_dims = keras.backend.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims
    return boxes

def rescale_eval(outputs, image_shape=(720, 1280), max_boxes = 10, threshold=0.6, iou_threshold=0.5):

    box_xy, box_wh, box_confidences, box_class_probs = outputs

    # convert xy, wh boxes to corners
    boxes = yolo_boxes_to_corners(box_xy=box_xy, box_wh=box_wh)

    # filter out boxes with probability of detecting an object less than the threshold
    scores, boxes, classes = filter_boxes(boxes=boxes, box_confidences=box_confidences, box_class_probs=box_class_probs, threshold=threshold)

    # rescale boxes according to image size
    boxes = scale_boxes(boxes, image_shape)

    scores, boxes, classes = nonmax_suppress(scores, boxes, classes, max_boxes=max_boxes, iou_threshold=iou_threshold)

    return scores, boxes, classes

def video_to_image(video_path, save_path, capture_rate=1):
    # capture_rate is number of capture per second
    cap=cv2.VideoCapture(video_path)
    frame_rate = cap.get(5)
    count=0
    while True:
        frame_id = cap.get(1)
        ret, frame = cap.read()
        if ret != True:
            break
        if frame_id % math.floor(frame_rate / capture_rate) == 0:
            outfile = f"{save_path}/frame{frame_id}.jpg"
            cv2.imwrite(outfile, frame)
            count+=1
    print(f"{count} images extracted from input video")

def annotate_image(model, model_image_size, image_path, anchors, class_names, threshold=0.3, iou_threshold=0.5, max_boxes=10):

    image, image_data = preprocess_image(image_path, model_image_size)

    model_output = model(image_data)

    model_output=yolo_head(model_output, anchors, len(class_names))

    out_scores, out_boxes, out_classes = rescale_eval(model_output, [image.size[1],  image.size[0]], max_boxes, threshold, iou_threshold)

    print(f'Found {len(out_boxes)} boxes for {image_path}')

    colors = get_colors_for_classes(len(class_names))

    draw_boxes(image, out_boxes, out_classes, class_names, out_scores)

    return image

def annotate_images(model, model_image_size, input_path, output_path, anchors, class_names, threshold=0.3, iou_threshold=0.5, max_boxes=10):
    count=0

    os.makedirs(output_path, exist_ok=True)
    for image_path in [os.path.join(input_path, p) for p in os.listdir(input_path) if p.endswith('.jpg')]:
           
        image = annotate_image(model, model_image_size, image_path, anchors, class_names, threshold, iou_threshold, max_boxes)

        image.save(f'{output_path}/{os.path.basename(image_path)}', quality=100)
        count+=1
    print(f'Annotated {count} images')
    
def images_to_video(input_path, output_path, frame_rate=10):

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    img = cv2.imread(image_paths[0])

    fourcc = cv2.VideoWriter_fourcc(*'DIVX') 
    video = cv2.VideoWriter(output_path, fourcc, 8, (img.shape[1], img.shape[0]))

    # writer = skvideo.io.FFmpegWriter("skvideo.mp4")
    for image_path in image_paths:
        img = cv2.imread(image_path)
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        video.write(img)

    video.release()
        

In [49]:
video_to_image('../video/tokyo.mp4', '../images', 1)

1530 images extracted from input video


In [52]:
annotate_images(model, (608, 608), '../images', '../annotated_images', anchors, class_names)

Found 2 boxes for ../images/frame35293.0.jpg
traffic light 0.42 (1092, 525) (1163, 591)
traffic light 0.38 (1089, 515) (1207, 597)
Found 6 boxes for ../images/frame20532.0.jpg
car 0.89 (1066, 770) (1399, 973)
bus 0.41 (1069, 713) (1261, 784)
traffic light 0.36 (655, 644) (689, 679)
car 0.33 (849, 738) (915, 785)
car 0.33 (1029, 742) (1098, 778)
car 0.33 (739, 733) (784, 771)
Found 0 boxes for ../images/frame17835.0.jpg
Found 2 boxes for ../images/frame19227.0.jpg
car 0.38 (914, 739) (963, 793)
car 0.33 (845, 745) (881, 782)
Found 3 boxes for ../images/frame24853.0.jpg
truck 0.79 (1575, 319) (1916, 995)
car 0.56 (1245, 738) (1373, 828)
truck 0.41 (1524, 622) (1656, 911)
Found 2 boxes for ../images/frame32016.0.jpg
traffic light 0.49 (1504, 746) (1563, 794)
traffic light 0.39 (1266, 735) (1295, 765)
Found 5 boxes for ../images/frame38048.0.jpg
car 0.78 (1020, 637) (1153, 779)
car 0.74 (1214, 686) (1402, 802)
car 0.44 (945, 669) (992, 707)
truck 0.38 (675, 586) (788, 738)
car 0.33 (1139, 

In [53]:
input_path='../annotated_images'
output_path='../annotated_video/tokyo.avi'
images_to_video(input_path=input_path, output_path=output_path, frame_rate=10)

In [2]:
class_names = read_classes("../model_data/coco_classes.txt")
anchors = read_anchors("../model_data/yolo_anchors.txt")

model = tf.keras.models.load_model('yolov2.h5', compile=False)

2021-11-17 15:00:27.661197: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
