In [10]:
import cv2
import numpy as np
import argparse
from pathlib import Path
from torch.backends import cudnn
from matplotlib import pyplot as plt

import sys 
sys.path.append("..")
from backbone import EfficientDetBackbone
from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.eff_utils import load_yaml
from utils.vis_utils import visualization_bbox,load_onnx,eval_onnx,display_bbox
from utils.utils import (
    preprocess, 
    invert_affine, 
    postprocess, 
    preprocess_video,
    plot_one_box,
    STANDARD_COLORS,
    standard_to_bgr,
    get_index_label,
)

import torch
# Parent of the current working directory. The dataset and project-config
# locations used later in the notebook are resolved against it.
# NOTE(review): this presumes the notebook is launched from a first-level
# subdirectory of the repository — confirm.
path = Path.cwd().parent

In [11]:
import os

# Expose only GPU index 0 to CUDA-aware libraries running in this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [12]:
# --- Project / model selection ---------------------------------------------
project_name = "0509split"
compound_coef = 2  # index into the per-coefficient input-size table
onnx_path = "efficientdet-d2.onnx"

# --- Input video -----------------------------------------------------------
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
video_src = "/home/mazheng/waste-detection/Yet-Another-EfficientDet-Pytorch/datasets/0414split/video/rec0049_4.mp4"

# --- Detection filtering ---------------------------------------------------
threshold = 0.2      # score threshold handed to postprocess()
iou_threshold = 0.2  # IoU threshold handed to postprocess()
# Class names whose boxes are not drawn on the output frames.
exclusion_list = ["pounding", "spiledmaterial"]

# --- Runtime flags ---------------------------------------------------------
# NOTE(review): use_cuda / use_float16 are not read anywhere else in this
# notebook — confirm whether they are still needed.
use_cuda = True
use_float16 = False
cudnn.benchmark = True
# NOTE(review): confirm that the installed PyTorch actually honours `fastest`.
cudnn.fastest = True

In [13]:
# Dataset and project-config locations, all derived from the project name.
img_path = path / f"datasets/{project_name}/val"
ann_json = path / f"datasets/{project_name}/annotations/instances_val.json"
yaml_path = path / f"projects/{project_name}.yml"

project_params = load_yaml(str(yaml_path))

# The anchor settings are stored as strings in the project file and are turned
# into Python objects with eval().
# NOTE(review): eval() executes whatever expression the YAML contains; only
# load project files you trust.
ratios = eval(project_params["anchors_ratios"])
scales = eval(project_params["anchors_scales"])
obj_list = project_params["obj_list"]

# Network input resolution, looked up by compound coefficient.
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
input_size = input_sizes[compound_coef]

In [14]:
# Load the exported ONNX model. The returned session object is the one queried
# through get_inputs() and run() in the inference loop.
ort_session = load_onnx(onnx_path)

2022-05-13 22:07:57.133493794 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'backbone_net.model._blocks.16._bn2.num_batches_tracked'. It is not used by any node and should be removed from the model.
2022-05-13 22:07:57.133570346 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'classifier.bn_list.0.1.num_batches_tracked'. It is not used by any node and should be removed from the model.
2022-05-13 22:07:57.133594501 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'bifpn.4.conv3_up.bn.num_batches_tracked'. It is not used by any node and should be removed from the model.
2022-05-13 22:07:57.133634990 [W:onnxruntime:, graph.cc:3559 CleanUnusedInitializersAndNodeArgs] Removing initializer 'backbone_net.model._blocks.0._bn2.num_batches_tracked'. It is not used by any node and should be removed from the model.
2022-05-13 22:07:57.133660056 [W:onnxruntime:, graph.cc:3559 Cle

In [15]:
# Open the source video and fail fast when it cannot be read; without this
# check the property queries below would not describe a real stream and the
# inference loop would end without writing any frame.
video_path = Path(video_src)
cap = cv2.VideoCapture(video_src)
if not cap.isOpened():
    raise IOError("Cannot open video source: {}".format(video_src))

# Stream properties, read through the named OpenCV constants instead of the
# raw property ids (7, 3, 4, 5).
video_frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would change the
# playback speed of the written file.
video_fps = cap.get(cv2.CAP_PROP_FPS)

# The annotated video is written two directory levels above the source file,
# as "<source stem>_output.avi".
videoname = str(video_path.parents[1] / video_path.stem) + "_output.avi"

fourcc = cv2.VideoWriter_fourcc(*"XVID")
writer = cv2.VideoWriter(
    videoname, fourcc, video_fps, (video_width, video_height), True
)
if not writer.isOpened():
    # Do not leave the capture handle open when the output cannot be created.
    cap.release()
    raise IOError("Cannot open video writer for: {}".format(videoname))

In [16]:
def display_v2(preds, imgs, obj_list, exclusion_list=()):
    """Draw the detections of the first image onto it and return that image.

    Only ``imgs[0]`` / ``preds[0]`` are used; any further entries are ignored.
    The image is modified in place.

    Args:
        preds: sequence of per-image dicts with the keys ``"rois"`` (boxes as
            ``x1, y1, x2, y2``), ``"class_ids"`` (indices into ``obj_list``)
            and ``"scores"``.
        imgs: sequence of images matching ``preds``.
        obj_list: class names, indexed by class id.
        exclusion_list: class names whose boxes must not be drawn.

    Returns:
        The first image with its boxes drawn (unchanged when it has no
        detections), or ``None`` when ``imgs`` is empty.
    """
    color_list = standard_to_bgr(STANDARD_COLORS)
    plt.rcParams["figure.figsize"] = (12.8, 7.2)

    if len(imgs) == 0:
        return None

    img = imgs[0]
    pred = preds[0]
    rois = pred["rois"]
    if len(rois) == 0:
        return img

    for j in range(len(rois)):
        # Builtin int() replaces the `np.int` alias, which no longer exists in
        # current NumPy releases; it also yields plain Python ints for drawing.
        x1, y1, x2, y2 = (int(v) for v in rois[j])
        obj = obj_list[pred["class_ids"][j]]
        score = float(pred["scores"][j])

        if obj in exclusion_list:
            continue

        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
        plot_one_box(
            img,
            [x1, y1, x2, y2],
            label=obj,
            score=score,
            color=color_list[get_index_label(obj, obj_list)],
        )

    return img

In [17]:
def to_numpy(tensor):
    """Return the tensor's data as a NumPy array on the CPU.

    A tensor that requires grad is detached first, since ``.numpy()`` cannot be
    called on it directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

In [18]:
# Both helpers are constructed without arguments, so a single instance is
# shared by every frame instead of being rebuilt on each iteration.
regressBoxes = BBoxTransform()
clipBoxes = ClipBoxes()

# The name of the model's first input does not change between frames.
input_name = ort_session.get_inputs()[0].name

ind = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read failure): stop without counting the frame.
            break
        ind += 1

        # Frame preprocessing.
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size
        )

        # Stack into one batch and move the last axis to position 1
        # (presumably NHWC -> NCHW, the layout the exported model expects).
        x = np.stack(framed_imgs, 0)
        x = np.moveaxis(x, [0, 3, 1, 2], [0, 1, 2, 3])

        # Only the last three of the eight model outputs are used.
        _, _, _, _, _, regression, classification, anchors = ort_session.run(
            None, {input_name: x}
        )

        out = postprocess(
            x,
            torch.from_numpy(anchors),
            torch.from_numpy(regression),
            torch.from_numpy(classification),
            regressBoxes,
            clipBoxes,
            threshold,
            iou_threshold,
        )
        # Map the boxes back into the coordinates of the original frame.
        out = invert_affine(framed_metas, out)

        img_show = display_v2(out, ori_imgs, obj_list, exclusion_list=exclusion_list)
        writer.write(img_show)
        print("current_frame/all_frame:{}/{}".format(ind, video_frame_cnt))
finally:
    # Always release the handles, even when the run is interrupted, so that the
    # output file is finalized and the capture device is freed.
    cap.release()
    writer.release()
    cv2.destroyAllWindows()

current_frame/all_frame:1/50168
current_frame/all_frame:2/50168
current_frame/all_frame:3/50168
current_frame/all_frame:4/50168
current_frame/all_frame:5/50168


KeyboardInterrupt: 