In [2]:
import time
import torch
import cv2
import numpy as np
from pathlib import Path
from torch.backends import cudnn
from matplotlib import pyplot as plt
from backbone import EfficientDetBackbone
from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess, preprocess_video

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from utils.vis_utils import model_params, model_load
from utils.eff_utils import load_yaml

yaml = load_yaml('datasets/detection/video/video_config.yml')

In [4]:
video_src = "datasets/detection/video/mov_clear/ch01_20220214162545.mp4"  # set int to use webcam, set str to read from a video file

In [5]:
compound_coef = 2
force_input_size = None  # set None to use default size

threshold = 0.2
iou_threshold = 0.2

use_cuda = True
use_float16 = False
cudnn.fastest = True
cudnn.benchmark = True

# color_list = standard_to_bgr(STANDARD_COLORS)
# tf bilinear interpolation is different from any other's, just make do
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
input_size = (
    input_sizes[compound_coef] if force_input_size is None else force_input_size
)

In [6]:
params = []
for i, yaml_path in enumerate(yaml['project_config_list']):
    project_params = load_yaml(str(yaml_path))
    anchor_ratios = eval(project_params["anchors_ratios"])
    anchor_scales = eval(project_params["anchors_scales"])
    obj_list = project_params["obj_list"]
    params.append(
        model_params(
            yaml["compound_coef"][i], obj_list, anchor_ratios, anchor_scales, yaml['weights_path_list'][i]
        )
    )

In [9]:
params

[model_params(compound_coef=2, obj_list=['spiledmaterial', 'person', 'crack', 'potholes', 'pounding', 'label', 'indicator', 'lamplight', 'animal'], ratios=[(0.6, 1.6), (1.2, 0.8), (1.7, 0.6)], scales=[0.4, 0.5039684199579493, 0.6349604207872798], model_path='weights/untunnel/2.5/efficientdet-d2_99_6000.pth'),
 model_params(compound_coef=2, obj_list=['pounding'], ratios=[(1.3, 0.7), (1.7, 0.6), (2.1, 0.5)], scales=[0.5, 0.6299605249474366, 0.7937005259840997], model_path='logs/pounding/efficientdet-d2_199_12000.pth')]

In [8]:
plt_obj_list = params[0].obj_list
plt_obj_list

['spiledmaterial',
 'person',
 'crack',
 'potholes',
 'pounding',
 'label',
 'indicator',
 'lamplight',
 'animal']

In [12]:
model_list = []
for i in range(len(params)):
    model = model_load(params[i])
    if use_cuda:
        _model = model.cuda()
    if use_float16:
        _model = model.half()
    model_list.append(_model)

In [13]:
cap = cv2.VideoCapture(video_src)


video_frame_cnt = int(cap.get(7))
video_width = int(cap.get(3))
video_height = int(cap.get(4))
video_fps = int(cap.get(5))

video_path = Path(video_src)
# videoname = str(video_path.parents[1] / video_path.stem) + "_output.avi"
videoname = "test.avi"
fourcc = cv2.VideoWriter_fourcc(*"XVID")
writer = cv2.VideoWriter(videoname, fourcc, video_fps, (video_width, video_height), True)

In [14]:
videoname

'test.avi'

In [15]:
from utils.utils import (
    preprocess,
    invert_affine,
    postprocess,
    plot_one_box,
    STANDARD_COLORS,
    standard_to_bgr,
    get_index_label,
)


In [16]:
def display_v2(preds, imgs, obj_list, exclusion_list=[]):
    color_list = standard_to_bgr(STANDARD_COLORS)
    plt.rcParams["figure.figsize"] = (12.8, 7.2)
    for i in range(len(imgs)):
        if len(preds[i]["rois"]) == 0:
            return imgs[i]

        for j in range(len(preds[i]["rois"])):
            (x1, y1, x2, y2) = preds[i]["rois"][j].astype(np.int)
            obj = obj_list[preds[i]["class_ids"][j]]
            score = float(preds[i]["scores"][j])

            if obj not in exclusion_list:
                cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
                plot_one_box(
                    imgs[i],
                    [x1, y1, x2, y2],
                    label=obj,
                    score=score,
                    color=color_list[get_index_label(obj, obj_list)],
                )

        return imgs[i]

In [17]:
empty = {
                    "rois": np.array(()),
                    "class_ids": np.array(()),
                    "scores": np.array(()),
                }

In [18]:
def concat_out(out_1,out_2):
    out = out_1.copy()
    for keys in out_1.keys():
        if len(out_1['rois']) == 0:
            out[keys] = out_2[keys]
        elif len(out_2['rois']) == 0:
            out[keys] = out_1[keys]
        else:
            out[keys] = np.concatenate((out_1[keys],out_2[keys]))
    return out

In [21]:
obj_list = ['pounding']

In [24]:
convert_ind_list = [plt_obj_list.index(obj) for obj in obj_list]
convert_ind_list

[4]

In [30]:
regressBoxes = BBoxTransform()
clipBoxes = ClipBoxes()
ind = 0
while True:
    ret, frame = cap.read()
    ind += 1
    if not ret:
        break

    # frame preprocessing
    ori_imgs, framed_imgs, framed_metas = preprocess_video(frame, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    # model predict
    out_multi = [empty.copy()]
    for i, model in enumerate(model_list):
        obj_list = params[i].obj_list
        convert_ind_list = [plt_obj_list.index(obj) for obj in obj_list]
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(
                x,
                anchors,
                regression,
                classification,
                regressBoxes,
                clipBoxes,
                threshold,
                iou_threshold,
            )

        # result
        out = invert_affine(framed_metas, out)
        for l,output in enumerate(out):
            if len(output['class_ids']) != 0:
                for j in output['class_ids']:
                    if obj_list[j] in yaml['exclusion_list'][i]:
                        output = empty.copy()
                output['class_ids'] = np.array([convert_ind_list[i] for i in output['class_ids']])    
                out_multi[l] = concat_out(out_multi[l],output)
        # print(out_multi)
    img_show = display_v2(out_multi, ori_imgs, plt_obj_list)

    cv2.imwrite("datasets/detection/video/first.png", img_show)
    writer.write(img_show)
    print("current_frame/all_frame:{}/{}".format(ind, video_frame_cnt))
    # if cv2.waitKey(1) & 0xFF == ord("q"):
    #     break

cap.release()
writer.release()
cv2.destroyAllWindows()




current_frame/all_frame:1/4051
current_frame/all_frame:2/4051
current_frame/all_frame:3/4051
current_frame/all_frame:4/4051
current_frame/all_frame:5/4051
current_frame/all_frame:6/4051
current_frame/all_frame:7/4051
current_frame/all_frame:8/4051
current_frame/all_frame:9/4051
current_frame/all_frame:10/4051
current_frame/all_frame:11/4051
current_frame/all_frame:12/4051
current_frame/all_frame:13/4051
current_frame/all_frame:14/4051
current_frame/all_frame:15/4051
current_frame/all_frame:16/4051
current_frame/all_frame:17/4051


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if __name__ == '__main__':


current_frame/all_frame:18/4051
current_frame/all_frame:19/4051
current_frame/all_frame:20/4051
current_frame/all_frame:21/4051
current_frame/all_frame:22/4051
current_frame/all_frame:23/4051
current_frame/all_frame:24/4051
current_frame/all_frame:25/4051
current_frame/all_frame:26/4051
current_frame/all_frame:27/4051
current_frame/all_frame:28/4051
current_frame/all_frame:29/4051
current_frame/all_frame:30/4051
current_frame/all_frame:31/4051
current_frame/all_frame:32/4051
current_frame/all_frame:33/4051
current_frame/all_frame:34/4051
current_frame/all_frame:35/4051
current_frame/all_frame:36/4051
current_frame/all_frame:37/4051
current_frame/all_frame:38/4051
current_frame/all_frame:39/4051
current_frame/all_frame:40/4051
current_frame/all_frame:41/4051
current_frame/all_frame:42/4051
current_frame/all_frame:43/4051
current_frame/all_frame:44/4051
current_frame/all_frame:45/4051
current_frame/all_frame:46/4051
current_frame/all_frame:47/4051
current_frame/all_frame:48/4051
current_

KeyboardInterrupt: 

In [None]:
    img_show = display_v2(out_multi, ori_imgs, plt_obj_list)

    cv2.imwrite("datasets/detection/video/first.png", img_show)
    writer.write(img_show)
    print("current_frame/all_frame:{}/{}".format(ind, video_frame_cnt))
    # if cv2.waitKey(1) & 0xFF == ord("q"):
    #     break

cap.release()
writer.release()
cv2.destroyAllWindows()


In [None]:
obj_list

In [None]:
        img_show = display_v2(out, ori_imgs, obj_list, yaml['exclusion_list'])

    cv2.imwrite("datasets/detection/video/first.png", img_show)
    writer.write(img_show)
    print("current_frame/all_frame:{}/{}".format(ind, video_frame_cnt))
    # if cv2.waitKey(1) & 0xFF == ord("q"):
    #     break

cap.release()
writer.release()
cv2.destroyAllWindows()


In [64]:
out_1=[{'rois': np.array([], dtype=np.float64), 'class_ids': np.array([], dtype=np.float64), 'scores': np.array([], dtype=np.float64)}]

In [26]:
out_2=[{'rois': np.array([[ 587.80426,  585.9524 ,  628.3952 ,  678.4821 ],
       [ 198.14296,  963.407  ,  372.13025, 1034.8644 ]], dtype=np.float32), 'class_ids': np.array([1, 2]), 'scores': np.array([0.60925406, 0.20762798], dtype=np.float32)}]

In [27]:
for j in out_2[0]['class_ids']:
    j = 6
print(out_2)

[{'rois': array([[ 587.80426,  585.9524 ,  628.3952 ,  678.4821 ],
       [ 198.14296,  963.407  ,  372.13025, 1034.8644 ]], dtype=float32), 'class_ids': array([1, 2]), 'scores': array([0.60925406, 0.20762798], dtype=float32)}]


In [28]:
out_2[0]['class_ids'] = np.array([i+1 for i in out_2[0]['class_ids']])
print(out_2)

[{'rois': array([[ 587.80426,  585.9524 ,  628.3952 ,  678.4821 ],
       [ 198.14296,  963.407  ,  372.13025, 1034.8644 ]], dtype=float32), 'class_ids': array([2, 3]), 'scores': array([0.60925406, 0.20762798], dtype=float32)}]


In [69]:
concat_out(out_1[0],out_2[0])

{'rois': array([[ 587.80426,  585.9524 ,  628.3952 ,  678.4821 ],
        [ 198.14296,  963.407  ,  372.13025, 1034.8644 ]], dtype=float32),
 'class_ids': array([1, 2]),
 'scores': array([0.60925406, 0.20762798], dtype=float32)}