<a href="https://colab.research.google.com/github/ykitaguchi77/Instance_segmentation/blob/main/43_YOLOv9_VS_RT_DETRv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# パッケージインストール🕊

In [None]:
!pip install -q onnx onnxruntime-gpu

# CPU・GPU情報を確認🕊

In [None]:
import torch
import subprocess

def extract_cpu_model(lscpu_text: str, default: str = "-") -> str:
    """Return the CPU model string found in ``lscpu`` output.

    Args:
        lscpu_text: Raw text printed by ``lscpu``.
        default: Value returned when no usable "Model name" line exists.

    Returns:
        The text after the first colon of the first line containing
        "Model name", stripped of surrounding whitespace; ``default`` if no
        such line is present.
    """
    for line in lscpu_text.splitlines():
        if "Model name" in line and ":" in line:
            # maxsplit=1 keeps model names that themselves contain a colon intact.
            return line.split(":", 1)[1].strip()
    return default


# Query the CPU model. If lscpu cannot be launched, fall back to "-" so that
# cpu_name is always defined for the cells that draw it onto the video frames.
try:
    cpu_result = subprocess.run(['lscpu'], stdout=subprocess.PIPE)
    cpu_info = cpu_result.stdout.decode('utf-8', errors='replace').strip()
except OSError:
    cpu_info = ""
cpu_name = extract_cpu_model(cpu_info)

print(f"CPU: {cpu_name}")

# Query the GPU name(s); "-" means no CUDA device (or nvidia-smi could not be run).
gpu_name = "-"
if torch.cuda.is_available():
    try:
        gpu_result = subprocess.run(
            ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
            stdout=subprocess.PIPE,
        )
        gpu_lines = [
            ln.strip()
            for ln in gpu_result.stdout.decode('utf-8', errors='replace').splitlines()
            if ln.strip()
        ]
    except OSError:
        gpu_lines = []
    if gpu_lines:
        # nvidia-smi prints one line per GPU; join them so the name is a single
        # line of text when it is later rendered with cv2.putText.
        gpu_name = ", ".join(gpu_lines)

print(f"GPU: {gpu_name}")

# リポジトリクローン🕊

In [None]:
!git clone https://github.com/PINTO0309/PINTO_model_zoo

# 重みダウンロード(RT-DETRv2)🕊


In [None]:
# Enter the RT-DETRv2 model directory, run its download script, then return to /content.
%cd /content/PINTO_model_zoo/460_RT-DETRv2-Wholebody25
!./download.sh
%cd /content

In [None]:
# Copy the ONNX model into the current directory. The r101vd file is the one copied;
# the r18vd line is kept commented out as an alternative.
# !cp '/content/PINTO_model_zoo/460_RT-DETRv2-Wholebody25/rtdetrv2_r18vd_120e_wholebody25_1250query_n_batch.onnx' ./
!cp '/content/PINTO_model_zoo/460_RT-DETRv2-Wholebody25/rtdetrv2_r101vd_6x_wholebody25_1250query_n_batch.onnx' ./

In [None]:
# Copy the demo script into the current directory so the inference cell can import from it.
# NOTE(review): this path is relative, unlike the absolute paths in the previous cell; it assumes the cwd is /content.
!cp 'PINTO_model_zoo/460_RT-DETRv2-Wholebody25/demo/demo_rtdetrv2_onnx_wholebody25.py' ./

# 物体検出(RT-DETRv2のみ)🕊

# テスト動画ダウンロード🕊
NHKクリエイティブ・ライブラリーの「[中国・香港の街並み](https://www2.nhk.or.jp/archives/movies/?id=D0002160854_00000)」です

In [None]:
import gdown

# Fetch the test clip from Google Drive and store it as test.mp4, with progress output shown.
gdown.download(url='https://drive.google.com/uc?id=1TyRUb5PqxdxWdN1OdRdsJ85YYPWIdv5i', output='test.mp4', quiet=False)

# 推論🕊

In [None]:
# ONNX model file for the single-model run; the r18vd line is kept commented out as an alternative.
# model_file: str = 'rtdetrv2_r18vd_120e_wholebody25_1250query_n_batch.onnx'
model_file: str = 'rtdetrv2_r101vd_6x_wholebody25_1250query_n_batch.onnx'

# Input video. The inference cell converts an integer-like string to int before passing it to cv2.VideoCapture.
video: str = 'test.mp4'

In [None]:
import cv2
from tqdm import tqdm
from typing import List, Tuple, Dict
from pprint import pprint
import numpy as np
import copy
import time

from demo_rtdetrv2_onnx_wholebody25 import (
    BOX_COLORS ,
    Color,
    RTDETRv2 as RTDETRv2_25,
    is_parsable_to_int,
    draw_dashed_rectangle,
)

# Switches forwarded to the model call and consulted by the drawing code.
disable_headpose_identification_mode: bool = False
# NOTE(review): the next flag is not referenced by any visible cell; kept for compatibility.
disable_left_and_right_hand_discrimination_mode: bool = False
disable_left_and_right_hand_identification_mode: bool = False
disable_gender_identification_mode: bool = False
disable_generation_identification_mode: bool = False

# Execution providers handed to the model wrapper (CUDA listed first, CPU second).
providers: List[Tuple[str, Dict] | str] = [
    'CUDAExecutionProvider',
    'CPUExecutionProvider',
]

print(Color.GREEN('Provider parameters:'))
pprint(providers)

# Model initialization
model = RTDETRv2_25(
    runtime='onnx',
    model_path=model_file,
    obj_class_score_th=0.65,
    attr_class_score_th=0.70,
    providers=providers,
)

# Reset the handles first so a failure below never leaves stale objects from a
# previous run bound to these names.
cap = None
video_writer = None

# Open the input: an integer-like string is converted to int, anything else is used as a path.
cap = cv2.VideoCapture(int(video) if is_parsable_to_int(video) else video)
if not cap.isOpened():
    # Fail loudly instead of silently producing a 0x0 writer and an empty output file.
    cap.release()
    raise FileNotFoundError(f'Could not open video source: {video!r}')

cap_fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# The output keeps the input's frame rate and frame size.
fourcc = cv2.VideoWriter.fourcc(*'mp4v')
video_writer = cv2.VideoWriter(
    filename='output1.mp4',
    fourcc=fourcc,
    fps=cap_fps,
    frameSize=(w, h),
)

# Colours for classes whose box colour does not depend on an attribute result.
# Any class id not handled by _box_color falls back to white.
_STATIC_CLASS_COLORS = {
    5: (0, 200, 255),     # Body-With-Wheelchair
    6: (83, 36, 179),     # Body-With-Crutches
    16: (0, 200, 255),    # Face
    17: (255, 0, 0),      # Eye
    18: (0, 255, 0),      # Nose
    19: (0, 0, 255),      # Mouth
    20: (203, 192, 255),  # Ear
    24: (250, 0, 136),    # Foot
}


def _put_outlined_text(canvas, text, origin, color):
    """Draw ``text`` at ``origin``: white with thickness 2, then ``color`` with thickness 1 on top.

    Empty strings are skipped because there is nothing to render.
    """
    if not text:
        return
    cv2.putText(canvas, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                (255, 255, 255), 2, cv2.LINE_AA)
    cv2.putText(canvas, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                color, 1, cv2.LINE_AA)


def _box_color(box):
    """Return the colour tuple for ``box`` from its class id and, where enabled, its attribute."""
    classid = box.classid
    if classid == 0:
        # Body: coloured by gender unless gender identification is disabled.
        if disable_gender_identification_mode:
            return (0, 200, 255)
        return {0: (255, 0, 0), 1: (139, 116, 225)}.get(box.gender, (0, 200, 255))
    if classid == 7:
        # Head: coloured by head pose unless head-pose identification is disabled.
        if disable_headpose_identification_mode:
            return (0, 0, 255)
        return BOX_COLORS[box.head_pose][0] if box.head_pose != -1 else (216, 67, 21)
    if classid == 21:
        # Hand: coloured by handedness unless handedness identification is disabled.
        if disable_left_and_right_hand_identification_mode:
            return (0, 255, 0)
        return {0: (0, 128, 0), 1: (255, 0, 255)}.get(box.handedness, (0, 255, 0))
    return _STATIC_CLASS_COLORS.get(classid, (255, 255, 255))


def _attribute_is_unknown(box):
    """True when the attribute tied to the box's class is enabled and its value is -1."""
    classid = box.classid
    if classid == 0:
        return not disable_gender_identification_mode and box.gender == -1
    if classid == 7:
        return not disable_headpose_identification_mode and box.head_pose == -1
    if classid == 21:
        return not disable_left_and_right_hand_identification_mode and box.handedness == -1
    return False


def _draw_box(canvas, box, color):
    """Draw a dashed rectangle for an unknown attribute, otherwise a white-backed solid one."""
    top_left, bottom_right = (box.x1, box.y1), (box.x2, box.y2)
    if _attribute_is_unknown(box):
        draw_dashed_rectangle(image=canvas, top_left=top_left,
                              bottom_right=bottom_right, color=color,
                              thickness=2, dash_length=10)
    else:
        cv2.rectangle(canvas, top_left, bottom_right, (255, 255, 255), 3)
        cv2.rectangle(canvas, top_left, bottom_right, color, 2)


def _attribute_label(box):
    """Build the label text, e.g. 'Adult(M) <head pose>', omitting any part whose value is unknown."""
    generation_txt = {0: 'Adult', 1: 'Child'}.get(box.generation, '')
    gender_txt = {0: 'M', 1: 'F'}.get(box.gender, '')
    label = f'{generation_txt}({gender_txt})' if gender_txt else generation_txt
    headpose_txt = BOX_COLORS[box.head_pose][1] if box.head_pose != -1 else ''
    return f'{label} {headpose_txt}' if headpose_txt else label


def _annotate_frame(canvas, boxes, overlay_lines):
    """Draw the status lines (top-left, 30 px apart) and then every detection onto ``canvas``."""
    for row, text in enumerate(overlay_lines, start=1):
        _put_outlined_text(canvas, text, (10, 30 * row), (0, 0, 255))

    frame_w = canvas.shape[1]
    for box in boxes:
        color = _box_color(box)
        _draw_box(canvas, box, color)
        # Keep the label inside the frame: clamp x near the right edge and y near the top edge.
        origin = (
            box.x1 if box.x1 + 50 < frame_w else frame_w - 50,
            box.y1 - 10 if box.y1 - 25 > 0 else 20,
        )
        _put_outlined_text(canvas, _attribute_label(box), origin, color)
        _put_outlined_text(canvas, {0: 'L', 1: 'R'}.get(box.handedness, ''), origin, color)


movie_frame_count = 0
total_elapsed_time = 0

try:
    with tqdm(total=total_frames, desc="Processing Video") as pbar:
        while True:
            res, image = cap.read()
            if not res:
                break
            movie_frame_count += 1

            # Work on a copy so the frame that was read stays untouched.
            debug_image = image.copy()
            debug_image_h = debug_image.shape[0]
            debug_image_w = debug_image.shape[1]

            # Only the model call is timed; drawing and encoding are excluded.
            start_time = time.perf_counter()
            boxes = model(
                image=debug_image,
                disable_generation_identification_mode=disable_generation_identification_mode,
                disable_gender_identification_mode=disable_gender_identification_mode,
                disable_left_and_right_hand_identification_mode=disable_left_and_right_hand_identification_mode,
                disable_headpose_identification_mode=disable_headpose_identification_mode,
            )
            elapsed_time = time.perf_counter() - start_time
            total_elapsed_time += elapsed_time

            _annotate_frame(
                debug_image,
                boxes,
                [
                    model_file,
                    'CPU:' + cpu_name,
                    'GPU:' + gpu_name,
                    f'Process Time {elapsed_time*1000:.2f} ms',
                    f'Total Process Time {total_elapsed_time:.0f} s',
                ],
            )

            # NOTE(review): debug_image2 is not read anywhere in this cell; the copy is kept
            # in case a later cell uses the last annotated frame.
            debug_image2 = debug_image.copy()

            if video_writer is not None:
                video_writer.write(debug_image)

            pbar.update(1)
finally:
    # Always release the handles, even if inference or drawing raised part-way
    # through, so the output file is finalised and the input is closed.
    if video_writer is not None:
        video_writer.release()

    if cap is not None:
        cap.release()

# 物体検出(RT-DETRv2 vs YOLOv9)🕊

# 重みダウンロード(YOLOv9)🕊


In [None]:
# Enter the YOLOv9 model directory, run its download script, then return to /content.
%cd /content/PINTO_model_zoo/459_YOLOv9-Wholebody25
!./download_e_withpost.sh
%cd /content

In [None]:
# Copy the YOLOv9 ONNX model into the current directory.
!cp '/content/PINTO_model_zoo/459_YOLOv9-Wholebody25/yolov9_e_wholebody25_post_0100_1x3x736x1280.onnx' ./

In [None]:
# Copy the YOLOv9 demo script into the current directory so the comparison cell can import from it.
!cp 'PINTO_model_zoo/459_YOLOv9-Wholebody25/demo/demo_yolov9_onnx_wholebody25.py' ./

# 推論🕊

In [None]:
# Models compared in this section: model_file1 is loaded by the YOLOv9 wrapper,
# model_file2 by the RT-DETRv2 wrapper.
model_file1: str = 'yolov9_e_wholebody25_post_0100_1x3x736x1280.onnx'
model_file2: str = 'rtdetrv2_r101vd_6x_wholebody25_1250query_n_batch.onnx'

# Input video for the comparison run.
video: str = 'test.mp4'

In [None]:
import cv2
from tqdm import tqdm
from typing import List, Tuple, Dict
from pprint import pprint
import numpy as np
import copy
import time

from demo_yolov9_onnx_wholebody25 import (
    BOX_COLORS ,
    Color,
    YOLOv9 as YOLOV9_25,
    is_parsable_to_int,
    draw_dashed_rectangle,
)
from demo_rtdetrv2_onnx_wholebody25 import (
    RTDETRv2 as RTDETRv2_25,
)

disable_headpose_identification_mode: bool = False
disable_left_and_right_hand_discrimination_mode: bool = False
disable_left_and_right_hand_identification_mode: bool = False
disable_gender_identification_mode: bool = False
disable_generation_identification_mode: bool = False

providers: List[Tuple[str, Dict] | str] = [
    'CUDAExecutionProvider',
    'CPUExecutionProvider',
]

print(Color.GREEN('Provider parameters:'))
pprint(providers)

# Model initialization
model1 = YOLOV9_25(
    runtime='onnx',
    model_path=model_file1,
    obj_class_score_th=0.35,
    attr_class_score_th=0.70,
    providers=providers,
)
model2 = RTDETRv2_25(
    runtime='onnx',
    model_path=model_file2,
    obj_class_score_th=0.65,
    attr_class_score_th=0.70,
    providers=providers,
)

cap = None
video_writer = None

cap = cv2.VideoCapture(int(video) if is_parsable_to_int(video) else video)
cap_fps = cap.get(cv2.CAP_PROP_FPS)
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter.fourcc(*'mp4v')
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
video_writer = cv2.VideoWriter(
    filename='output2.mp4',
    fourcc=fourcc,
    fps=cap_fps,
    frameSize=(w, h*2),
)

movie_frame_count = 0
total_elapsed_time1 = 0
total_elapsed_time2 = 0

with tqdm(total=total_frames, desc="Processing Video") as pbar:
    while True:
        image: np.ndarray = None
        res, image = cap.read()
        if not res:
            break
        movie_frame_count += 1

        # Model1
        debug_image = copy.deepcopy(image)
        debug_image_h = debug_image.shape[0]
        debug_image_w = debug_image.shape[1]

        start_time = time.perf_counter()
        boxes = model1(
            image=debug_image,
            disable_generation_identification_mode=disable_generation_identification_mode,
            disable_gender_identification_mode=disable_gender_identification_mode,
            disable_left_and_right_hand_identification_mode=disable_left_and_right_hand_identification_mode,
            disable_headpose_identification_mode=disable_headpose_identification_mode,
        )
        elapsed_time = time.perf_counter() - start_time

        total_elapsed_time1 += elapsed_time

        cv2.putText(debug_image, model_file1, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, model_file1, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, 'CPU:'+cpu_name, (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, 'CPU:'+cpu_name, (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, 'GPU:'+gpu_name, (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, 'GPU:'+gpu_name, (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, f'Process Time {elapsed_time*1000:.2f} ms', (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, f'Process Time {elapsed_time*1000:.2f} ms', (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, f'Total Process Time {total_elapsed_time1:.0f} s', (10, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, f'Total Process Time {total_elapsed_time1:.0f} s', (10, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        # Draw bounding boxes
        for box in boxes:
            classid: int = box.classid
            color = (255,255,255)

            if classid == 0:
                # Body
                if not disable_gender_identification_mode:
                    # Body
                    if box.gender == 0:
                        # Male
                        color = (255,0,0)
                    elif box.gender == 1:
                        # Female
                        color = (139,116,225)
                    else:
                        # Unknown
                        color = (0,200,255)
                else:
                    # Body
                    color = (0,200,255)
            elif classid == 5:
                # Body-With-Wheelchair
                color = (0,200,255)
            elif classid == 6:
                # Body-With-Crutches
                color = (83,36,179)
            elif classid == 7:
                # Head
                if not disable_headpose_identification_mode:
                    color = BOX_COLORS[box.head_pose][0] if box.head_pose != -1 else (216,67,21)
                else:
                    color = (0,0,255)
            elif classid == 16:
                # Face
                color = (0,200,255)
            elif classid == 17:
                # Eye
                color = (255,0,0)
            elif classid == 18:
                # Nose
                color = (0,255,0)
            elif classid == 19:
                # Mouth
                color = (0,0,255)
            elif classid == 20:
                # Ear
                color = (203,192,255)
            elif classid == 21:
                if not disable_left_and_right_hand_identification_mode:
                    # Hands
                    if box.handedness == 0:
                        # Left-Hand
                        color = (0,128,0)
                    elif box.handedness == 1:
                        # Right-Hand
                        color = (255,0,255)
                    else:
                        # Unknown
                        color = (0,255,0)
                else:
                    # Hands
                    color = (0,255,0)
            elif classid == 24:
                # Foot
                color = (250,0,136)

            if (classid == 0 and not disable_gender_identification_mode) \
                or (classid == 7 and not disable_headpose_identification_mode) \
                or (classid == 21 and not disable_left_and_right_hand_identification_mode):

                if classid == 0:
                    if box.gender == -1:
                        draw_dashed_rectangle(
                            image=debug_image,
                            top_left=(box.x1, box.y1),
                            bottom_right=(box.x2, box.y2),
                            color=color,
                            thickness=2,
                            dash_length=10
                        )
                    else:
                        cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), 3)
                        cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, 2)

                elif classid == 7:
                    if box.head_pose == -1:
                        draw_dashed_rectangle(
                            image=debug_image,
                            top_left=(box.x1, box.y1),
                            bottom_right=(box.x2, box.y2),
                            color=color,
                            thickness=2,
                            dash_length=10
                        )
                    else:
                        cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), 3)
                        cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, 2)

                elif classid == 21:
                    if box.handedness == -1:
                        draw_dashed_rectangle(
                            image=debug_image,
                            top_left=(box.x1, box.y1),
                            bottom_right=(box.x2, box.y2),
                            color=color,
                            thickness=2,
                            dash_length=10
                        )
                    else:
                        cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), 3)
                        cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, 2)

            else:
                cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), (255,255,255), 3)
                cv2.rectangle(debug_image, (box.x1, box.y1), (box.x2, box.y2), color, 2)

            # Attributes text
            generation_txt = ''
            if box.generation == -1:
                generation_txt = ''
            elif box.generation == 0:
                generation_txt = 'Adult'
            elif box.generation == 1:
                generation_txt = 'Child'

            gender_txt = ''
            if box.gender == -1:
                gender_txt = ''
            elif box.gender == 0:
                gender_txt = 'M'
            elif box.gender == 1:
                gender_txt = 'F'

            attr_txt = f'{generation_txt}({gender_txt})' if gender_txt != '' else f'{generation_txt}'

            headpose_txt = BOX_COLORS[box.head_pose][1] if box.head_pose != -1 else ''
            attr_txt = f'{attr_txt} {headpose_txt}' if headpose_txt != '' else f'{attr_txt}'

            cv2.putText(
                debug_image,
                f'{attr_txt}',
                (
                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
                    box.y1-10 if box.y1-25 > 0 else 20
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (255, 255, 255),
                2,
                cv2.LINE_AA,
            )
            cv2.putText(
                debug_image,
                f'{attr_txt}',
                (
                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
                    box.y1-10 if box.y1-25 > 0 else 20
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                color,
                1,
                cv2.LINE_AA,
            )

            handedness_txt = ''
            if box.handedness == -1:
                handedness_txt = ''
            elif box.handedness == 0:
                handedness_txt = 'L'
            elif box.handedness == 1:
                handedness_txt = 'R'
            cv2.putText(
                debug_image,
                f'{handedness_txt}',
                (
                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
                    box.y1-10 if box.y1-25 > 0 else 20
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (255, 255, 255),
                2,
                cv2.LINE_AA,
            )
            cv2.putText(
                debug_image,
                f'{handedness_txt}',
                (
                    box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
                    box.y1-10 if box.y1-25 > 0 else 20
                ),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                color,
                1,
                cv2.LINE_AA,
            )

            # cv2.putText(
            #     debug_image,
            #     f'{box.score:.2f}',
            #     (
            #         box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
            #         box.y1-10 if box.y1-25 > 0 else 20
            #     ),
            #     cv2.FONT_HERSHEY_SIMPLEX,
            #     0.7,
            #     (255, 255, 255),
            #     2,
            #     cv2.LINE_AA,
            # )
            # cv2.putText(
            #     debug_image,
            #     f'{box.score:.2f}',
            #     (
            #         box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
            #         box.y1-10 if box.y1-25 > 0 else 20
            #     ),
            #     cv2.FONT_HERSHEY_SIMPLEX,
            #     0.7,
            #     color,
            #     1,
            #     cv2.LINE_AA,
            # )

        debug_image1 = copy.deepcopy(debug_image)

        # Model2
        debug_image = copy.deepcopy(image)
        debug_image_h = debug_image.shape[0]
        debug_image_w = debug_image.shape[1]

        start_time = time.perf_counter()
        boxes = model2(
            image=debug_image,
            disable_generation_identification_mode=disable_generation_identification_mode,
            disable_gender_identification_mode=disable_gender_identification_mode,
            disable_left_and_right_hand_identification_mode=disable_left_and_right_hand_identification_mode,
            disable_headpose_identification_mode=disable_headpose_identification_mode,
        )
        elapsed_time = time.perf_counter() - start_time

        total_elapsed_time2 += elapsed_time

        # Label this frame with the model that actually produced it (model2 was
        # loaded from model_file2); the original drew model_file1 here as well.
        cv2.putText(debug_image, model_file2, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, model_file2, (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, 'CPU:'+cpu_name, (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, 'CPU:'+cpu_name, (10, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, 'GPU:'+gpu_name, (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, 'GPU:'+gpu_name, (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, f'Process Time {elapsed_time*1000:.2f} ms', (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, f'Process Time {elapsed_time*1000:.2f} ms', (10, 120),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        cv2.putText(debug_image, f'Total Process Time {total_elapsed_time2:.0f} s', (10, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2,
                    cv2.LINE_AA)
        cv2.putText(debug_image, f'Total Process Time {total_elapsed_time2:.0f} s', (10, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 1, cv2.LINE_AA)

        # Draw bounding boxes and attribute labels for every detection.
        #
        # For each box this (1) picks an outline colour from its class id and,
        # when the corresponding identification mode is enabled, from its
        # gender / head-pose / handedness attribute, (2) draws the outline, and
        # (3) draws the attribute text next to the box.

        # Loop-invariant lookup tables, built once per frame instead of per box.
        # Classes whose outline colour never depends on box attributes:
        fixed_class_colors = {
            5: (0, 200, 255),     # Body-With-Wheelchair
            6: (83, 36, 179),     # Body-With-Crutches
            16: (0, 200, 255),    # Face
            17: (255, 0, 0),      # Eye
            18: (0, 255, 0),      # Nose
            19: (0, 0, 255),      # Mouth
            20: (203, 192, 255),  # Ear
            24: (250, 0, 136),    # Foot
        }
        # Attribute value -> label text; any other value (including -1) -> ''.
        generation_labels = {0: 'Adult', 1: 'Child'}
        gender_labels = {0: 'M', 1: 'F'}
        handedness_labels = {0: 'L', 1: 'R'}

        for box in boxes:
            classid: int = box.classid

            # White is the fallback for class ids that have no dedicated colour.
            color = fixed_class_colors.get(classid, (255, 255, 255))

            # True only when the attribute displayed for this class is enabled
            # and its value is -1 (unknown); such boxes get a dashed outline.
            dashed_outline = False

            if classid == 0:
                # Body: coloured by gender unless gender identification is off.
                color = (0, 200, 255)  # Unknown gender / identification disabled
                if not disable_gender_identification_mode:
                    if box.gender == 0:
                        # Male
                        color = (255, 0, 0)
                    elif box.gender == 1:
                        # Female
                        color = (139, 116, 225)
                    dashed_outline = box.gender == -1
            elif classid == 7:
                # Head: colour comes from the head-pose table unless disabled.
                if disable_headpose_identification_mode:
                    color = (0, 0, 255)
                else:
                    dashed_outline = box.head_pose == -1
                    color = (216, 67, 21) if dashed_outline \
                        else BOX_COLORS[box.head_pose][0]
            elif classid == 21:
                # Hands: coloured by handedness unless identification is off.
                color = (0, 255, 0)  # Unknown handedness / identification disabled
                if not disable_left_and_right_hand_identification_mode:
                    if box.handedness == 0:
                        # Left-Hand
                        color = (0, 128, 0)
                    elif box.handedness == 1:
                        # Right-Hand
                        color = (255, 0, 255)
                    dashed_outline = box.handedness == -1

            # Outline: dashed in the class colour when the attribute is unknown,
            # otherwise a 3 px white rectangle with a 2 px coloured one on top.
            box_top_left = (box.x1, box.y1)
            box_bottom_right = (box.x2, box.y2)
            if dashed_outline:
                draw_dashed_rectangle(image=debug_image,
                                      top_left=box_top_left,
                                      bottom_right=box_bottom_right,
                                      color=color,
                                      thickness=2,
                                      dash_length=10)
            else:
                cv2.rectangle(debug_image, box_top_left, box_bottom_right,
                              (255, 255, 255), 3)
                cv2.rectangle(debug_image, box_top_left, box_bottom_right,
                              color, 2)

            # Attributes text: "<generation>(<gender>) <head pose>", where each
            # part is omitted when its attribute has no label.
            generation_txt = generation_labels.get(box.generation, '')
            gender_txt = gender_labels.get(box.gender, '')
            attr_txt = f'{generation_txt}({gender_txt})' if gender_txt != '' else f'{generation_txt}'

            headpose_txt = BOX_COLORS[
                box.head_pose][1] if box.head_pose != -1 else ''
            attr_txt = f'{attr_txt} {headpose_txt}' if headpose_txt != '' else f'{attr_txt}'

            handedness_txt = handedness_labels.get(box.handedness, '')

            # Label anchor, computed once per box: x is the box's left edge
            # unless that is within 50 px of the right image border, in which
            # case it is pulled back to debug_image_w - 50; y is 10 px above the
            # box unless the box is within 25 px of the top, in which case y=20.
            label_origin = (
                box.x1 if box.x1 + 50 < debug_image_w else debug_image_w - 50,
                box.y1 - 10 if box.y1 - 25 > 0 else 20,
            )

            # Each non-empty label is drawn as a thick white pass followed by a
            # thin coloured pass (outlined text). Empty labels are skipped: they
            # render nothing, so the previous unconditional putText calls were
            # wasted work for most boxes.
            # NOTE(review): both labels share the same anchor, so they would
            # overlap if one box carried both; presumably the attributes are
            # mutually exclusive per class -- confirm against the model output.
            for label_txt in (attr_txt, handedness_txt):
                if label_txt == '':
                    continue
                cv2.putText(
                    debug_image,
                    f'{label_txt}',
                    label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.7,
                    (255, 255, 255),
                    2,
                    cv2.LINE_AA,
                )
                cv2.putText(
                    debug_image,
                    f'{label_txt}',
                    label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.7,
                    color,
                    1,
                    cv2.LINE_AA,
                )

            # cv2.putText(
            #     debug_image,
            #     f'{box.score:.2f}',
            #     (
            #         box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
            #         box.y1-10 if box.y1-25 > 0 else 20
            #     ),
            #     cv2.FONT_HERSHEY_SIMPLEX,
            #     0.7,
            #     (255, 255, 255),
            #     2,
            #     cv2.LINE_AA,
            # )
            # cv2.putText(
            #     debug_image,
            #     f'{box.score:.2f}',
            #     (
            #         box.x1 if box.x1+50 < debug_image_w else debug_image_w-50,
            #         box.y1-10 if box.y1-25 > 0 else 20
            #     ),
            #     cv2.FONT_HERSHEY_SIMPLEX,
            #     0.7,
            #     color,
            #     1,
            #     cv2.LINE_AA,
            # )

        # Independent copy of the frame with all overlays drawn above.
        # NOTE(review): cv2.vconcat below builds a new array, so this deep copy
        # may be unnecessary unless debug_image2 is used elsewhere -- confirm.
        debug_image2 = copy.deepcopy(debug_image)

        if video_writer is not None:
            # Stack the two annotated frames vertically (debug_image1 on top,
            # debug_image2 below) and append the result to the output video.
            # debug_image1 is created outside this chunk -- presumably the other
            # model's annotated frame; confirm. Note that debug_image is rebound
            # to the stacked frame here.
            debug_image = cv2.vconcat([debug_image1, debug_image2])
            video_writer.write(debug_image)

        # Advance the progress bar by one step for this iteration.
        pbar.update(1)

    # Reached once the enclosing block above has finished: release the video
    # writer and the capture object, if they were created.
    if video_writer is not None:
        video_writer.release()

    if cap is not None:
        cap.release()