In [1]:
# Check the installed PyTorch version and whether CUDA is available
import torch, torchvision
print(torch.__version__)
print(torch.cuda.is_available())

# Show the installation result as reported by mim
!mim list


2.3.0+cu118
False
Package    Version    Source
---------  ---------  ---------------------------------------------------
mmcv       2.2.0      https://github.com/open-mmlab/mmcv
mmdet      3.3.0      d:\codes\python\ai\4_baseline\openmmlab\mmdetection
mmengine   0.10.4     https://github.com/open-mmlab/mmengine
mmyolo     0.6.0      d:\codes\python\ai\4_baseline\openmmlab\mmyolo


## 模型分析步骤  
完整步骤如下：  
数据集准备：tools/misc/download_dataset.py  
使用 labelme 和算法进行辅助标注：demo/image_demo.py + labelme  
使用脚本转换成 COCO 数据集格式：tools/dataset_converters/labelme2coco.py  
数据集划分为训练集、验证集和测试集：tools/misc/coco_split.py  
构建 config 文件 : python tools/misc/print_config.py /PATH/TO/CONFIG  
数据集可视化分析：tools/analysis_tools/dataset_analysis.py configs/my/yolov8_swin_rip.py  
优化 anchor 尺寸：tools/analysis_tools/optimize_anchors.py configs/my/yolov8_swin_rip.py   
可视化数据处理部分：tools/analysis_tools/browse_dataset.py configs/my/yolov8_swin_rip.py  
启动训练：tools/train.py  
模型推理：demo/image_demo.py  
模型部署  

## 常用命令
### 下载权重+推理验证

In [None]:
# Step 1. Download the config file and the model weights.
! mim download mmyolo --config yolov5_s-v61_syncbn_fast_8xb16-300e_coco --dest .

# Step 2. Verify inference (source install)
! python demo/image_demo.py demo/demo.jpg \
                          yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
                          yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth

# Optional arguments
# --out-dir ./output *directory the detection results are written to, default ./output; results are not saved when --show is given
# --device cuda:0    *compute device to use (cuda, cpu, ...), default cuda:0
# --show             *display the detection results on screen, default False
# --score-thr 0.3    *confidence threshold, default 0.3

In [None]:
# Step 2. Verify inference (MIM install)
from mmdet.apis import init_detector, inference_detector
config_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
checkpoint_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth'
model = init_detector(config_file, checkpoint_file, device='cpu')  # or device='cuda:0'
inference_detector(model, 'demo/demo.jpg')
# The cell output shows the detection result; the predictions are stored under pred_instances
# and include the predicted boxes, scores and labels.

### Yolo V8 + Swin

In [None]:
# Start training with the YOLOv8 + Swin-T coco128 config
!python tools/train.py configs/yolov8/yolov8_s_swin_t-v61_1xb2-1e_coco128.py

### Yolo V5 + Swin

In [None]:
# Start training with the YOLOv5 + Swin-T rip config
!python tools/train.py configs/yolov5/yolov5_s_swin_t-v61_4xb4-30e_rip.py

### 更换网络组件

In [None]:
# Use a third-party network module
# Install timm
# %pip install timm

# List the models timm offers with pretrained weights
import timm
model_names = timm.list_models(pretrained=True)
print(f'num of models: {len(model_names)}')
print(model_names)

# Example
# To use `mobilevitv2_050` from timm as the `YOLOv5` backbone, the config file looks like this:
# Import mmcls.models so that the modules registered in mmcls can be referenced
custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)

model = dict(
    backbone=dict(
        _delete_=True,  # drop the backbone fields inherited from _base_
        type='mmcls.TIMMBackbone',  # use the timm backbone wrapper from mmcls
        model_name='mobilevitv2_050',  # use mobilevitv2_050 from timm
        # ...
    ))

In [None]:
# Use a self-supervised backbone

# Install mmselfsup
!mim install "mmselfsup>=1.0.0rc3"

# import mmselfsup
# model_names = mmselfsup.list_models(pretrained=True)
# print(f'num of models: {len(model_names)}')
# print(model_names)

# Import mmselfsup.models so that the modules registered in mmselfsup can be referenced
custom_imports = dict(imports=['mmselfsup.models'], allow_failed_imports=False)

model = dict(
    backbone=dict(
        _delete_=True, # drop the backbone fields inherited from _base_
        type='mmselfsup.ResNet',
        # ...
    ))

### 确定主干网络输出通道数

In [None]:
## 3 Bonus: how to determine the number of output channels of a backbone
# The original question: the largest PPYOLO-E model (x) uses `widen_factor` 1.25; if we build an even
# larger network with `widen_factor` 1.5, how many output channels does its `PPYOLOECSPResNet` backbone have?
# The answer is found by running a dummy tensor through the backbone and reading the output shapes.
# This cell applies that probe to mmdet's `SwinTransformer`.

import torch
from mmyolo.models import PPYOLOECSPResNet
from mmyolo.utils import register_all_modules
from mmdet.models.backbones import SwinTransformer
# Register all modules
register_all_modules()

imgs = torch.randn(1, 3, 1150, 1150)
out_indices = (1, 2, 3)
# out_indices=(0,1,2,3)
# `init_cfg` must be an init config (dict), not a bare path string.
# NOTE(review): the weights are only read when `model.init_weights()` is called, which this cell
# never does; output shapes do not depend on the weights. Confirm the checkpoint matches this
# architecture before calling `init_weights()`.
model = SwinTransformer(
    init_cfg=dict(type='Pretrained',
                  checkpoint=r'D:\Codes\Python\AI\6_Model\swin_v2_b-781e5279.pth'),
    out_indices=out_indices)
# model = SwinTransformer(out_indices=out_indices)
model.eval()
# Only the shapes are needed, so skip autograd bookkeeping for the large input.
with torch.no_grad():
    out = model(imgs)
out_shapes = [feat.shape for feat in out]
print(out_shapes)

### 打印PTH文件结构

In [None]:
import torch


def print_checkpoint_structure(checkpoint_path):
    """Print every entry name and tensor size stored in a ``.pth`` checkpoint.

    Handles three layouts: a dict with a ``"state_dict"`` entry, a raw state
    dict, and a pickled ``torch.nn.Module``.

    Args:
        checkpoint_path: Path to the checkpoint file.
    """
    # SECURITY: torch.load unpickles the file; only open checkpoints from trusted sources.
    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    if isinstance(checkpoint, torch.nn.Module):
        state_dict = checkpoint.state_dict()
    elif "state_dict" in checkpoint:
        state_dict = checkpoint["state_dict"]
    else:
        state_dict = checkpoint
    for key, value in state_dict.items():
        # Non-tensor entries (e.g. metadata in a raw dict) have no size; show their type instead.
        size = value.size() if hasattr(value, "size") and callable(value.size) else type(value).__name__
        print(key, size, sep="  ")


if __name__ == '__main__':
    model_pth = r'D:\Codes\Python\AI\6_Model\orcn-swin-t-dota-latest.pth'
    print_checkpoint_structure(model_pth)

### PTH转为ONNX

```python
torch.onnx.export(
    model,  # 要导出的模型
    args, #  模型的输入参数,输入参数只需满足shape正确
    onnx_export_filepath, # 转换输出的onnx模型的路径
    export_params=True, # true表示导出trained model，否则untrained model。默认即可
    verbose=False, #  true表示打印调试信息
    input_names=None, # 指定输入节点名称
    output_names=None, # 指定输出节点名称
    do_constant_folding=True, # 是否使用常量折叠，默认即可
    dynamic_axes=None, # 通过dynamic_axes来指定输入tensor的哪些参数可变
    opset_version=9 # 指定onnx的opset版本，版本过低的话，不支持upsample等操作
)
```

In [None]:
import torch
# Export a pickled model to ONNX using a dummy input of the expected shape.
# SECURITY: torch.load unpickles the file; only open checkpoints from trusted sources.
model = torch.load(r'D:\Codes\Python\AI\6_Model\orcn-swin-t-dota-latest.pth', map_location='cpu')
# A checkpoint that stores only weights (a dict with a "state_dict" entry) cannot be exported
# directly: the network has to be built first and the weights loaded into it.
if not isinstance(model, torch.nn.Module):
    raise TypeError(
        'The .pth file contains ' + type(model).__name__ + ', not a torch.nn.Module. '
        'Build the network from its config, load the state dict into it, and export that model.')
model.eval()
input_names = ['input']
output_names = ['output']
# The dummy input only has to have the right shape.
x = torch.randn(1,3,1150,1150,requires_grad=True)
# `verbose` is a boolean flag; the original passed the string 'True'.
torch.onnx.export(model, x, r'D:\Codes\Python\AI\6_Model\orcn-swin-t-dota-latest.onnx', input_names=input_names, output_names=output_names, verbose=True)

### 利用PTH文件推理

In [None]:
import torch
from torchvision import datasets, models, transforms
import cv2
import numpy as np
from PIL import Image
def pth_push(img, model=None, model_path='', transform=None):
    """Run a classifier on one image and return the probability of class 1.

    Args:
        img: Input image. With the default transform this must be something
            ``transforms.Resize``/``ToTensor`` accept (e.g. a PIL image).
        model: The network instance to run. Required; the original template
            left this as a blank placeholder.
        model_path: Path of a ``.pth`` state dict to load into ``model``.
            When empty, ``model`` is used with its current weights.
        transform: Callable turning ``img`` into an unbatched tensor. Defaults
            to resizing to 299x299 followed by ``ToTensor``.

    Returns:
        float: Softmax probability of class index 1 for the image.

    Raises:
        ValueError: If ``model`` is not supplied.
    """
    if model is None:
        raise ValueError('pth_push needs a model instance: pass model=<your torch.nn.Module>')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # cpu or gpu
    if model_path:
        # SECURITY: torch.load unpickles the file; only load weights from trusted sources.
        model.load_state_dict(torch.load(model_path, map_location=device))
    # The model must live on the same device as the inputs.
    model = model.to(device).eval()
    if transform is None:
        # Resize the image and convert it to a tensor.
        transform = transforms.Compose([
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
        ])
    inputs = transform(img)
    inputs = torch.unsqueeze(inputs, 0)  # add the batch dimension
    inputs = inputs.to(device)
    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(inputs)
    # Decode according to the task at hand: here, softmax over the class axis in float64,
    # then take class 1 of the first (only) sample.
    probabilities = torch.softmax(outputs.detach().double().cpu(), dim=1)
    return probabilities[0, 1].item()


if __name__ == "__main__":
    i = '.jpg'        # TODO: path of the image to classify
    model = None      # TODO: instantiate your network here
    model_path = ''   # TODO: path of the .pth weight file
    image = Image.open(i)
    pth_push(image, model=model, model_path=model_path)

## 模型推理

In [None]:
# Run demo/video_demo.py on rip_01.mp4 with the dino-4s_r50_rip config and the dino_4s_e12.pth
# checkpoint on cuda:0, writing the rendered video to the --out path.
# NOTE(review): --score-thr 5 looks wrong. The scores recorded elsewhere in this notebook are all
# below 1, so presumably 0.5 or 0.05 was intended. Confirm before relying on the output.
! python demo/video_demo.py D:/Codes/Out/Python/rip/houhai/video/rip_01.mp4 \
                                configs/my/dino-4s_r50_rip.py \
                                model/dino_4s_e12.pth \
                                --out D:/Codes/Out/Python/rip/vis/houhai-res/dino_4s_e12/video/5/rip_01.mp4 \
                                --score-thr 5 \
                                --device cuda:0

In [None]:
# Run demo/video_demo.py on the houhai rip_01.mp4 video with the yolov5_0705 config and checkpoint
# on cuda:0, using a score threshold of 0.05 and writing the result video to the --out path.
! python demo/video_demo.py demo/houhai/video/rip_01.mp4 \
                                configs/my/yolov5_0705.py \
                                model/yolov5_0705.pth \
                                --out demo/vis/houhai-res/yolov5_0705/0.05/video/rip_01.mp4 \
                                --score-thr 0.05 \
                                --device cuda:0

In [None]:
! python demo/image_demo.py demo/gaolong/02 \
                                configs/my/yolov5_0705.py \
                                model/yolov5_0705.pth \
                                --out-dir demo/vis/gaolong-res/yolov5_0705\0.02\02 \
                                --score-thr 0.02 \
                                --device cuda:0

In [None]:
! python demo/image_demo.py demo/sansha/01 \
                                configs/my/yolov5_0705.py \
                                model/yolov5_0705.pth \
                                --out-dir demo/vis/sansha-res/yolov5_0705\0.05\03 \
                                --score-thr 0.05 \
                                --device cuda:0

In [10]:
# Run the custom demo/image_infer_rp.py script on the video frame images with the
# dino-4s_r50_rip_video config and the best_video_58.run checkpoint on CPU, score threshold 0.05.
# NOTE(review): --area-thr is specific to this script; presumably an upper bound on box area. Confirm in the script.
! python demo/image_infer_rp.py E:\Data\Research\Rip\video\images\01 \
                                my/dino-4s_r50_rip_video.py \
                                model/best_video_58.run \
                                --out-dir E:\Data\Research\Rip\video\result\dino_4s\0.05\02 \
                                --score-thr 0.05 \
                                --device cpu \
                                --area-thr 50000000.0 

Loads checkpoint by local backend from path: model/best_video_58.run
[                                                  ] 0/10, elapsed: 0s, ETA:
[>>>                               ] 1/10, 0.3 task/s, elapsed: 4s, ETA:    35s------------len(result.pred_instances)-----------
300
------------pred_instances > args.score_thr -----------
9
<InstanceData(

    META INFORMATION

    DATA FIELDS
    bboxes: tensor([[ 539.4692,  759.6086,  790.6314,  849.0701],
                [ 542.1963,  759.7825,  790.0408,  848.8260],
                [ 541.8483,  759.1146,  788.5239,  849.1544],
                [ 902.7418,  741.6548, 1553.4702,  872.3153],
                [ 542.3768,  759.1019,  789.8576,  849.2555],
                [ 539.9794,  759.1732,  790.0869,  848.5238],
                [ 909.3704,  740.2924, 1549.5333,  874.1241],
                [ 894.8684,  741.4972, 1553.1953,  874.6537],
                [ 904.4288,  741.9391, 1544.9486,  873.5610]])
    scores: tensor([0.8789, 0.3308, 0.2622, 0.



In [3]:
# Run the custom demo/image_infer_rp.py script on the rs images with the dino-4s_r50_rip_rs config
# and the best_rs_78.run checkpoint on CPU, score threshold 0.1.
# NOTE(review): the recorded output below reports loading model/best_rs_78.pth, not the .run file
# named here; the output looks stale. Re-run to confirm which checkpoint is used.
# NOTE(review): --area-thr is specific to this script; presumably an upper bound on box area. Confirm in the script.
! python demo/image_infer_rp.py E:\Data\Research\Rip\rs\images\02 \
                                my/dino-4s_r50_rip_rs.py \
                                model/best_rs_78.run \
                                --out-dir E:\Data\Research\Rip\rs\result\dino_4s\0.1\03 \
                                --score-thr 0.1 \
                                --device cpu \
                                --area-thr 50000000.0 

Loads checkpoint by local backend from path: model/best_rs_78.pth
[                                                  ] 0/22, elapsed: 0s, ETA:
[>                                 ] 1/22, 0.4 task/s, elapsed: 2s, ETA:    49s------------len(result.pred_instances)-----------
300
------------pred_instances > args.score_thr -----------
8
<InstanceData(

    META INFORMATION

    DATA FIELDS
    labels: tensor([0, 0, 0, 0, 0, 0, 0, 0])
    bboxes: tensor([[638.9840, 885.8066, 703.5042, 928.6477],
                [576.2604, 821.2333, 605.5704, 838.3709],
                [476.7833, 651.4491, 511.4456, 675.1157],
                [548.8909, 243.6960, 575.2685, 282.5593],
                [640.2486, 886.3217, 703.4479, 927.8692],
                [638.5549, 886.5968, 703.9271, 927.8839],
                [641.8467, 888.9625, 701.0181, 926.3056],
                [637.9538, 886.2655, 703.6799, 927.4672]])
    scores: tensor([0.8045, 0.7703, 0.6432, 0.3216, 0.1902, 0.1827, 0.1821, 0.1476])
) at 0x2227e1



In [None]:
! python demo/image_demo.py demo/sansha/01 \
                                configs/my/yolov5_0705.py \
                                model/yolov5_0705.pth \
                                --out-dir demo/vis/sansha-res/yolov5_0705\0.05\03 \
                                --score-thr 0.05 \
                                --device cuda:0

In [None]:
# Run demo/image_demo.py on the demo/sansha/01 images with the yolov8_swin_rip config and its
# epoch-295 checkpoint on cuda:0, using a confidence threshold of 0.03.
! python demo/image_demo.py demo/sansha/01 \
                                configs/my/yolov8_swin_rip.py \
                                model/best_coco_bbox_mAP_epoch_295_yolov8_swin_bs8.pth \
                                --out-dir demo/vis/sansha-res/yolov8_swin_rip_e295 \
                                --score-thr 0.03 \
                                --device cuda:0

In [None]:
# Run demo/image_demo.py on the demo/houhai/03 images with the yolov8_swin_rip config and its
# epoch-295 checkpoint on cuda:0, using a confidence threshold of 0.5.
! python demo/image_demo.py demo/houhai/03 \
                                configs/my/yolov8_swin_rip.py \
                                model/best_coco_bbox_mAP_epoch_295_yolov8_swin_bs8.pth \
                                --out-dir demo/vis/houhai-res/yolov8_swin_rip_e295 \
                                --score-thr 0.5 \
                                --device cuda:0

In [None]:
# Run demo/image_demo.py on the demo/houhai/03 images with the yolov5_swin_rip config and its
# epoch-200 checkpoint on cuda:0, using a confidence threshold of 0.03.
! python demo/image_demo.py demo/houhai/03 \
                                configs/my/yolov5_swin_rip.py \
                                model/best_coco_bbox_mAP_epoch_200_yolov5_swin_bs8.pth \
                                --out-dir demo/vis/houhai-res/yolov5_swin_rip \
                                --score-thr 0.03 \
                                --device cuda:0

In [None]:
# Run demo/image_demo.py on the demo/sansha/01 images with the yolov5_swin_rip config and its
# epoch-200 checkpoint on cuda:0, using a confidence threshold of 0.03.
! python demo/image_demo.py demo/sansha/01 \
                                configs/my/yolov5_swin_rip.py \
                                model/best_coco_bbox_mAP_epoch_200_yolov5_swin_bs8.pth \
                                --out-dir demo/vis/sansha-res/yolov5_swin_rip \
                                --score-thr 0.03 \
                                --device cuda:0

```python
这是在视频样例上进行推理的脚本。  
python demo/video_demo.py \  
    ${VIDEO_FILE} \  
    ${CONFIG_FILE} \  
    ${CHECKPOINT_FILE} \  
    [--device ${GPU_ID}] \  
    [--score-thr ${SCORE_THR}] \  
    [--out ${OUT_FILE}] \  
    [--show] \  
    [--wait-time ${WAIT_TIME}]  
  
运行样例：  
python demo/video_demo.py demo/demo.mp4 \  
    configs/rtmdet/rtmdet_l_8xb32-300e_coco.py \  
    checkpoints/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth \  
    --out result.mp4  
视频样例，显卡加速版本  
这是在视频样例上进行推理的脚本，使用显卡加速。  

python demo/video_gpuaccel_demo.py \  
     ${VIDEO_FILE} \  
     ${CONFIG_FILE} \  
     ${CHECKPOINT_FILE} \  
     [--device ${GPU_ID}] \  
     [--score-thr ${SCORE_THR}] \  
     [--nvdecode] \  
     [--out ${OUT_FILE}] \  
     [--show] \  
     [--wait-time ${WAIT_TIME}]  
运行样例：  
python demo/video_gpuaccel_demo.py demo/demo.mp4 \  
    configs/rtmdet/rtmdet_l_8xb32-300e_coco.py \  
    checkpoints/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth \  
    --nvdecode --out result.mp4  

8 GPU  
./tools/dist_test.sh configs/glip/glip_atss_swin-t_fpn_dyhead_pretrain_obj365.py glip_tiny_a_mmdet-b3654169.pth 8

tools/dist_test.sh 也支持多节点测试，不过需要依赖 PyTorch 的 启动工具  

如果你的数据集格式是 VOC 或者 Cityscapes，你可以使用 tools/dataset_converters 内的脚本直接将其转化成 COCO 格式。如果是其他格式，可以使用 images2coco 脚本 进行转换。  
python tools/dataset_converters/images2coco.py \  
    ${IMG_PATH} \  
    ${CLASSES} \  
    ${OUT} \  
    [--exclude-extensions]  
参数：  
IMG_PATH: 图片根路径。  
CLASSES: 类列表文本文件名。文本中每一行存储一个类别。  
OUT: 输出 json 文件名。 默认保存目录和 IMG_PATH 在同一级。  
exclude-extensions: 待排除的文件后缀名。  
在转换完成后，使用如下命令进行测试  
  单 GPU 测试    
python tools/test.py \  
    ${CONFIG_FILE} \  
    ${CHECKPOINT_FILE} \  
    [--show]  
```


In [None]:
# Visualize the backbone feature maps (max-activation channel, `select_max`) for the
# demo/houhai/01 images with the yolov5_swin_rip config and its epoch-200 checkpoint.
! python demo/featmap_vis_demo.py demo/houhai/01 \
                                configs/my/yolov5_swin_rip.py \
                                model/best_coco_bbox_mAP_epoch_200_yolov5_swin_bs8.pth  \
                                --target-layers backbone \
                                --channel-reduction select_max \
                                --out-dir demo/vis/houhai-featmap/yolov5_swin_rip


## 可视化特征图

In [None]:
# Visualize the backbone feature maps (max-activation channel, `select_max`) for the demo/houhai
# images with the YOLOv8 + Swin-T coco128 config.
# NOTE(review): the checkpoint passed here is named like a bare Swin-T classification backbone file
# rather than a trained detector. Confirm it is the intended weight file.
! python demo/featmap_vis_demo.py demo/houhai \
                                configs/yolov8/yolov8_s_swin_t-v61_1xb2-1e_coco128.py \
                                model/swin_tiny_patch4_window7_224.pth \
                                --target-layers backbone \
                                --channel-reduction select_max \
                                --out-dir demo/vis/houhai-featmap

In [None]:
# Feature-map visualization examples for demo/featmap_vis_demo.py (cases 1-4 below).

# Signature of the drawing helper, kept for reference:
# @staticmethod
# def draw_featmap(featmap: torch.Tensor, # input must be in CHW layout
#                  overlaid_image: Optional[np.ndarray] = None, # when an image is given, the feature map is drawn on top of it
#                  channel_reduction: Optional[str] = 'squeeze_mean', # strategy for reducing many channels to one
#                  topk: int = 10, # show the topk most activated feature maps
#                  arrangement: Tuple[int, int] = (5, 2), # grid layout when channels are expanded into several images
#                  resize_shape: Optional[tuple] = None, # optional size to rescale the feature map to
#                  alpha: float = 0.5) -> np.ndarray: # blend ratio between the image and the feature map

# ### Case 1: max activation
# Visualize the max-activation channel of the 3 layers output by the backbone

! python demo/featmap_vis_demo.py demo/dog.jpg \
                                configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
                                demo/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth \
                                --target-layers backbone \
                                --channel-reduction select_max \
                                --out-file 'a.jpg'

# ### Case 2: mean activation
# Visualize the mean activation over all feature maps of the 3 layers output by the neck
! python demo/featmap_vis_demo.py demo/dog.jpg \
                                configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
                                demo/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth \
                                --target-layers neck \
                                --channel-reduction squeeze_mean \
                                --out-file 'b.jpg'

# ### Case 3: multiple target layers
# Visualize the mean activation of the two layers backbone.stage4 and backbone.stage3
! python demo/featmap_vis_demo.py demo/dog.jpg \
                                configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
                                demo/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth \
                                --target-layers backbone.stage4 backbone.stage3 \
                                --channel-reduction squeeze_mean \
                                --out-file 'c.jpg'


# ### Case 4: layout rearrangement
# Visualize the topk activated channels of backbone.stage4: `--topk 4 --arrangement 2 2` selects the
# 4 most activated channels of the multi-channel feature map and shows them in a 2x2 grid
# NOTE(review): other cells in this notebook pass --out-dir to this script while these cases pass
# --out-file. Confirm which option the installed featmap_vis_demo.py accepts.
! python demo/featmap_vis_demo.py demo/dog.jpg \
                                configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
                                yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth \
                                --target-layers backbone.stage4 \
                                --channel-reduction None \
                                --topk 4 \
                                --arrangement 2 2 \
                                --out-file 'd.jpg'


# ### 案例 5：打印网络结构
# 不清楚网络结构，可以打印出来，然后自己写
! python demo/featmap_vis_demo.py demo/dog.jpg \
                                configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
                                yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth \
                                --preview-model 
                                --method ablationcam

### Grad-Based CAM 可视化
分析特征层 bbox 级别的 Grad CAM

In [None]:
# (a) 查看 neck 输出的最小输出特征图的 Grad CAM
python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
                                configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
                                work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
                                --target-layer neck.out_layers[2]

# (b) 查看 neck 输出的中等输出特征图的 Grad CAM
python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
                                configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
                                work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
                                --target-layer neck.out_layers[1]

# (c) 查看 neck 输出的最大输出特征图的 Grad CAM
python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
                                configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
                                work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
                                --target-layer neck.out_layers[0]

### 自定义网络模块

### RSP

In [None]:
import os

import torch
from torch.serialization import load
import torchvision.models as models

import imageio
from imageio import imread

# Build torchvision's Swin-V2-B with its default pretrained weights (the .pth file is downloaded
# on first use). `weights=` replaces the deprecated `pretrained=True` flag.
# resnet18 = models.resnet18(pretrained=True)
swin = models.swin_v2_b(weights=models.Swin_V2_B_Weights.DEFAULT)
# swin = models.swin_v2_b(pretrained=False)
# model_path = r'D:\Codes\Python\AI\6_Model\orcn-swin-t-dota-latest.pth'
# model_data = torch.load(model_path)
# swin.load_state_dict(model_data)
swin.eval()  # switch to evaluation mode (disables training-only layer behaviour)
img = torch.randn(1, 3, 640, 640)
img_arr = imageio.imread(os.path.join(r'D:\Codes\Python\AI\4_Baseline\openmmlab\mmyolo-0.5.0\demo\vis\houhai', '201912.png'))  # imread returns H*W*C
img_t = torch.from_numpy(img_arr)
img_t = img_t.permute(2, 0, 1)  # H*W*C -> C*H*W
img_t = img_t[:3]  # keep only the first 3 channels (drops an alpha channel if present)
# The model needs a floating-point batch. The original fed it the unbatched integer tensor.
# Assumes 8-bit pixel values, hence the division by 255. TODO confirm for other image formats.
img_t = img_t.float().div(255).unsqueeze(0)
# Print shapes rather than the full pixel array to keep the cell output readable.
print(img_arr.shape)
with torch.no_grad():
    encoder_output = swin(img_t)

print(encoder_output.shape)

In [None]:
import os

import torch
from torch.serialization import load
import torchvision.models as models

import imageio
from imageio import imread

model_pth = r'D:\Codes\Python\AI\6_Model\orcn-swin-t-dota-latest.pth'
# SECURITY: torch.load unpickles the file; only open checkpoints from trusted sources.
net = torch.load(model_pth, map_location=torch.device('cpu'))
# A checkpoint that stores only weights (a dict with a "state_dict" entry) is not callable:
# the network has to be built first and the weights loaded into it.
if not isinstance(net, torch.nn.Module):
    raise TypeError(
        'The .pth file contains ' + type(net).__name__ + ', not a torch.nn.Module. '
        'Build the network from its config and load the state dict into it before running inference.')
net.eval()
img = torch.randn(1, 3, 640, 640)
img_arr = imageio.imread(os.path.join(r'D:\Codes\Python\AI\4_Baseline\openmmlab\mmyolo-0.5.0\demo\vis\houhai', '201912.png'))  # imread returns H*W*C
img_t = torch.from_numpy(img_arr)
img_t = img_t.permute(2, 0, 1)  # H*W*C -> C*H*W
img_t = img_t[:3]  # keep only the first 3 channels (drops an alpha channel if present)
# The model needs a floating-point batch. The original fed it the unbatched integer tensor.
# Assumes 8-bit pixel values, hence the division by 255. TODO confirm for other image formats.
img_t = img_t.float().div(255).unsqueeze(0)
# print(img_arr)
with torch.no_grad():
    encoder_output = net(img_t)

# print(img_arr)
print(encoder_output)



## 模型推理

In [None]:
import cv2
import mmcv
from mmcv.transforms import Compose
from mmengine.utils import track_iter_progress
from mmdet.registry import VISUALIZERS
from mmdet.apis import init_detector, inference_detector
# Paths of the model config file and checkpoint file
config_file = 'configs/rtmdet/rtmdet_l_8xb32-300e_coco.py'
checkpoint_file = 'checkpoints/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth'
# Build the model from the config file and checkpoint file
# NOTE(review): the first cell of this notebook recorded torch.cuda.is_available() == False;
# use device='cpu' if no GPU is available.
model = init_detector(config_file, checkpoint_file, device='cuda:0')
# Initialize the visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
# Pass the dataset meta that the model loaded from the checkpoint on to the visualizer
visualizer.dataset_meta = model.dataset_meta
# Run inference on a single image and show the result
img = 'test.jpg'  # or img = mmcv.imread(img), so the image is read only once
result = inference_detector(model, img)
# Show the result
img = mmcv.imread(img)
img = mmcv.imconvert(img, 'bgr', 'rgb')
visualizer.add_datasample(
    'result',
    img,
    data_sample=result,
    draw_gt=False,
    show=True)
# Run inference on a video and show the result
# Build the test pipeline: frames arrive as arrays, so the first transform is replaced
model.cfg.test_dataloader.dataset.pipeline[0].type = 'LoadImageFromNDArray'
test_pipeline = Compose(model.cfg.test_dataloader.dataset.pipeline)
# The visualizer was already built and given its dataset meta earlier in this cell, so it is
# reused here; a Jupyter notebook running this demo does not need to create another one.
# Display interval (ms); 0 means pause
wait_time = 1
video = mmcv.VideoReader('video.mp4')
cv2.namedWindow('video', 0)
# Iterate over the reader created above (the original referenced an undefined `video_reader`).
for frame in track_iter_progress(video):
    result = inference_detector(model, frame, test_pipeline=test_pipeline)
    visualizer.add_datasample(
        name='video',
        image=frame,
        data_sample=result,
        draw_gt=False,
        show=False)
    frame = visualizer.get_image()
    mmcv.imshow(frame, 'video', wait_time)
cv2.destroyAllWindows()