## 推理时间

In [10]:
import os
import platform
import shutil
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

import sys
yolor_path = '/data/workspace/yuzijian/yolor'
sys.path.append(yolor_path)


from utils.google_utils import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, strip_optimizer)
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

from models.models import *
from utils.datasets import *
from utils.general import *

In [11]:
from easydict import EasyDict

# Inference settings gathered in one attribute-style container; the later
# cells read them as ``opt.<name>``.
opt = EasyDict({
    'agnostic_nms': False,   # forwarded to non_max_suppression(agnostic=...)
    'augment': False,        # forwarded to the model's forward call
    'classes': None,         # forwarded to non_max_suppression(classes=...)
    'conf_thres': 0.5,       # confidence threshold handed to NMS
    'device': '0',           # handed to select_device()
    'img_size': 1280,        # used for model construction and letterboxing
    'iou_thres': 0.5,        # IoU threshold handed to NMS
    'names': 'data/kitti.names',
    'output': 'runs/inference/time',
    'save_txt': False,
    'source': '/data/workspace/dataset/kitti_2d_detection/training/image_2/000001.png',
    'update': False,
    'view_img': False,
    'cfg': 'cfg/yolor_p6.cfg',
    'weights': ['runs/finetune/004/weights/best_ap.pt'],
})

In [12]:
def load_classes(path):
    """Read a ``*.names`` file and return its non-empty lines.

    Args:
        path: Location of the text file holding one entry per line.

    Returns:
        A list with every line of the file, in file order, except lines that
        are completely empty (for example the one produced by a trailing
        newline).
    """
    with open(path, 'r') as handle:
        lines = handle.read().split('\n')
    # Keep only lines that contain at least one character.
    return [line for line in lines if line]

In [19]:
# Benchmark setup: build the network described by `opt`, load its checkpoint,
# run one warm-up pass and preprocess a single image into the batched tensor
# `img` that the timing cell feeds to the model.
out, source, weights, view_img, save_txt, imgsz, cfg, names = \
    opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.cfg, opt.names
# str.startswith accepts a tuple, so one call covers both stream prefixes.
webcam = source == '0' or source.startswith(('rtsp', 'http')) or source.endswith('.txt')

# Initialize
device = select_device(opt.device)
if os.path.exists(out):
    shutil.rmtree(out)  # delete output folder
os.makedirs(out)  # make new output folder
half = device.type != 'cpu'  # half precision only supported on CUDA

# Load model
# Place the network on the selected device rather than calling .cuda()
# unconditionally, which would fail whenever select_device() returns the CPU.
model = Darknet(cfg, imgsz).to(device)
model.load_state_dict(torch.load(weights[0], map_location=device)['model'])
model.eval()
if half:
    model.half()  # to FP16

# Get names and colors
names = load_classes(names)  # `names` is rebound from the file path to the list of class names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

# Run inference
img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
if device.type != 'cpu':
    # Warm-up pass; no_grad avoids building an autograd graph for a result
    # that is discarded.
    with torch.no_grad():
        _ = model(img.half() if half else img)  # run once
else:
    _ = None

image_hwc = cv2.imread(source)
if image_hwc is None:
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising, which would otherwise surface later as an obscure error.
    raise FileNotFoundError('Image not found or unreadable: %r' % (source,))
image_chw = letterbox(image_hwc, new_shape=imgsz, auto_size=64)[0]
# cv2.imread yields BGR channel order: flip to RGB, then HWC -> CHW.
# NOTE(review): the RGB flip mirrors the upstream YOLOR image loader - confirm
# the checkpoint was trained through that loader.
# The reversed view has negative strides, so copy it into contiguous memory
# before handing it to torch.from_numpy.
image_chw = np.ascontiguousarray(image_chw[:, :, ::-1].transpose(2, 0, 1))
img = torch.from_numpy(image_chw).to(device)
img = img.half() if half else img.float()  # uint8 to fp16/32
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
    img = img.unsqueeze(0)  # add the batch dimension

In [21]:
%%timeit -n 500
with torch.no_grad():
    # Inference
    pred = model(img, augment=opt.augment)[0]
    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)

27.1 ms ± 127 µs per loop (mean ± std. dev. of 7 runs, 500 loops each)


In [15]:
# Point the benchmark at the second model: a different network config and the
# checkpoint from another fine-tuning run. The model-loading cell must be
# re-run afterwards for the change to take effect.
# NOTE(review): this cell's execution counter (15) is lower than that of the
# first model-loading cell (19) and its timing cell (21), so the first timing
# may already have used these values - confirm by re-running top to bottom.
opt.cfg = 'cfg/yolor_w6.cfg'
opt.weights = ['runs/finetune/003/weights/best_ap.pt']

In [22]:
# Benchmark setup: build the network described by `opt`, load its checkpoint,
# run one warm-up pass and preprocess a single image into the batched tensor
# `img` that the timing cell feeds to the model.
out, source, weights, view_img, save_txt, imgsz, cfg, names = \
    opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.cfg, opt.names
# str.startswith accepts a tuple, so one call covers both stream prefixes.
webcam = source == '0' or source.startswith(('rtsp', 'http')) or source.endswith('.txt')

# Initialize
device = select_device(opt.device)
if os.path.exists(out):
    shutil.rmtree(out)  # delete output folder
os.makedirs(out)  # make new output folder
half = device.type != 'cpu'  # half precision only supported on CUDA

# Load model
# Place the network on the selected device rather than calling .cuda()
# unconditionally, which would fail whenever select_device() returns the CPU.
model = Darknet(cfg, imgsz).to(device)
model.load_state_dict(torch.load(weights[0], map_location=device)['model'])
model.eval()
if half:
    model.half()  # to FP16

# Get names and colors
names = load_classes(names)  # `names` is rebound from the file path to the list of class names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

# Run inference
img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
if device.type != 'cpu':
    # Warm-up pass; no_grad avoids building an autograd graph for a result
    # that is discarded.
    with torch.no_grad():
        _ = model(img.half() if half else img)  # run once
else:
    _ = None

image_hwc = cv2.imread(source)
if image_hwc is None:
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising, which would otherwise surface later as an obscure error.
    raise FileNotFoundError('Image not found or unreadable: %r' % (source,))
image_chw = letterbox(image_hwc, new_shape=imgsz, auto_size=64)[0]
# cv2.imread yields BGR channel order: flip to RGB, then HWC -> CHW.
# NOTE(review): the RGB flip mirrors the upstream YOLOR image loader - confirm
# the checkpoint was trained through that loader.
# The reversed view has negative strides, so copy it into contiguous memory
# before handing it to torch.from_numpy.
image_chw = np.ascontiguousarray(image_chw[:, :, ::-1].transpose(2, 0, 1))
img = torch.from_numpy(image_chw).to(device)
img = img.half() if half else img.float()  # uint8 to fp16/32
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
    img = img.unsqueeze(0)  # add the batch dimension

In [41]:
%%timeit -n 500
with torch.no_grad():
    # Inference
    pred = model(img, augment=opt.augment)[0]
    # Apply NMS
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)

27.1 ms ± 75.7 µs per loop (mean ± std. dev. of 7 runs, 500 loops each)


In [24]:
# Show the shape of the preprocessed input tensor `img` that the timing cells feed to the model.
img.shape

torch.Size([1, 3, 448, 1280])