In [1]:
import sys
!{sys.executable} -m pip install import-ipynb



# YOLO目标检测的推理（预测）脚本

用于将训练好的模型加载进来，对输入的图片进行目标检测预测、绘制框、输出类别和置信度等操作。

In [3]:
import colorsys
import os
import time

import numpy as np
import torch
import torch.nn as nn
from PIL import ImageDraw, ImageFont
import import_ipynb
from darknet import YoloBody
from utils import (cvtColor, get_anchors, get_classes, preprocess_input, resize_image, show_config)
from darknet import DecodeBox

class YOLO(object):
    """Load a trained YOLO network and run object detection on single images.

    Settings come from ``_defaults``. Each entry becomes an instance attribute
    of the same name and can be overridden through a keyword argument to the
    constructor.
    """

    _defaults = {
        # The model_path points to the weight file under the logs folder, and classes_path points to the .txt file under model_data.
        # After training, there will be multiple weight files in the logs folder. Choose the one with the lowest validation loss.
        # A lower validation loss does not necessarily indicate a higher mAP; it only means that the weights generalize better on the validation set.
        "model_path"        : 'logs/best_epoch_weights.pth',
        "classes_path"      : 'model_data/voc_classes.txt',
        # anchors_path refers to the .txt file containing the anchor boxes and is generally not modified.
        # anchors_mask helps the code locate the corresponding anchor boxes and is also generally not modified.
        "anchors_path"      : 'model_data/yolo_anchors.txt',
        "anchors_mask"      : [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
        # The input image size must be a multiple of 32.
        "input_shape"       : [416, 416],
        # Only the predicted boxes with a score higher than the confidence threshold will be kept.
        "confidence"        : 0.5,
        # The nms_iou value used for Non-Maximum Suppression (NMS).
        "nms_iou"           : 0.3,
        # This variable is used to control whether to use letterbox_image for distortion-free resizing of the input image.
        # After multiple tests, it was found that turning off letterbox_image and using direct resizing yields better results.
        "letterbox_image"   : False,
        # Whether to use CUDA.
        "cuda"              : True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the class-level default for the setting named ``n``.

        Unknown names do not raise. A message string of the form
        ``"Unrecognized attribute name '<n>'"`` is returned instead, so a
        caller that needs to detect a miss has to compare against that text.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # Initialize YOLO.
    def __init__(self, **kwargs):
        """Apply the configuration, build the helpers and load the network.

        Args:
            **kwargs: Overrides for entries of ``_defaults``. Names that are
                not in ``_defaults`` are still set as instance attributes.
        """
        # Work on a copy of the class-level defaults. Writing overrides into
        # the shared class dict would silently change the defaults of every
        # YOLO object created afterwards (and the answers of get_defaults).
        config = dict(type(self)._defaults)
        self.__dict__.update(config)
        for name, value in kwargs.items():
            setattr(self, name, value)
            config[name] = value
        # Shadow the class attribute so self._defaults holds the settings
        # that are actually in effect for this instance.
        self._defaults = config

        # Get the number of classes and anchor boxes.
        self.class_names, self.num_classes = get_classes(self.classes_path)
        self.anchors, self.num_anchors = get_anchors(self.anchors_path)
        self.bbox_util = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), self.anchors_mask)

        # One colour per class, evenly spaced around the HSV hue circle and
        # converted to 0-255 integer RGB tuples for drawing.
        hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
        self.colors = [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
            for hsv in hsv_tuples
        ]
        self.generate()

        # generate() may have switched CUDA off; report the effective value.
        self._defaults["cuda"] = self.cuda
        show_config(**self._defaults)

    # Generate the model.
    def generate(self, onnx=False):
        """Build the network, load the weights and place it on the device.

        Args:
            onnx: When true, skip the DataParallel wrapping and the move to
                the GPU.
        """
        # Build the YOLOv3 model and load the weights for the YOLOv3 model.
        self.net = YoloBody(self.anchors_mask, self.num_classes)

        # Requesting CUDA on a machine without it used to crash in .cuda();
        # fall back to the CPU instead and remember the effective setting so
        # that detect_image does not try to move its input to the GPU either.
        if self.cuda and not torch.cuda.is_available():
            print('CUDA was requested but is not available; falling back to CPU.')
            self.cuda = False
        device = torch.device('cuda' if self.cuda else 'cpu')

        # NOTE: torch.load unpickles the checkpoint, so only weight files from
        # a trusted source should be loaded here.
        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
        # Switch to inference behaviour: Dropout is turned off and BatchNorm
        # uses its running statistics instead of per-batch statistics.
        self.net = self.net.eval()
        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        if not onnx:
            if self.cuda:
                self.net = nn.DataParallel(self.net)
                self.net = self.net.cuda()

    # Detect images.
    def detect_image(self, image, crop=False, count=False):
        """Run detection on ``image`` and draw the results onto it.

        Args:
            image: Input picture; its ``size`` attribute is read and it is
                drawn on with ``ImageDraw``, so a PIL image is expected.
            crop: Accepted for interface compatibility; not used by this
                method.
            count: Accepted for interface compatibility; not used by this
                method.

        Returns:
            The image returned by ``cvtColor`` with one coloured box and one
            "<class> <score>" label per detection, or that image without any
            drawing when nothing passes the thresholds.
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Convert the image to RGB here to prevent errors during prediction when using grayscale images.
        # The code only supports prediction on RGB images, so all other types of images will be converted to RGB.
        image = cvtColor(image)
        # Add gray bars to the image to achieve distortion-free resizing.
        # Alternatively, the image can be directly resized for recognition.
        image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        # Move channels first and add the batch_size dimension.
        image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0)

        with torch.no_grad():
            images = torch.from_numpy(image_data)
            if self.cuda:
                images = images.cuda()
            # Feed the image into the network for prediction.
            outputs = self.net(images)
            outputs = self.bbox_util.decode_box(outputs)
            # Stack the predicted boxes and then perform Non-Maximum Suppression (NMS).
            results = self.bbox_util.non_max_suppression(
                torch.cat(outputs, 1), self.num_classes, self.input_shape,
                image_shape, self.letterbox_image,
                conf_thres=self.confidence, nms_thres=self.nms_iou)

        if results[0] is None:
            return image

        # Columns 0-3 hold the box, 4 and 5 are multiplied into the score and
        # 6 is the class index.
        top_label = np.array(results[0][:, 6], dtype='int32')
        top_conf = results[0][:, 4] * results[0][:, 5]
        top_boxes = results[0][:, :4]
        return self._draw_detections(image, top_label, top_conf, top_boxes)

    @staticmethod
    def _clip_box(box, width, height):
        """Floor a (top, left, bottom, right) box and clamp it to the image.

        Returns plain Python ints so the values can be handed to the drawing
        calls without carrying numpy scalar types along.
        """
        top, left, bottom, right = box
        top = max(0, int(np.floor(top)))
        left = max(0, int(np.floor(left)))
        bottom = min(height, int(np.floor(bottom)))
        right = min(width, int(np.floor(right)))
        return top, left, bottom, right

    def _draw_detections(self, image, labels, scores, boxes):
        """Draw one box and caption per detection on ``image`` and return it."""
        # Set the font and border thickness. The font size is clamped to 1 so
        # that very small images do not ask for a zero-sized font.
        font_size = max(1, int(np.floor(3e-2 * image.size[1] + 0.5)))
        font = ImageFont.truetype(font='model_data/simhei.ttf', size=font_size)
        thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1))
        draw = ImageDraw.Draw(image)

        for index, class_id in enumerate(labels):
            class_id = int(class_id)
            predicted_class = self.class_names[class_id]
            color = self.colors[class_id]
            score = scores[index]
            top, left, bottom, right = self._clip_box(boxes[index], image.size[0], image.size[1])

            label = '{} {:.2f}'.format(predicted_class, score)
            bbox = draw.textbbox((0, 0), label, font=font)
            label_width = bbox[2] - bbox[0]
            label_height = bbox[3] - bbox[1]
            print(label, top, left, bottom, right)

            # Put the caption above the box when there is room, else inside.
            if top - label_height >= 0:
                text_origin = (left, top - label_height)
            else:
                text_origin = (left, top + 1)

            # A thick border is drawn as nested one-pixel rectangles. Pillow
            # documents x1 >= x0 and y1 >= y0 for rectangle, so stop before
            # the inset box of a small detection turns inside out.
            for inset in range(thickness):
                x0, y0 = left + inset, top + inset
                x1, y1 = right - inset, bottom - inset
                if x1 < x0 or y1 < y0:
                    break
                draw.rectangle([x0, y0, x1, y1], outline=color)

            caption_end = (text_origin[0] + label_width, text_origin[1] + label_height)
            draw.rectangle([text_origin, caption_end], fill=color)
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

        return image