# YOLOv4
## Images
1. `cv.dnn.readNet(model, config="", framework="")`: Reads deep learning network represented in one of the supported formats.
    - `model`
        - `*.caffemodel`: Caffe
        - `*.pb`: TensorFlow
        - `*.t7`: Torch
        - `*.weights`: Darknet
        - `*.bin`: DLDT
        - `*.onnx`: ONNX
    - `config`
        - `*.prototxt`: Caffe
        - `*.pbtxt`: TensorFlow
        - `*.cfg`: Darknet
        - `*.xml`: DLDT
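2. `cv.dnn.readNet(framework, bufferModel, bufferConfig="")`: Overload of the function above that reads the network from in-memory buffers instead of file paths; `framework` names the format of the buffers.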
3. `cv.dnn.Model.setInputParams(scale=1.0, size, mean, swapRB=False, crop=False)`: Sets the preprocessing parameters for input frames. `scale` is the multiplier for frame values, `size` is the new input size, and `mean` is a scalar with the mean values that are subtracted from the channels. The `swapRB` flag indicates that the first and last channels are swapped, and `crop` indicates whether the image is cropped after resizing. The formula is `blob(n, c, y, x) = scale * resize(frame(y, x, c)) - mean(c)`.

In [1]:
'''
    Python Packages
    1. os
    2. cv2
    3. time
'''
import os
import cv2
import time

'''
    Class Components
    class Name: YOLOv4
    class Init components : nms_threshold, conf_threshold, path_to_classes, image_path, path_to_cfg, path_to_weights
    target: YOLOv4 Inference on Images
'''
class YOLOv4:
    """Run YOLOv4 (Darknet) detection on every image found in a directory.

    Constructing an instance performs the whole pipeline: the class labels
    are read, the image directory is scanned, and each image is read,
    resized to 640x640, annotated with the detections and shown in a window.

    Args:
        nms_threshold: Non-maximum-suppression threshold passed to `detect`.
        conf_threshold: Confidence threshold passed to `detect`.
        path_to_classes: Text file with one class label per line.
        image_path: Directory that contains the images to process.
        path_to_cfg: Path to the Darknet `*.cfg` file.
        path_to_weights: Path to the Darknet `*.weights` file.
    """

    # Initialization of parameters
    def __init__(self, nms_threshold, conf_threshold, path_to_classes, image_path, path_to_cfg, path_to_weights):
        # Non max suppression threshold
        self.nms_threshold = nms_threshold
        # Confidence threshold
        self.conf_threshold = conf_threshold
        # Path to the class labels file
        self.path_to_classes = path_to_classes
        # Image directory
        self.image_path = image_path
        # Path to configuration
        self.path_to_cfg = path_to_cfg
        # Path to weights
        self.path_to_weights = path_to_weights
        # Read the class labels, one per line
        with open(path_to_classes, 'r') as read_class:
            self.class_labels = [classes.strip() for classes in read_class.readlines()]

        # Full paths of the images to process
        self.frames = self.load_images(self.image_path)

        # Size (width, height) every image is resized to before it is annotated
        dimension = (640, 640)

        for frame in self.frames:
            # Keep the path of the image currently being processed on the instance
            self.frame = frame
            self.image = cv2.imread(frame)

            # `cv2.imread` returns None for unreadable files; skip them
            # instead of crashing on `.shape`
            if self.image is None:
                print(f"Skipping unreadable image: {frame}")
                continue

            # Height and width of the original image
            self.original_h, self.original_w = self.image.shape[:2]
            # Resized copy that the boxes are drawn on
            self.resized_image = cv2.resize(self.image, dimension, interpolation=cv2.INTER_AREA)
            # Height and width of the resized image
            self.new_h, self.new_w = self.resized_image.shape[:2]

            # Detect on the original image and draw on the resized one
            self.inference_run(self.resized_image)

    def load_images(self, image_path):
        """Return the full paths of the image files directly inside `image_path`.

        Files are matched by extension (`.jpg`, `.jpeg`, `.png`), ignoring
        case, and returned in sorted order so that runs are reproducible.
        Entries that are not regular files are ignored.

        Args:
            image_path: Directory to scan (not recursive).

        Returns:
            A list of paths, each `image_path` joined with a file name.
        """
        extensions = ('.jpg', '.jpeg', '.png')
        image_list = []

        for image_original in sorted(os.listdir(image_path)):
            if not image_original.lower().endswith(extensions):
                continue
            image_full_path = os.path.join(image_path, image_original)
            if os.path.isfile(image_full_path):
                image_list.append(image_full_path)

        return image_list

    def _get_model(self, path_to_cfg, path_to_weights):
        """Return a detection model for the given files, building it only once.

        The model is cached on the instance together with the paths it was
        built from, so the weights are not re-read from disk for every image.
        """
        key = (path_to_cfg, path_to_weights)
        if getattr(self, '_model_key', None) != key:
            # Read YOLOv4's weights and configuration (documented order: model, config)
            network = cv2.dnn.readNet(path_to_weights, path_to_cfg)
            # Request the CUDA backend
            network.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            # Request half-precision (FP16) CUDA execution
            network.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

            # Wrap the network in a detection model and set its input preprocessing
            model = cv2.dnn_DetectionModel(network)
            model.setInputParams(size=(416, 416), scale=1 / 255, swapRB=True)

            self._model = model
            self._model_key = key
        return self._model

    def inference_dnn(self, path_to_cfg, path_to_weights):
        """Detect objects in `self.image` with the OpenCV DNN module.

        Args:
            path_to_cfg: Path to the YOLOv4 configuration file.
            path_to_weights: Path to the YOLOv4 weights file.

        Returns:
            A `(classes, scores, boxes)` tuple: class indexes, the
            corresponding confidences, and the bounding boxes.
        """
        model = self._get_model(path_to_cfg, path_to_weights)
        classes, scores, boxes = model.detect(self.image, self.conf_threshold, self.nms_threshold)

        return classes, scores, boxes

    @staticmethod
    def _frames_per_second(frame_time):
        """Return whole frames per second for `frame_time` seconds (0 if not positive)."""
        return int(1 / frame_time) if frame_time > 0 else 0

    @staticmethod
    def _format_label(frame_time, fps, class_name, score):
        """Build the text drawn next to a box; `frame_time` is in seconds and shown in ms."""
        return "Frame Time: %.2f ms, FPS: %.2f, ID: %s, Score: %.2f," % (frame_time * 1000, fps, class_name, score)

    def inference_run(self, image):
        """Run detection, draw the boxes and labels on `image`, and display it.

        Detection runs on `self.image` (the original); the boxes are scaled by
        the resized/original ratios stored on the instance before being drawn
        on `image`. The window is shown for two seconds and then closed.

        Args:
            image: The resized image to draw on; it is modified in place.
        """
        # Time the detection call only
        start = time.perf_counter()
        classes, scores, boxes = self.inference_dnn(self.path_to_cfg, self.path_to_weights)
        frame_time = time.perf_counter() - start

        # Guarded so a zero elapsed time cannot raise ZeroDivisionError
        fps = self._frames_per_second(frame_time)

        # Scale factors from original-image coordinates to resized-image coordinates
        ratio_h = self.new_h / self.original_h
        ratio_w = self.new_w / self.original_w

        for (class_idx, score, box) in zip(classes, scores, boxes):
            # Printed before scaling, i.e. in original-image coordinates
            print("Box Coordinates:", box)

            # Rescale the box (x, y, width, height) to the resized image
            box[0] = int(box[0] * ratio_w)
            box[1] = int(box[1] * ratio_h)
            box[2] = int(box[2] * ratio_w)
            box[3] = int(box[3] * ratio_h)

            cv2.rectangle(image, box, (0, 255, 0), 2)
            label = self._format_label(frame_time, fps, self.class_labels[class_idx], score)

            # Draw the label slightly above and to the left of the box corner
            cv2.putText(image, label, (box[0] - 30, box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,  (252, 0, 0), 2)

        # Show image
        cv2.imshow("Image Detected:", image)
        cv2.waitKey(2000)
        cv2.destroyAllWindows()

# YOLOv4-tiny model files
path_to_weights = 'yolov4/models/yolov4-tiny.weights'
path_to_cfg = 'yolov4/models/yolov4-tiny.cfg'
# Class label list and the folder of input images
path_to_classes = 'yolov4/coco-classes.txt'
image_path = 'yolov4/images'

# Constructing the instance runs detection on every image in `image_path`
YOLOv4(
    nms_threshold=0.3,
    conf_threshold=0.38,
    path_to_classes=path_to_classes,
    image_path=image_path,
    path_to_cfg=path_to_cfg,
    path_to_weights=path_to_weights,
)
# NOTE: if the log shows `setUpNet DNN module was not built with CUDA backend; switching to CPU`,
# OpenCV must be built from source with CUDA enabled before the GPU can be used.

Box Coordinates: [ 664    0  650 1092]
Box Coordinates: [139  24 503 481]
Box Coordinates: [106  33 802 659]


<__main__.YOLOv4 at 0x7963389a1df0>

## Videos

In [2]:
'''
    Python Packages
    1. os
    2. cv2
    3. time
'''
import os
import cv2
import time

'''
    Class Components
    class Name : YOLOv4
    class Init components : nms_threshold, conf_threshold, path_to_classes, video_file, path_to_cfg, path_to_weights
    target: YOLOv4 DNN Inference on Videos
'''
class YOLOv4:
    """Run YOLOv4 (Darknet) detection on the frames of a video file.

    The constructor only stores the settings and reads the class labels;
    call `inference_run` to open the video, annotate each frame and show it.

    Args:
        nms_threshold: Non-maximum-suppression threshold passed to `detect`.
        conf_threshold: Confidence threshold passed to `detect`.
        path_to_classes: Text file with one class label per line.
        video_file: Path of the video to process.
        path_to_cfg: Path to the Darknet `*.cfg` file.
        path_to_weights: Path to the Darknet `*.weights` file.
    """

    # Initialization of parameters
    def __init__(self, nms_threshold, conf_threshold, path_to_classes, video_file, path_to_cfg, path_to_weights):
        # Non max suppression threshold
        self.nms_threshold = nms_threshold
        # Confidence threshold
        self.conf_threshold = conf_threshold
        # Path to the class labels file
        self.path_to_classes = path_to_classes
        # Video path
        self.video_file = video_file
        # Path to configuration
        self.path_to_cfg = path_to_cfg
        # Path to weights
        self.path_to_weights = path_to_weights

        # Read the class labels, one per line
        with open(path_to_classes, 'r') as read_class:
            self.class_labels = [classes.strip() for classes in read_class.readlines()]

    def _get_model(self, path_to_cfg, path_to_weights):
        """Return a detection model for the given files, building it only once.

        The model is cached on the instance together with the paths it was
        built from, so the weights are not re-read from disk for every frame.
        """
        key = (path_to_cfg, path_to_weights)
        if getattr(self, '_model_key', None) != key:
            # Read YOLOv4's weights and configuration (documented order: model, config)
            network = cv2.dnn.readNet(path_to_weights, path_to_cfg)
            # Request the CUDA backend
            network.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            # Request half-precision (FP16) CUDA execution
            network.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

            # Wrap the network in a detection model and set its input preprocessing
            model = cv2.dnn_DetectionModel(network)
            model.setInputParams(size=(416, 416), scale=1 / 255, swapRB=True)

            self._model = model
            self._model_key = key
        return self._model

    def inference_dnn(self, frame, path_to_cfg, path_to_weights):
        """Detect objects in `frame` with the OpenCV DNN module.

        Args:
            frame: The image to run detection on.
            path_to_cfg: Path to the YOLOv4 configuration file.
            path_to_weights: Path to the YOLOv4 weights file.

        Returns:
            A `(classes, scores, boxes)` tuple: class indexes, the
            corresponding confidences, and the bounding boxes.
        """
        model = self._get_model(path_to_cfg, path_to_weights)
        classes, scores, boxes = model.detect(frame, self.conf_threshold, self.nms_threshold)

        return classes, scores, boxes

    @staticmethod
    def _frames_per_second(frame_time):
        """Return whole frames per second for `frame_time` seconds (0 if not positive)."""
        return int(1 / frame_time) if frame_time > 0 else 0

    @staticmethod
    def _format_label(frame_time, fps, class_name, score):
        """Build the text drawn next to a box; `frame_time` is in seconds and shown in ms."""
        return "Frame Time: %.2f ms, FPS: %.2f, ID: %s, Score: %.2f," % (frame_time * 1000, fps, class_name, score)

    def inference_run(self):
        """Play the video, drawing the detections on every frame.

        Each frame is resized to 1000x800, passed to `inference_dnn`, annotated
        and shown in a window. Playback stops at the end of the video or when
        `q` is pressed. The capture and the windows are released even if an
        error occurs while processing a frame.
        """
        video_capture = cv2.VideoCapture(self.video_file)

        if not video_capture.isOpened():
            print('Error openeing video File')

        try:
            # Not entered at all when the video failed to open
            while video_capture.isOpened():
                grabbed, frame = video_capture.read()

                # No more frames (or a read error): stop
                if not grabbed:
                    break

                # Resize frame
                frame = cv2.resize(frame, (1000, 800))

                # Time the detection call only
                start = time.perf_counter()
                classes, scores, boxes = self.inference_dnn(frame, self.path_to_cfg, self.path_to_weights)
                frame_time = time.perf_counter() - start

                # Guarded so a zero elapsed time cannot raise ZeroDivisionError
                fps = self._frames_per_second(frame_time)

                for (class_idx, score, box) in zip(classes, scores, boxes):
                    cv2.rectangle(frame, box, (0, 255, 0), 2)
                    label = self._format_label(frame_time, fps, self.class_labels[class_idx], score)

                    # Draw the label slightly above and to the left of the box corner
                    cv2.putText(frame, label, (box[0] - 40, box[1] - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (252, 0, 0), 2)

                cv2.imshow("Image detected:", frame)

                # Poll the keyboard for 1 ms; `q` stops playback
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            video_capture.release()
            cv2.destroyAllWindows()
        
def main():
    """Build a YOLOv4 detector for the demo video and run it."""
    # Thresholds and file locations, keyed by constructor parameter name
    settings = {
        'nms_threshold': 0.3,
        'conf_threshold': 0.38,
        'path_to_classes': 'yolov4/coco-classes.txt',
        'video_file': 'yolov4/videos/main_demo.mp4',
        'path_to_cfg': 'yolov4/models/yolov4-tiny.cfg',
        'path_to_weights': 'yolov4/models/yolov4-tiny.weights',
    }

    detector = YOLOv4(**settings)
    detector.inference_run()


if __name__ == "__main__":
    main()