In [21]:
import cv2
print(cv2.__version__)

4.9.0


In [23]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO
import matplotlib.pyplot as plt

In [None]:
'''resizes the image to a standard size,
 converts to numerical format, 
 normalizes the values, 
 and expands the dimensions. 
 This standardizes the input image before
   feeding into the neural network.'''


def process_image(img):
    """Resize, reduce and expand image.

    # Argument:
        img: original image.

    # Returns
        image: ndarray(64, 64, 3), processed image.
    """
    image = cv2.resize(img, (416, 416),
                       interpolation=cv2.INTER_CUBIC) # Interpolación es un método utilizado 
                                                        # para estimar valores de píxeles desconocidos 
                                                        # cuando realizo cambio de tamaño
    image = np.array(image, dtype='float32') # Convierto la imagen previamente procesada en un np.array
    image /= 255. # Normalizo la imagen, es decir, la llevo a un rango de 0 a 1
    image = np.expand_dims(image, axis=0) # Añado una dimensión extra al array, para que sea de 4 dimensiones, me lo pedirá la red neuronal

    return image
print(process_image(img))


The function takes in an image (img) and processes it through several steps:

Resize the image to 416 x 416 pixels using cubic interpolation. This interpolation method estimates the unknown pixel values when resizing to maintain image quality.

Convert the resized image to a numpy array of float32 data type. This formats the image into a numerical array for easier processing.

Normalize the pixel values to be between 0 and 1 by dividing the entire image by 255. This scales the values to a range that is more manageable for the neural network.

Expand the dimensions of the array by adding a new axis at index 0. This makes the array 4D instead of 3D, which is required as input for the neural network model.



In [4]:
'''This provides the list of class name strings 
that will be used to identify classes 
in the neural network model. 
The returned list contains the class names
 with no extra whitespace.'''


def get_classes(file):
    """Get classes name.

    # Argument:
        file: classes name for database.

    # Returns
        class_names: List, classes name.

    """
    with open(file) as f:
        class_names = f.readlines() # Leo el archivo que contiene los nombres de las clases y los guardo en una lista
    class_names = [c.strip() for c in class_names] # Elimino los espacios en blanco al ppio y al final de cada línea

    return class_names

The function takes in a file path for a file that contains class names (file). It then:

Opens the file and reads all the lines, storing them in a list called class_names. This reads the class name strings from the file.

Strips any whitespace characters (spaces, newlines) from the beginning and end of each class name string in the list. This cleans up the strings.

Returns the class_names list.

So in simple terms, it:

Opens the class name file
Reads the lines into a list
Removes extra whitespace from the strings
Returns the list of clean class name strings

In [5]:
'''Takes the raw detection results, 
calculates nice box coordinates, 
draws boxes and text onto the image,
 and prints some info to the console. 
 This creates a nice visual detection
   output.
'''


def draw(image, boxes, scores, classes, all_classes): # Boxes y Scores son los resultados de la red neuronal, me lo dará yolo.predict
                                                        #all_classes es la lista con los nombres de las clases, lo cojo de get_classes(file) más adelante 
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        top = max(0, np.floor(x + 0.5).astype(int))
        left = max(0, np.floor(y + 0.5).astype(int))
        right = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        bottom = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(all_classes[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

The goal of this function is to take the results from the neural network prediction (boxes, scores, classes) and draw them on the original image to visualize the detections.

It takes in:

The original image
The bounding box coordinates for each detected object
The confidence scores for each detection
The class IDs for each detection
A list of class name strings
Then it loops through each detection and:

Extracts the box coordinates, score, and class ID

Calculates the top, left, right, and bottom pixel coordinates for the box based on the x, y, width, and height

Draws a rectangle on the image using those box coordinates and a color

Writes the class name and score on the image next to the box

Prints the class name, score, and box coordinates to the console



In [6]:
'''it preprocesses the image, 
runs object detection using the YOLO model,
 draws the detections on the image,
  and returns the final output image. 
  This provides the full pipeline 
  from input image to processed output 
  image with object detection results.
'''

def detect_image(image, yolo, all_classes):
    """Use yolo v3 to detect images.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: processed image.
    """
    pimage = process_image(image)

    start = time.time()
    boxes, classes, scores = yolo.predict(pimage, image.shape)
    end = time.time()

    print('time: {0:.2f}s'.format(end - start))

    if boxes is not None:
        draw(image, boxes, scores, classes, all_classes)

    return image

The goal is to run a YOLO object detection model on an input image and draw the detections on the image.

It takes as input:

The original image
The YOLO model object
A list of all class name strings
It then:

Preprocesses the image using the process_image function to resize, format, and normalize it for the model.

Runs the YOLO model to get the predicted boxes, class IDs, and confidence scores. It times how long this takes.

If there are any detected boxes, it calls the draw function to draw the boxes, class names, and scores on the original input image.

Returns the image with the detections drawn on it.

In [7]:
'''It opens the video
Loops through frames
Runs object detection on each frame
Displays and saves the detected frames
Closes the video and cleanup
This provides a full pipeline
 to run object detection on a video
  and save the output.

'''
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    # Argument:
        video: video file.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    if not os.path.exists(video_path):
        os.makedirs(video_path)
    camera = cv2.VideoCapture(video_path)
    
    if not camera.isOpened():
        print("Failed to open input video!")
        return
    
    cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

    # Prepare for saving the detected video
    sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')

    
    vout = cv2.VideoWriter()
    vout.open(os.path.join("videos", "res", video), fourcc, 10, sz, True)

    if vout.isOpened():
        print("Video writer is open!")
    else:
        print("Failed to open video writer!")

    while True:
        res, frame = camera.read()

        if not res:
            break

        image = detect_image(frame, yolo, all_classes)
        cv2.imshow("detection", image)

        # Save the video frame by frame
        vout.write(image)

        if cv2.waitKey(110) & 0xff == 27:
                break

    vout.release()
    camera.release()
    

The goal is to run object detection on a video file using a trained YOLO model.

It takes as input:

The video file path
The YOLO model
List of class name strings
It then:

Opens the video file using OpenCV

Checks if the video opened successfully

Creates a window to display the video

Initializes a VideoWriter to save the output video

Loops through each frame of the video:

Runs object detection on the frame using detect_image()
Displays the detected frame in the window
Writes the detected frame to the output video
Closes the video writer and releases the camera

In [8]:
'''Create a YOLO model instance
Get path to class name file
Extract class names from file into a list
Store the list of class names in all_classes
This provides the YOLO model object and list 
of class names that will be used
 for detecting objects in images/video.
'''
yolo = YOLO(0.3, 0.5)
file = 'data/coco_classes.txt'
all_classes = get_classes(file)



Create a YOLO object detection model instance, with confidence threshold of 0.3 and IOU threshold of 0.5.

Get the path to the coco_classes.txt file which contains the list of class names.

Call the get_classes() function, passing the file path.

get_classes() reads the class name strings from the file and returns a list of all the classes.

Store this list of classes in the all_classes variable.

In [9]:
'''Defines the input image file
Constructs the full path
Reads in the image
Runs object detection
Saves the detected image to an output file
This loads an image, runs detection on it using YOLO,
 and saves the output image with bounding boxes drawn.'''

# # SI QUEREMOS DETECTAR UNA IMAGEN GUARDADA:

# f = 'perro.jpg'
# path = 'imagenes/'+f
# image = cv2.imread(path)
# image = detect_image(image, yolo, all_classes)
# cv2.imwrite('imagenes/res/' + f, image) 

'Defines the input image file\nConstructs the full path\nReads in the image\nRuns object detection\nSaves the detected image to an output file\nThis loads an image, runs detection on it using YOLO,\n and saves the output image with bounding boxes drawn.'

Define the filename of the image to detect as 'perro.jpg' and store in variable f.

Construct the full path to the image file by concatenating 'imagenes/' and the filename f. Store this path in variable path.

Use cv2.imread() to read the image from the path and load it into a numpy array. Store this in variable image.

Call the detect_image() function, passing the image, yolo model, and all_classes list. This runs detection on the image.

Save the detected image to 'imagenes/res/perro.jpg' by concatenating the path and filename f.

In [10]:
'''Calls detect_video(), passing the video file, 
model, and classes detect_video() handles opening the video,
 detecting objects, and saving the output
This runs YOLO object detection on each frame of the input video and saves a new video with detections drawn on it.

So with one line we can easily process a full video for object detection using the YOLO model.'''

# SI QUEREMOS DETECTAR UN VIDEO GUARDADO:
detect_video('prueba.mp4',yolo,all_classes)

Failed to open input video!


OpenCV: Couldn't read video stream from file "videos/test/prueba.mp4"


Call the detect_video() function, passing:

The filename of the video to detect ('prueba.mp4')
The yolo model object
The all_classes list of class names
detect_video() will then:

Open the video file
Loop through each frame
Run object detection on each frame using yolo and all_classes
Display and save the detected frames to an output video
