# Video Recognition exercise

## Configuring the environment

In [1]:
import cv2
from ultralytics import YOLO
import math 

# MODEL loading and config
# model
model = YOLO("../models/best_from_darin.pt")

class_names = ['Hardhat','Mask','NO-Hardhat',
              'NO-Mask','NO-Safety Vest','Person',
              'Safety Cone','Safety Vest','Machinery','Vehicle']



In [3]:
# DRAWING METHODS

# Function to get class colors
def getColours(cls_num):
    base_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
    color_index = cls_num % len(base_colors)
    increments = [(1, -2, 1), (-2, 1, -1), (1, -1, 2)]
    color = [base_colors[color_index][i] + increments[color_index][i] * 
    (cls_num // len(base_colors)) % 256 for i in range(3)]
    return tuple(color)


def drawPredictions(predictions, img): 
    for p in predictions:
        boxes = p.boxes
        for box in boxes:
            if box.conf[0] > 0.4:
                # get coordinates
                [x1, y1, x2, y2] = box.xyxy[0]
                # convert to int
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        
                cls = int(box.cls[0]) # get the class index
        
                colour = getColours(cls)
        
                # draw prediction the rectangle
                cv2.rectangle(img, (x1, y1), (x2, y2), colour, 2)
        
                # put the class name and confidence on the image
                cv2.putText(img, f'{class_names[cls]} {box.conf[0]:.2f}', (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)
                

## Capture Video

In [4]:
# Open the default camera
cam = cv2.VideoCapture(0)

# Get the default frame width and height
frame_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (frame_width, frame_height))


while True:
    ret, frame = cam.read()
    if not ret:
        break
    frame = cv2.flip(frame, 1)  
    results = model(frame, stream=True)
    drawPredictions(results, frame)
    
    # Write the frame to the output file
    out.write(frame)
    # Display the captured frame
    cv2.imshow('Camera', frame)

    # Press 'q' to exit the loop
    if cv2.waitKey(1) == ord('q'):
        break

# Release the capture and writer objects
cam.release()
out.release()
cv2.destroyAllWindows()
cv2.waitKey(1)






0: 384x640 (no detections), 60.4ms
Speed: 2.6ms preprocess, 60.4ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 36.5ms
Speed: 2.0ms preprocess, 36.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 33.8ms
Speed: 18.0ms preprocess, 33.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 31.4ms
Speed: 1.7ms preprocess, 31.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 27.3ms
Speed: 1.5ms preprocess, 27.3ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 27.9ms
Speed: 2.2ms preprocess, 27.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 

2024-11-23 09:02:38.970 python3[41633:2681667] +[IMKClient subclass]: chose IMKClient_Modern
2024-11-23 09:02:38.970 python3[41633:2681667] +[IMKInputSession subclass]: chose IMKInputSession_Modern



0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 24.2ms
Speed: 7.2ms preprocess, 24.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 25.4ms
Speed: 1.5ms preprocess, 25.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 24.7ms
Speed: 2.3ms preprocess, 24.7ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 24.7ms
Speed: 1.2ms preprocess, 24.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 42.0ms
Speed: 1.5ms preprocess, 42.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Mask, 1 NO-Hardhat, 1 NO-Safety Vest, 1 Person, 26.1ms
Speed: 1.3ms preprocess, 26.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 

-1

OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
