In [1]:
import os
import cv2
import copy
import openai
import numpy as np

from torchvision import transforms
import torchvision.models as models
import torch

import matplotlib.pyplot as plt
from functions import *

  warn(


In [2]:
def load_deeplab_model():
    """
    Build the pretrained DeepLabV3 (ResNet-101 backbone) segmentation network.

    The network is switched to evaluation mode before it is handed back, so
    it is ready for inference as returned.

    Returns:
        torch.nn.Module: The DeepLabV3 model in eval mode.
    """
    # Module.eval() hands back the module itself, so construction and the
    # mode switch can be expressed as a single chained call.
    return models.segmentation.deeplabv3_resnet101(pretrained=True).eval()

def apply_semantic_segmentation(model, img):
    """
    Apply semantic segmentation to an image and colour-code the result.

    The image is converted to a normalised tensor, passed through ``model``,
    and the per-pixel argmax over the model's ``'out'`` scores is rendered as
    an RGB mask: pixels predicted as class 1 become red, pixels predicted as
    class 0 become green, and every other class stays black.

    Args:
        model (torch.nn.Module): Segmentation model whose forward pass returns
            a mapping containing an ``'out'`` entry (e.g. the DeepLab model).
        img (ndarray): The image to segment.
            NOTE(review): ToTensor does not reorder channels; if ``img`` comes
            straight from cv2 it is BGR while the normalisation constants look
            like RGB statistics -- confirm the channel order with the caller.

    Returns:
        ndarray: uint8 mask with one RGB triple per predicted pixel.
    """
    # define the transformations
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # apply transformations and add the batch dimension the model expects
    batch = preprocess(img).unsqueeze(0)

    # Inference only: disable autograd so no graph or intermediate
    # activations are recorded and kept alive for a backward pass.
    with torch.no_grad():
        scores = model(batch)['out']

    # Drop only the batch axis (squeeze(0)); a bare squeeze() would also
    # remove a spatial axis of size 1 and break the class-axis argmax.
    class_map = torch.argmax(scores.squeeze(0), dim=0).cpu().numpy()

    # create RGB version of mask: channel 0 = red, channel 1 = green
    segmented_image = np.zeros(class_map.shape + (3,), dtype=np.uint8)
    segmented_image[class_map == 1, 0] = 255
    segmented_image[class_map == 0, 1] = 255

    return segmented_image

In [3]:
def draw_boxes(image, detected_objects):
    """
    Annotate an image with one rectangle and caption per detected object.

    Drawing is done directly on ``image`` through cv2; nothing is returned.

    Args:
        image (ndarray): The image on which to draw bounding boxes and labels.
        detected_objects (list): Dictionaries with "label", "confidence" and
            "location" entries; "location" provides "center", "width" and
            "height". These values are multiplied by the image dimensions, so
            they are presumably fractions of the frame size -- confirm
            against the detector that produces them.
    """
    box_color = (0, 255, 0)  # green
    stroke = 2

    for detection in detected_objects:
        name = detection["label"]
        score = detection["confidence"]
        geometry = detection["location"]

        img_h = image.shape[0]
        img_w = image.shape[1]

        # convert centre/size into a pixel top-left corner plus pixel extent
        cx, cy = geometry["center"]
        rel_w, rel_h = geometry["width"], geometry["height"]
        left = int(cx * img_w - rel_w * img_w / 2)
        top = int(cy * img_h - rel_h * img_h / 2)
        box_w = int(rel_w * img_w)
        box_h = int(rel_h * img_h)

        # outline of the detection
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(image, top_left, bottom_right, box_color, stroke)

        # caption (label plus two-decimal confidence) just above the box
        caption = f"{name}: {score:.2f}"
        cv2.putText(image, caption, (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, box_color, stroke)

In [4]:
# Input clip, and the destination for its annotated copy in the same folder.
video_path = os.path.join('videos', 'video_1.mp4')
output_path = os.path.join(os.path.dirname(video_path), 'video_1_processed.mp4')

In [5]:
# Detect objects in every frame of `video_path`, draw the detections on the
# frame, preview it in a window and write the annotated frames to
# `output_path`.

# load YOLO model and output layers
net, output_layers = load_yolo_model()

# get_classes_colors() takes no per-frame input, so fetch its result once
# instead of calling it again for every frame of the video
classes, colors = get_classes_colors()

# open video file
cap = cv2.VideoCapture(video_path)

# check if video capture was successful; raise rather than call exit(),
# which would end the whole interpreter/kernel session
if not cap.isOpened():
    cap.release()
    raise IOError("Unable to open Video!")

out = None
try:
    # get frames per second and frame size
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # save processed video with VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_w, frame_h))
    if not out.isOpened():
        # without this check, a writer that failed to open would go unnoticed
        raise IOError(f"Unable to open video writer for {output_path}")

    # loop to process each frame in video
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # no frame was returned: end of stream or read failure
            break

        # detect objects in frame
        outs, width, height = detect_objects(net, output_layers, frame)
        detected_objects = process_detections(outs, width, height, classes, colors, frame)

        # draw bounding boxes and labels on frame
        draw_boxes(frame, detected_objects)

        # display frame with detected objects and save it
        cv2.imshow("Video", frame)
        cv2.waitKey(1)  # give the GUI event loop time to repaint the window
        out.write(frame)
finally:
    # release video capture and output writer objects even if a frame
    # raised, so the input file, output file and preview window are freed
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()