In [1]:
# import necessary libraries
import cv2
import torch
import json
import numpy as np
import os
from pathlib import Path
import shutil
from google.colab import files

# Load the YOLOv5 "small" (yolov5s) detector through torch.hub. The resulting
# module-level `model` is the detector called by detect_and_associate_objects().
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # yolov5 small model

# function to detect objects and associate sub-objects with parent objects
def detect_and_associate_objects(frame, object_id_counter):
    """
    Detect objects in a frame and attach each sub-object to its nearest parent.

    Detections with confidence above 0.5 are split into parent classes
    ("car", "person") and sub-object classes ("tire", "door", "helmet").
    Parents are collected first, so a sub-object is matched against every
    parent in the frame regardless of the order of the detection table.
    Each sub-object is attached to the parent whose bounding-box centre is
    nearest, provided the centres differ by less than 100 px on both axes.

    Parameters:
    - frame: input video frame for processing
    - object_id_counter: last object ID handed out so far

    Returns:
    - json_output: list of parent dicts with keys "object", "id", "bbox"
      ([xmin, ymin, xmax, ymax]) and "subobject" (a dict with "object",
      "id", "bbox", or None)
    - object_id_counter: updated counter after processing the frame

    Notes:
    - IDs are assigned to all parents of the frame first, then to the
      sub-objects that found a parent; unmatched sub-objects get no ID.
    - A parent holds a single "subobject" slot, so when several sub-objects
      pick the same parent the last one wins.
    """
    parent_classes = ("car", "person")
    sub_classes = ("tire", "door", "helmet")
    min_confidence = 0.5
    max_offset = 100  # per-axis limit (pixels) between bbox centres

    img = frame[..., ::-1]  # reverse the channel axis (convert frame to RGB for yolov5)
    results = model(img)  # run object detection
    detections = results.pandas().xyxy[0]  # results as a pandas DataFrame

    # keep only confident detections as (class name, integer bbox) pairs
    confident = [
        (row['name'], [int(row[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')])
        for _, row in detections.iterrows()
        if row['confidence'] > min_confidence
    ]

    json_output = []
    object_id = object_id_counter

    # pass 1: register every parent so association does not depend on row order
    for obj_class, bbox in confident:
        if obj_class in parent_classes:
            object_id += 1
            json_output.append({
                "object": obj_class,
                "id": object_id,
                "bbox": bbox,
                "subobject": None,  # filled in by pass 2 when a sub-object matches
            })

    # pass 2: attach each sub-object to the nearest parent within range
    for obj_class, bbox in confident:
        if obj_class not in sub_classes:
            continue
        sub_x, sub_y = (bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2

        nearest_parent = None
        nearest_dist = None
        for parent in json_output:
            parent_box = parent["bbox"]
            dx = abs((parent_box[0] + parent_box[2]) // 2 - sub_x)
            dy = abs((parent_box[1] + parent_box[3]) // 2 - sub_y)
            if dx >= max_offset or dy >= max_offset:
                continue  # outside the proximity window
            dist = dx * dx + dy * dy  # squared distance is enough for ranking
            if nearest_dist is None or dist < nearest_dist:
                nearest_parent, nearest_dist = parent, dist

        if nearest_parent is not None:
            object_id += 1
            nearest_parent["subobject"] = {
                "object": obj_class,
                "id": object_id,
                "bbox": bbox,
            }

    return json_output, object_id

# function to save cropped images for detected objects and their sub-objects
def save_cropped_images_for_subobjects(frame, detections, save_dir):
    """
    Crop every detection, and its nested sub-object when present, out of the
    frame and save each crop as a separate JPEG named "<object>_<id>.jpg".

    Parameters:
    - frame: input video frame (image array indexed as frame[y, x])
    - detections: list of dicts with "object", "id", "bbox"
      ([xmin, ymin, xmax, ymax]) and optionally "subobject" (a dict with the
      same three keys, or None)
    - save_dir: directory to save the cropped images (created if missing)

    Bounding boxes are clamped to the frame. Boxes that end up empty are
    skipped, because an empty image cannot be written by cv2.imwrite.
    """
    Path(save_dir).mkdir(parents=True, exist_ok=True)  # create directory if it doesn't exist
    frame_height, frame_width = frame.shape[:2]

    for detection in detections:
        # the parent is always cropped; its sub-object only when one was attached
        targets = [detection]
        if detection.get("subobject"):
            targets.append(detection["subobject"])

        for target in targets:
            x1, y1, x2, y2 = target["bbox"]
            # clamp to the frame: a negative start would otherwise wrap around
            x1, x2 = max(0, x1), min(frame_width, x2)
            y1, y2 = max(0, y1), min(frame_height, y2)
            label = f"{target['object']}_{target['id']}"
            if x2 <= x1 or y2 <= y1:
                print(f"Skipped empty crop: {label}")
                continue

            cropped_img = frame[y1:y2, x1:x2]
            cropped_name = f"{save_dir}/{label}.jpg"
            cv2.imwrite(cropped_name, cropped_img)  # save cropped image
            print(f"Cropped image saved: {cropped_name}")

# function to zip a directory and hand the archive to the browser for download
def create_and_download_zip_file(directory, zip_name):
    """
    Pack the contents of a directory into "<zip_name>.zip" and start its download.

    Parameters:
    - directory: the directory whose contents are archived
    - zip_name: archive name without the ".zip" extension
    """
    # make_archive appends the ".zip" extension to the base name itself
    shutil.make_archive(zip_name, "zip", directory)
    archive_file = f"{zip_name}.zip"
    files.download(archive_file)

# upload video file for processing
uploaded = files.upload()  # upload video file
if not uploaded:
    raise ValueError("No video file was uploaded")
video_path = next(iter(uploaded))  # first uploaded entry is used as the video path

# create a folder to store extracted frames
output_dir = "data"
os.makedirs(output_dir, exist_ok=True)

# open video file for processing
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # fail early instead of silently producing empty outputs further down
    raise RuntimeError(f"Could not open video file: {video_path}")

# get video properties like frame width, height, fps
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# keep the frame rate fractional (e.g. 29.97): truncating it to an int would
# change the timing of the written video
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    fps = 30.0  # the container reported no usable frame rate; fall back to a common default
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec for output video file

# define output video file
output_video_path = 'processed_video.mp4'
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# initialize object id counter
object_id_counter = 0  # starting object id

# process the video frame by frame
json_results = []  # list to store JSON results for each frame
currentframe = 0  # frame counter

# (label, box colour, text colour) used when drawing parents and sub-objects
PARENT_STYLE = ((0, 255, 0), (255, 255, 255))
SUBOBJECT_STYLE = ((0, 0, 255), (255, 255, 0))

try:
    while cap.isOpened():
        ret, frame = cap.read()  # read next frame
        if not ret:
            break

        # save the untouched frame as an image (before any boxes are drawn on it)
        frame_name = f"{output_dir}/frame{currentframe}.jpg"
        cv2.imwrite(frame_name, frame)

        # detect objects and get JSON results for the current frame
        json_result, object_id_counter = detect_and_associate_objects(frame, object_id_counter)
        json_results.append(json_result)

        # visualize detections on the frame: each parent, then its sub-object if any
        for obj in json_result:
            to_draw = [(obj, PARENT_STYLE)]
            if obj["subobject"]:
                to_draw.append((obj["subobject"], SUBOBJECT_STYLE))
            for item, (box_color, text_color) in to_draw:
                xmin, ymin, xmax, ymax = item["bbox"]
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)
                cv2.putText(frame, item["object"], (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2)

        # write the processed frame to the output video
        out.write(frame)

        currentframe += 1  # increment frame counter
finally:
    # release video resources even if detection or drawing raised, so the
    # capture and writer are always closed
    cap.release()
    out.release()

# save JSON results to a file
with open('detection_results.json', 'w', encoding='utf-8') as json_file:
    json.dump(json_results, json_file, indent=4)

# create a folder for cropped images
cropped_dir = "cropped_data"
os.makedirs(cropped_dir, exist_ok=True)

# save cropped images, re-reading each frame that was written during processing
for frame_idx, frame_detections in enumerate(json_results):
    saved_frame_path = f"{output_dir}/frame{frame_idx}.jpg"
    test_frame = cv2.imread(saved_frame_path)
    if test_frame is None:
        # the frame image could not be loaded; skip it rather than crash while cropping
        print(f"Could not read frame image: {saved_frame_path}")
        continue
    save_cropped_images_for_subobjects(test_frame, frame_detections, cropped_dir)

# create and download zip files for frames and cropped images
create_and_download_zip_file(output_dir, "frames")
create_and_download_zip_file(cropped_dir, "cropped_data")

# download the processed video and detection results JSON
files.download(output_video_path)
files.download('detection_results.json')

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


YOLOv5 🚀 2025-1-11 Python-3.10.12 torch-2.5.1+cu121 CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:00<00:00, 174MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Saving With helmet cat riding a bike #shorts #cat _#riding.mp4 to With helmet cat riding a bike #shorts #cat _#riding.mp4


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with a

Cropped image saved: cropped_data/person_1.jpg
Cropped image saved: cropped_data/person_2.jpg
Cropped image saved: cropped_data/person_3.jpg
Cropped image saved: cropped_data/person_4.jpg
Cropped image saved: cropped_data/person_5.jpg
Cropped image saved: cropped_data/person_6.jpg
Cropped image saved: cropped_data/person_7.jpg
Cropped image saved: cropped_data/person_8.jpg
Cropped image saved: cropped_data/person_9.jpg
Cropped image saved: cropped_data/person_10.jpg
Cropped image saved: cropped_data/person_11.jpg
Cropped image saved: cropped_data/person_12.jpg
Cropped image saved: cropped_data/person_13.jpg
Cropped image saved: cropped_data/person_14.jpg
Cropped image saved: cropped_data/person_15.jpg
Cropped image saved: cropped_data/person_16.jpg
Cropped image saved: cropped_data/person_17.jpg
Cropped image saved: cropped_data/person_18.jpg
Cropped image saved: cropped_data/person_19.jpg
Cropped image saved: cropped_data/person_20.jpg
Cropped image saved: cropped_data/person_21.jpg
C

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>