In [19]:
import cv2
import torch

# Load the pretrained YOLOv5-small detector through torch.hub.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

VIDEO_PATH = 'Actual_Advertisement.mp4'
cap = cv2.VideoCapture(VIDEO_PATH)

try:
    if not cap.isOpened():
        # Without this message a bad path makes the cell finish silently.
        print(f"Unable to open video: {VIDEO_PATH}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or a read error.
            break

        # OpenCV decodes frames as BGR, while the hub model treats numpy
        # input as RGB, so convert before running inference.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = model(rgb_frame)

        # render() returns the annotated input images; convert the first one
        # back to BGR because cv2.imshow interprets arrays as BGR.
        annotated_frame = cv2.cvtColor(results.render()[0], cv2.COLOR_RGB2BGR)

        cv2.imshow('Frame', annotated_frame)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()


Using cache found in C:\Users\jyash/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-14 Python-3.11.5 torch-2.4.1+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import BooleanVar

# Weights file handed to YOLO() by load_model().
MODEL_PATH = 'YOLO.pt'

def load_model():
    """Populate the global ``model`` with a YOLO instance built from ``MODEL_PATH``.

    Any exception raised while loading is reported through an error dialog
    instead of being propagated; in that case ``model`` is left unchanged.
    """
    global model
    try:
        model = YOLO(MODEL_PATH)
        print(f"Model loaded: {MODEL_PATH}")
    except Exception as load_error:
        failure_text = f"Failed to load model: {str(load_error)}"
        messagebox.showerror("Error", failure_text)

def load_ad_image():
    """Ask the user for an advertisement image and store it in the global ``ad_img``.

    Nothing changes when no path is returned by the dialog. If the chosen
    file cannot be decoded, an error dialog is shown and any previously
    loaded image (and its label) is kept, so ``ad_img`` and ``ad_label``
    never disagree.
    """
    global ad_img
    import os  # local import: only needed for the display name below

    ad_img_path = filedialog.askopenfilename(title="Select Advertisement Image", filetypes=[("Image Files", "*.jpg *.png")])
    if not ad_img_path:
        return

    # cv2.imread signals failure by returning None rather than raising.
    loaded_img = cv2.imread(ad_img_path)
    if loaded_img is None:
        messagebox.showerror("Error", f"Unable to read image: {ad_img_path}")
        return

    ad_img = loaded_img
    ad_label.config(text=f"Selected Ad Image: {os.path.basename(ad_img_path)}")

def load_video():
    """Ask the user for a video file and store its path in the global ``video_path``.

    The global is only updated when the dialog returns a path, so a
    dismissed dialog keeps the earlier selection instead of wiping it while
    ``video_label`` still displays the old file name.
    """
    global video_path
    import os  # local import: only needed for the display name below

    selected_path = filedialog.askopenfilename(title="Select Billboard Video", filetypes=[("Video Files", "*.mp4 *.avi")])
    if not selected_path:
        return

    video_path = selected_path
    video_label.config(text=f"Selected Video: {os.path.basename(video_path)}")

def combine_frames(original_frame, edited_frame):
    """Return the two frames stacked horizontally, original on the left.

    Stacking is delegated to ``np.hstack``, so the inputs must be compatible
    under its rules.
    """
    side_by_side = np.hstack([original_frame, edited_frame])
    return side_by_side

def _paste_ad(canvas, ad, x_min, y_min, x_max, y_max):
    """Resize ``ad`` to the given box and copy it into ``canvas`` in place.

    The box is axis-aligned, so a resize followed by a slice assignment
    places the ad without a perspective warp. Boxes with no area are
    skipped, and any part of the box outside ``canvas`` is cropped.
    """
    box_w, box_h = x_max - x_min, y_max - y_min
    if box_w <= 0 or box_h <= 0:
        # cv2.resize rejects a zero-sized target.
        return

    ad_resized = cv2.resize(ad, (box_w, box_h))

    frame_h, frame_w = canvas.shape[:2]
    left, top = max(x_min, 0), max(y_min, 0)
    right, bottom = min(x_max, frame_w), min(y_max, frame_h)
    if left >= right or top >= bottom:
        return

    canvas[top:bottom, left:right] = ad_resized[top - y_min:bottom - y_min, left - x_min:right - x_min]


def run_program():
    """Play the selected video with every class-0 detection replaced by the ad image.

    Reads the globals ``model``, ``ad_img`` and ``video_path``. Each frame is
    run through ``model``; every detected box whose class index is 0 is
    covered with ``ad_img`` resized to the box. The untouched and edited
    frames are shown side by side in an OpenCV window until the video ends
    or 'q' is pressed. Missing prerequisites and an unopenable video are
    reported through error dialogs.
    """
    if model is None:
        messagebox.showerror("Error", "The detection model is not loaded; check the model file and restart.")
        return
    if ad_img is None or not video_path:
        messagebox.showerror("Error", "Please select an advertisement image and a video before running the program.")
        return

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        messagebox.showerror("Error", "Unable to open video.")
        return

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("End of video or error reading video.")
                break

            results = model(frame)

            edited_frame = frame.copy()

            for result in results:
                for box in result.boxes:
                    # Class 0 is presumably the billboard class -- confirm against model.names.
                    if int(box.cls[0]) != 0:
                        continue
                    x_min, y_min, x_max, y_max = (int(v) for v in box.xyxy[0])
                    _paste_ad(edited_frame, ad_img, x_min, y_min, x_max, y_max)

            # Both halves stay in OpenCV's BGR order; reversing the channels of
            # the original would display it with red and blue swapped.
            combined_frame = combine_frames(frame, edited_frame)

            combined_frame_resized = cv2.resize(combined_frame, (800, 400))
            cv2.imshow('Original and Edited Video', combined_frame_resized)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the preview even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

# Main application window.
root = tk.Tk()
root.title("Billboard Ad Replacement")

# Shared state that the button callbacks read and overwrite.
model = ad_img = video_path = None

load_model()

# Row 1: advertisement image picker and the label showing the current choice.
ad_frame = tk.Frame(root)
ad_frame.pack(pady=10)
ad_button = tk.Button(ad_frame, text="Select Ad Image", command=load_ad_image)
ad_label = tk.Label(ad_frame, text="No Ad Image Selected")
for ad_widget in (ad_button, ad_label):
    ad_widget.pack(side=tk.LEFT)

# Row 2: video picker and the label showing the current choice.
video_frame = tk.Frame(root)
video_frame.pack(pady=10)
video_button = tk.Button(video_frame, text="Select Billboard Video", command=load_video)
video_label = tk.Label(video_frame, text="No Video Selected")
for video_widget in (video_button, video_label):
    video_widget.pack(side=tk.LEFT)

# Row 3: starts processing with the current selections.
run_button = tk.Button(root, text="Run Program", command=run_program)
run_button.pack(pady=20)

# Hand control to Tk until the window is closed.
root.mainloop()

Model loaded: YOLO.pt

0: 384x640 1 billboard, 8.0ms
Speed: 2.0ms preprocess, 8.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 8.0ms
Speed: 1.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 7.5ms
Speed: 2.0ms preprocess, 7.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 8.0ms
Speed: 2.8ms preprocess, 8.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 10.0ms
Speed: 1.0ms preprocess, 10.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 8.0ms
Speed: 1.0ms preprocess, 8.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 7.4ms
Speed: 2.1ms preprocess, 7.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 billboard, 8.0ms
Speed: 2.0ms preprocess, 8.0ms inference, 2.0ms postproc

In [3]:
# Show the class index of every detection in the first image of `results`.
# The class index is the last value of each detection row.
detections = results.xyxy[0]
for detection in detections:
    class_index = int(detection[-1])
    print(class_index)


In [9]:
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch2.0/index.html



Defaulting to user installation because normal site-packages is not writeable
Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu117/torch2.0/index.html


ERROR: Could not find a version that satisfies the requirement detectron2 (from versions: none)
ERROR: No matching distribution found for detectron2
