In [None]:
import json
import os

import cv2
import pymongo
from IPython.display import display, Javascript
from pymongo import MongoClient
from ultralytics import YOLO

In [None]:
# Load the YOLO detector from the weights file on disk.
WEIGHTS_PATH = "yolo-Weights/yolov8n.pt"
model = YOLO(WEIGHTS_PATH)

In [None]:
# Human-readable labels, indexed by the integer class id read from `box.cls`.
# The order must not change; entries are only grouped by theme for readability.
classNames = [
    # people
    "person",
    # vehicles
    "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    # street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # bags and accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # sports gear
    "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket",
    # tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # food
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
    "pizza", "donut", "cake",
    # furniture
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # electronics
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # miscellaneous household items
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

In [None]:
# MongoDB connection
# The connection string is taken from the MONGODB_URI environment variable so
# that a real URI (which may embed credentials) never has to be written into
# the notebook. The original literal is kept as the fallback value, so the
# cell behaves exactly as before when the variable is not set.
connection_string = os.environ.get("MONGODB_URI", "DBName")

# serverSelectionTimeoutMS is in milliseconds: 300000 ms = 5 minutes.
# NOTE(review): any database call made while no server is reachable may block
# for up to that long — consider a smaller value for interactive use.
client = MongoClient(connection_string, serverSelectionTimeoutMS=300000)

# Database and collection that clicked detections are written to.
db = client['VisionCartCVProject']
collection = db['AddedItems']

In [None]:
# Initialize the webcam
cap = cv2.VideoCapture(0)
cap.set(3, 640)  # Width
cap.set(4, 480)  # Height

True

In [None]:
# Function to handle mouse clicks
def mouse_click(event, x, y, flags, param):
    """Store the clicked detection in MongoDB and report the outcome.

    Registered as the mouse callback of the 'Webcam' window. Only left-button
    presses are handled. The click position is tested against every box in the
    module-level ``last_boxes``. When several boxes contain the point, the one
    with the smallest area is chosen, so an object lying inside a larger
    detection can still be selected; ties keep the earliest box in the list.

    The chosen box's class id is mapped to a name through ``classNames``. If
    ``collection`` holds no document with that name, one is inserted; in both
    cases a JavaScript ``alert`` is emitted through IPython.

    Args:
        event: Mouse event code; compared against ``cv2.EVENT_LBUTTONDOWN``.
        x: Horizontal click position, compared against box coordinates.
        y: Vertical click position, compared against box coordinates.
        flags: Unused.
        param: Unused.

    Returns:
        None.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    # Track the tightest box that strictly contains the click as (area, box).
    best = None
    for box in last_boxes:
        x1, y1, x2, y2 = [int(v) for v in box.xyxy[0]]
        if x1 < x < x2 and y1 < y < y2:
            area = (x2 - x1) * (y2 - y1)
            if best is None or area < best[0]:
                best = (area, box)

    if best is None:
        # Click landed outside every detection: nothing to add.
        return

    cls = int(best[1].cls[0])
    class_name = classNames[cls]

    # Insert only when no document with this name exists yet.
    if collection.count_documents({"name": class_name}) == 0:
        collection.insert_one({"name": class_name, "description": f"Detected {class_name}"})
        alert_message = f"Added {class_name} to MongoDB."
    else:
        alert_message = f"{class_name} already in MongoDB, add other new items."

    # json.dumps yields a quoted, escaped string literal that is valid
    # JavaScript, so the message can never break out of the alert() call.
    display(Javascript(f"alert({json.dumps(alert_message)})"))

# Create the preview window and route its mouse events to mouse_click.
cv2.namedWindow('Webcam')
cv2.setMouseCallback('Webcam', mouse_click)

last_boxes = []  # Boxes of the most recently processed frame; read by mouse_click

try:
    while True:
        ret, img = cap.read()
        if not ret:
            # No frame could be read from the capture device: stop.
            break

        # verbose=False keeps the per-frame inference log lines out of the
        # cell output, which they would otherwise flood.
        results = model(img, stream=True, verbose=False)

        # Collect this frame's boxes locally and publish them in one step
        # below, so `last_boxes` is never observed empty or half-filled while
        # the frame is still being processed.
        frame_boxes = []

        for r in results:
            for box in r.boxes:
                x1, y1, x2, y2 = [int(v) for v in box.xyxy[0]]
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
                cls = int(box.cls[0])
                class_name = classNames[cls]
                # Keep the text baseline inside the image when the box touches
                # the top edge; otherwise the label is drawn off-screen.
                label_y = max(y1 - 10, 25)
                cv2.putText(img, class_name, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
                frame_boxes.append(box)  # Save box for click detection

        last_boxes = frame_boxes

        cv2.imshow('Webcam', img)
        # Exit when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop is left
    # through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 1 person, 167.7ms
Speed: 6.0ms preprocess, 167.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 230.4ms
Speed: 9.0ms preprocess, 230.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 253.1ms
Speed: 7.0ms preprocess, 253.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 234.0ms
Speed: 5.0ms preprocess, 234.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 246.2ms
Speed: 7.0ms preprocess, 246.2ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 264.8ms
Speed: 8.0ms preprocess, 264.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 279.0ms
Speed: 6.0ms preprocess, 279.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 176.3ms
Speed: 6.0ms preprocess, 176.3ms inference, 4.0ms postprocess per image at

<IPython.core.display.Javascript object>


0: 480x640 1 person, 1 toothbrush, 238.0ms
Speed: 7.0ms preprocess, 238.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 167.8ms
Speed: 4.0ms preprocess, 167.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 158.0ms
Speed: 3.0ms preprocess, 158.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 177.0ms
Speed: 3.0ms preprocess, 177.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 207.7ms
Speed: 4.0ms preprocess, 207.7ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 195.7ms
Speed: 4.0ms preprocess, 195.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 174.8ms
Speed: 3.0ms preprocess, 174.8ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 48

<IPython.core.display.Javascript object>


0: 480x640 1 person, 1 toothbrush, 256.8ms
Speed: 3.0ms preprocess, 256.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 172.7ms
Speed: 4.0ms preprocess, 172.7ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 198.2ms
Speed: 3.0ms preprocess, 198.2ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 200.6ms
Speed: 3.0ms preprocess, 200.6ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 232.4ms
Speed: 3.0ms preprocess, 232.4ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 teddy bear, 219.1ms
Speed: 4.0ms preprocess, 219.1ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 toothbrush, 216.0ms
Speed: 6.0ms preprocess, 216.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 48