In [None]:
from ultralytics import YOLO
import cv2
import cvzone
import math
import time
from gtts import gTTS
import pygame

# Turkish labels for the detector's classes. The list is indexed directly by
# the integer class id of each detected box (classNames[int(box.cls[0])]), so
# the ORDER and LENGTH (80 entries) must match the class ids of the loaded
# model -- never insert, remove, or reorder entries.
# Index 49 is the fruit "orange": it must be "portakal" (the fruit), not
# "turuncu" (the colour), because the label is both drawn and spoken aloud.
classNames = ["insan", "bisiklet", "araba", "motorsiklet", "uçak", "otobüs", "tren", "kamyon", "bot",
              "trafik ışığı", "yangın musluğu", "dur işareti", "parkmetre", "bank", "kuş", "kedi",
              "köpek", "at", "koyun", "inek", "fil", "ayı", "zebra", "zürafa", "sırt çantası", "şemsiye",
              "el çantası", "kravat", "bavul", "frizbi", "kayaklar", "snowboard", "spor topu", "uçurtma", "beyzbol sopası",
              "beyzbol eldiveni", "kaykay", "sörf tahtası", "tenis raketi", "şişe", "şarap kadehi", "bardak",
              "çatal", "bıçak", "kaşık", "tas", "muz", "elma", "sandviç", "portakal", "brokoli",
              "havuç", "sosisli sandviç", "pizza", "donut", "kek", "sandalye", "kanepe", "saksı bitkisi", "yatak",
              "yemek masası", "tuvalet", "televizyon", "laptop", "mouse", "kumanda", "klavye", "cep telefonu",
              "mikrodalga", "fırın", "tost makinası", "lavabo", "buzdolabı", "kitap", "saat", "vazo", "makas",
              "oyuncak ayı", "saç kurutma makinesi", "diş fırçası"
              ]

# Frame-timing placeholders. Neither value is read anywhere in the visible
# code; they are kept so that any code relying on these names still works.
prev_frame_time = new_frame_time = 0

# Open capture device index 0 and load the YOLOv8 nano weights.
cap = cv2.VideoCapture(0)
model = YOLO("yolov8n.pt")

# pygame is used only for audio playback (pygame.mixer.music) of the
# synthesized speech files.
pygame.init()

# Numeric suffix for the generated "output<sayac>.mp3" files; it is bumped
# after every announcement so each clip gets a fresh file name.
sayac = 213951293

# Time of the last announcement; the main loop compares against it to space
# announcements at least 5 seconds apart.
start_time = time.time()

# Name of, and distance to, the detection nearest the frame centre.
# "Nothing seen yet" is represented by an empty name and an infinite distance.
closest_distance_object, closest_distance = "", math.inf

# Main loop: grab a frame, run detection, draw every confident detection,
# and at most once every 5 seconds announce (in Turkish, via gTTS + pygame)
# the detection whose box centre is nearest to the centre of the frame.
# Press Esc in the preview window to quit.
#
# NOTE(review): the "distance" is the pixel distance between the box centre
# and the frame centre divided by 10; it is shown/spoken as centimetres but
# is not a calibrated physical measurement -- confirm this is intended.
# NOTE(review): gTTS synthesis is a blocking call inside the loop, so the
# preview stalls while a clip is generated, and one new output<N>.mp3 file
# is left on disk per announcement.
try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # No frame available (camera missing, unplugged, or stream
            # ended). Resizing a missing frame would raise, so stop cleanly.
            break

        img = cv2.resize(img, (1280, 720), interpolation=cv2.INTER_AREA)

        # Mark the frame centre; distances are measured from this point.
        screen_center_x = img.shape[1] // 2
        screen_center_y = img.shape[0] // 2
        cv2.circle(img, (screen_center_x, screen_center_y), 5, (0, 255, 0), -1)

        results = model(img, stream=True)

        # One (label, distance_to_centre, (x1, y1)) tuple per kept box. The
        # top-left corner is stored with the detection so that each label
        # is later drawn on its own box.
        detected_objects = []
        for r in results:
            for box in r.boxes:
                # Confidence rounded up to two decimals; drop weak boxes.
                conf = math.ceil((box.conf[0] * 100)) / 100
                if conf < 0.5:
                    continue

                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                cvzone.cornerRect(img, (x1, y1, x2 - x1, y2 - y1))

                center_x = (x1 + x2) // 2
                center_y = (y1 + y2) // 2
                cv2.circle(img, (center_x, center_y), 5, (255, 0, 0), -1)

                distance_to_center = math.hypot(center_x - screen_center_x,
                                                center_y - screen_center_y)

                detected_objects.append(
                    (classNames[int(box.cls[0])], distance_to_center, (x1, y1))
                )

        # Nearest detection of THIS frame. min() returns the first minimal
        # element, matching a strict "<" scan. With no detections the state
        # is reset so a stale object name can never be reported.
        if detected_objects:
            closest_distance_object, closest_distance, _ = min(
                detected_objects, key=lambda det: det[1]
            )
        else:
            closest_distance_object, closest_distance = "", float("inf")

        # Announce only when something is actually in view; otherwise the
        # message would read "inf santimetre ..." with an empty object name.
        # The timer is restarted only after a real announcement, so the next
        # object to appear is reported without an extra 5 second wait.
        if detected_objects and time.time() - start_time >= 5:
            text = f'{closest_distance/10:.2f} santimetre mesafesinde bir {closest_distance_object} bulunmaktadır.'
            speech = gTTS(text=text, lang="tr", slow=False)
            speech.save(f'output{sayac}.mp3')
            pygame.mixer.music.load(f'output{sayac}.mp3')
            pygame.mixer.music.play()
            sayac += 1
            start_time = time.time()

        # Label every detection at its own box; the y coordinate is clamped
        # to 35 so the text box stays inside the top edge of the frame.
        for obj_name, distance, (label_x, label_y) in detected_objects:
            cvzone.putTextRect(img, f'{obj_name} {distance/10 :.2f} cm',
                               (max(0, label_x), max(35, label_y)), scale=1, thickness=1)

        cv2.imshow("Image", img)
        key = cv2.waitKey(1)
        if key == 27:  # Esc
            break
finally:
    # Always free the camera and close the preview window, even when the
    # loop exits through an exception (e.g. a speech-synthesis failure).
    cap.release()
    cv2.destroyAllWindows()
