In [1]:
%pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.162-py3-none-any.whl.metadata (37 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Downloading matplotlib-3.10.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Downloading scipy-1.16.0-cp311-cp311-macosx_14_0_arm64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.7.1-cp311-none-macosx_11_0_arm64.whl.metadata (29 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.22.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.1 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting contourpy>=1.0.1 (from matplot

In [None]:
from ultralytics import YOLO
import cv2
import gradio as gr

from PIL import Image, ImageDraw, ImageFont

model = YOLO("yolov8n.pt")

def get_font():
    # OS별로 적절한 폰트 객체 반환 (한글 지원)
    from PIL import ImageFont
    import platform
    
    font_size = 25
    
    try:
        if platform.system() == "Windows":
            # 윈도우용 한글 폰트
            return ImageFont.truetype("malgun.ttf", font_size)
        elif platform.system() == "Darwin":  # macOS
            # 맥용 한글 폰트
            return ImageFont.truetype("AppleGothic.ttf", font_size)
        else:  # Linux      
            # 리눅스 기본 폰트
            return ImageFont.load_default(size=font_size)
    except IOError:
        # 폰트 파일이 없을 경우 기본 폰트 사용
        return ImageFont.load_default(size=font_size)

def detect_object(image_array):
        
    image = Image.fromarray(image_array.copy())
    draw = ImageDraw.Draw(image)
    font = get_font()

    response = model(image_array)
    bounding_box_list = response[0].boxes.xyxy.cpu().numpy()
    confidence_list = response[0].boxes.conf.cpu().numpy()
    label_index_list = response[0].boxes.cls.cpu().numpy()
    label_list = response[0].names

    for bounding_box, confidence, label_index in zip(bounding_box_list, confidence_list, label_index_list):
        x1, y1, x2, y2 = bounding_box
        label_text = label_list[label_index]

        draw.rectangle([(x1, y1), (x2, y2)], outline="green", width=2)
        draw.text([x1 + 5, y1 + 5], text="{}({:.2f}%)".format(label_text, confidence * 100), fill="red", font=font)

    return image

with gr.Blocks() as demo:

    def stream_webcam(image_array):
        detected_image = detect_object(image_array)
        return detected_image

    with gr.Row():
        webcam_image = gr.Image(label="실시간 화면", sources="webcam", width=640, height=480, streaming=True)
        output_image = gr.Image(label="검출 화면", type="pil")

    webcam_image.stream(stream_webcam, inputs=[webcam_image], outputs=[output_image])

demo.launch()

# image = cv2.imread("/Users/yubin/Downloads/ai-generated-8375142_1280.jpg")
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# detect_object(image)

  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.





0: 512x640 1 person, 66.3ms
Speed: 2.6ms preprocess, 66.3ms inference, 1.1ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 60.9ms
Speed: 1.8ms preprocess, 60.9ms inference, 0.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 52.1ms
Speed: 2.7ms preprocess, 52.1ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 59.4ms
Speed: 3.6ms preprocess, 59.4ms inference, 0.9ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 56.4ms
Speed: 3.0ms preprocess, 56.4ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 53.9ms
Speed: 3.7ms preprocess, 53.9ms inference, 0.8ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 54.7ms
Speed: 3.1ms preprocess, 54.7ms inference, 0.7ms postprocess per image at shape (1, 3, 512, 640)

0: 512x640 1 person, 52.5ms
Speed: 3.7ms preprocess, 52.5ms inference, 0.8ms postprocess per image at shape (1, 3, 51