In [6]:
import cv2
import numpy as np
from ultralytics import YOLO
import mediapipe as mp
import time


# Load the trained YOLOv8n weights and print the layer/parameter summary
WEIGHTS_PATH = "best.pt"
model = YOLO(WEIGHTS_PATH)
model.info(verbose=True)


YOLOv8n-cls summary: 56 layers, 1,445,974 parameters, 0 gradients, 3.4 GFLOPs


(56, 1445974, 0, 3.3660928)

In [7]:
# Class names as stored in the loaded model, in the order the model lists them
class_labels = [label for label in model.names.values()]

# Set up the MediaPipe face detector (model_selection=0, confidence threshold 0.5)
mp_face_detection = mp.solutions.face_detection
face_detector = mp_face_detection.FaceDetection(
    min_detection_confidence=0.5,
    model_selection=0,
)

# Expand a bounding box around its center and clip it to the frame
def apply_offset(x, y, w, h, scale=1.2, frame_shape=(480, 640)):
    """Scale a box about its center and clip the result to the frame.

    Args:
        x, y: Top-left corner of the input box, in pixels.
        w, h: Width and height of the input box, in pixels.
        scale: Factor applied to both width and height.
        frame_shape: Frame dimensions as (height, width, ...); only the
            first two entries are read.

    Returns:
        Tuple ``(x1, y1, width, height)`` of the expanded box. The box
        always lies inside the frame and ``width``/``height`` are never
        negative; a box that falls completely outside the frame comes back
        with a zero width and/or height.
    """
    frame_h, frame_w = frame_shape[0], frame_shape[1]

    cx = x + w // 2
    cy = y + h // 2
    half_w = int(w * scale) // 2
    half_h = int(h * scale) // 2

    # Clamp the top-left corner into [0, frame size] and never let the
    # bottom-right corner end up before it. Without this, a box lying fully
    # outside the frame produced a negative size, and slicing with a negative
    # stop silently returns a large, wrong region instead of an empty one.
    x1 = min(max(0, cx - half_w), frame_w)
    y1 = min(max(0, cy - half_h), frame_h)
    x2 = max(x1, min(frame_w, cx + half_w))
    y2 = max(y1, min(frame_h, cy + half_h))
    return x1, y1, x2 - x1, y2 - y1

# Webcam: classify every detected face in real time until the stream ends
# or the user presses "q" in the preview window.
cap = cv2.VideoCapture(0)
prev_time = None  # timestamp of the previous frame; None until one has been shown

try:
    if not cap.isOpened():
        # Report the failure and fall through to the cleanup below.
        # No exit() here: it ends the whole interpreter session, which in a
        # notebook means losing the kernel together with the loaded model.
        print("❌ Không thể mở webcam.")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # The frame size is the same for every detection in this frame
            ih, iw = frame.shape[:2]

            # Detect faces on an RGB copy of the BGR frame
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_detector.process(rgb_frame)

            # results.detections is falsy when no face was found
            for det in results.detections or []:
                # Relative box coordinates are scaled by the frame size to get pixels
                bbox = det.location_data.relative_bounding_box
                x = int(bbox.xmin * iw)
                y = int(bbox.ymin * ih)
                w = int(bbox.width * iw)
                h = int(bbox.height * ih)

                # Enlarge the box by 1.2x around its center
                x, y, w, h = apply_offset(x, y, w, h, scale=1.2, frame_shape=frame.shape)

                # Skip empty boxes. A negative size must be rejected before
                # slicing, because a negative slice stop would select a
                # wrong, non-empty region instead of nothing.
                if w <= 0 or h <= 0:
                    continue
                roi = frame[y:y + h, x:x + w]

                # Preprocess: grayscale -> resize to 224x224 -> replicate to 3 channels
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                gray = cv2.resize(gray, (224, 224))
                rgb = np.stack([gray] * 3, axis=-1)

                # Predict: top-1 class index, its label and its confidence
                result = model(rgb, verbose=False)
                pred_idx = result[0].probs.top1
                pred_label = class_labels[pred_idx]
                conf = result[0].probs.top1conf.item()

                # Pick the box color (BGR) from the predicted label
                if "no_mask" in pred_label:
                    color = (0, 0, 255)     # red
                elif pred_label in ["mc", "mmc"]:
                    color = (0, 165, 255)   # orange
                else:
                    color = (0, 255, 0)     # green

                # Draw the box and its label; keep the text baseline at least
                # 20 px from the top so it stays visible for faces near the edge
                label_y = max(y - 10, 20)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, f"{pred_label} ({conf*100:.1f}%)", (x, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

            # Compute FPS from the interval between consecutive frames.
            # perf_counter is a monotonic high-resolution clock; the extra
            # check avoids dividing by zero if two readings ever coincide.
            curr_time = time.perf_counter()
            if prev_time is not None and curr_time > prev_time:
                fps = 1 / (curr_time - prev_time)
            else:
                fps = 0
            prev_time = curr_time

            # Overlay the FPS on the frame and show it
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)
            cv2.imshow("YOLOv8n Mask Detection Realtime", frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the preview window, including when
    # the loop is interrupted or a frame raises an exception.
    cap.release()
    cv2.destroyAllWindows()
