In [1]:
import cv2
import torch
from ultralytics import YOLO
import mediapipe as mp
import numpy as np
from collections import deque

In [2]:
# Environment report: one "<label>: <value>" line per dependency, so the
# notebook records which library versions (and whether CUDA) it ran with.
environment_report = (
    ("OpenCV", cv2.__version__),
    ("PyTorch", torch.__version__),
    ("CUDA available", torch.cuda.is_available()),
    ("MediaPipe", mp.__version__),
    ("NumPy", np.__version__),
)
for report_label, report_value in environment_report:
    print(f"{report_label}: {report_value}")

OpenCV: 4.13.0
PyTorch: 2.2.2
CUDA available: True
MediaPipe: 0.10.9
NumPy: 1.26.4


In [18]:
# Short aliases for the MediaPipe "solutions" sub-modules used by the
# webcam cells: the FaceMesh model, the drawing helpers and drawing styles.
mp_solutions = mp.solutions
mp_face_mesh = mp_solutions.face_mesh
mp_drawing = mp_solutions.drawing_utils
mp_drawing_styles = mp_solutions.drawing_styles

# FaceMesh construction options, gathered in one place so they are easy to tune.
face_mesh_options = {
    "static_image_mode": False,
    "max_num_faces": 1,
    "refine_landmarks": True,
    "min_detection_confidence": 0.5,
    "min_tracking_confidence": 0.5,
}

# Shared FaceMesh instance; the webcam cells call face_mesh.process(...) on it.
face_mesh = mp_face_mesh.FaceMesh(**face_mesh_options)

In [19]:
# DISABLED CELL: standalone FaceMesh webcam preview, kept commented out for
# reference. It reads frames from camera 0, runs face_mesh on each RGB frame,
# draws the tesselation overlay and a "Face Detected" / "No Face" banner, and
# exits on 'q'.
# NOTE(review): its last line calls face_mesh.close() on the same `face_mesh`
# object that the combined phone + face cell further down also uses;
# presumably the FaceMesh setup cell must be re-run after this one if it is
# ever re-enabled -- confirm before uncommenting.
# cap = cv2.VideoCapture(0)
# print("Press 'q' to quit")

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

#     results = face_mesh.process(rgb_frame)
    
#     # Draw landmarks if face detected
#     if results.multi_face_landmarks:
#         for face_landmarks in results.multi_face_landmarks:
#             mp_drawing.draw_landmarks(
#                 image=frame,
#                 landmark_list=face_landmarks,
#                 connections=mp_face_mesh.FACEMESH_TESSELATION,
#                 landmark_drawing_spec=None,
#                 connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
#             )
        
#         # Display status
#         cv2.putText(frame, "Face Detected", (10, 30),
#                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
#     else:
#         cv2.putText(frame, "No Face", (10, 30),
#                     cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    
#     cv2.imshow('MediaPipe FaceMesh', frame)
    
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# cap.release()
# cv2.destroyAllWindows()
# face_mesh.close()

Press 'q' to quit


In [None]:
# Weights file handed to the Ultralytics YOLO constructor; `model` is the
# detector that the webcam loop calls on every frame.
YOLO_WEIGHTS = 'yolov8n.pt'
model = YOLO(YOLO_WEIGHTS)

In [None]:
# Combined phone + face webcam monitor.
#
# For every camera frame this cell:
#   1. runs the YOLO `model` and flags the frame if any box matches the phone
#      class above the confidence threshold,
#   2. runs the shared MediaPipe `face_mesh` on the RGB version of the frame,
#   3. draws the YOLO boxes, the face tesselation and two status lines, and
#   4. shows the result in an OpenCV window until 'q' is pressed or the
#      camera stops delivering frames.
#
# Relies on `model`, `face_mesh`, `mp_drawing`, `mp_face_mesh` and
# `mp_drawing_styles` created in earlier cells.

CAMERA_INDEX = 0
WINDOW_TITLE = 'Attention Detection'

# Class index compared against each box's `cls`.
# NOTE(review): presumably COCO "cell phone" for yolov8n -- confirm with model.names[67].
PHONE_CLASS_ID = 67
# A phone box only counts when its confidence is strictly above this value.
PHONE_MIN_CONFIDENCE = 0.5

# Status colours, in the channel order OpenCV drawing calls expect (BGR).
ALERT_COLOR = (0, 0, 255)
OK_COLOR = (0, 255, 0)
STATUS_Y = 30          # baseline of the first status line, in pixels
STATUS_LINE_GAP = 35   # vertical distance between the two status lines

cap = cv2.VideoCapture(CAMERA_INDEX)
print("Press 'q' to quit")

# try/finally guarantees the camera, the window and FaceMesh are released even
# when a frame raises or the kernel is interrupted mid-loop; previously the
# cleanup lines were only reached on a clean exit from the loop.
try:
    if not cap.isOpened():
        # The first read() below returns ret=False and ends the loop; say why.
        print(f"Could not open camera {CAMERA_INDEX}")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # YOLO object detection. verbose=False stops Ultralytics from logging
        # one line per frame, which would flood the notebook output.
        yolo_results = model(frame, verbose=False)
        detections = yolo_results[0]

        # True as soon as one box is a sufficiently confident phone.
        phone_detected = any(
            int(box.cls[0]) == PHONE_CLASS_ID
            and float(box.conf[0]) > PHONE_MIN_CONFIDENCE
            for box in detections.boxes
        )

        # MediaPipe is given an RGB image; OpenCV frames are converted first.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face_results = face_mesh.process(rgb_frame)
        face_detected = face_results.multi_face_landmarks is not None

        # Start from the YOLO rendering, then overlay the face mesh on top.
        annotated_frame = detections.plot()

        if face_detected:
            for face_landmarks in face_results.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    image=annotated_frame,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
                )

        # Phone status: a visible phone is the alert condition.
        phone_color = ALERT_COLOR if phone_detected else OK_COLOR
        phone_text = "PHONE: YES" if phone_detected else "PHONE: NO"
        cv2.putText(annotated_frame, phone_text, (10, STATUS_Y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, phone_color, 2)

        # Face status: a missing face is the alert condition.
        face_color = OK_COLOR if face_detected else ALERT_COLOR
        face_text = "FACE: YES" if face_detected else "FACE: NO"
        cv2.putText(annotated_frame, face_text, (10, STATUS_Y + STATUS_LINE_GAP),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, face_color, 2)

        cv2.imshow(WINDOW_TITLE, annotated_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    # NOTE(review): this closes the FaceMesh object created in the setup cell;
    # presumably that cell has to be re-run before this cell can run again --
    # confirm against the MediaPipe version in use.
    face_mesh.close()