In [75]:
import numpy as np
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision


In [76]:
# Layout and style settings read by visualize() when it draws boxes and labels.
MARGIN = 10  # pixels; offset of the label text from the bounding-box origin
ROW_SIZE = 10  # pixels; extra vertical offset added to the label's y position
FONT_SIZE = 1  # passed to cv2.putText as the font scale
FONT_THICKNESS = 1  # passed to cv2.putText as the stroke thickness of the label
TEXT_COLOR = (255, 0, 0)  # red on an RGB image; used for both the box and the label

def visualize(
    image,
    detection_result
) -> np.ndarray:
  """Draws a bounding box and a label for every detection on the image.

  The drawing happens in place: ``image`` itself is modified and then
  returned, so pass a copy if the original pixels must be preserved.

  Args:
    image: The input RGB image (a NumPy array OpenCV can draw on).
    detection_result: Object whose ``detections`` attribute lists the
      "Detection" entities to be visualized. Each detection provides a
      ``bounding_box`` (origin_x, origin_y, width, height) and a
      ``categories`` list; only the first category is used for the label.
  Returns:
    The same ``image`` object, annotated with boxes and labels.
  """
  for detection in detection_result.detections:
    # Draw the bounding box from its origin corner to the opposite corner.
    bbox = detection.bounding_box
    left, top = bbox.origin_x, bbox.origin_y
    cv2.rectangle(image, (left, top),
                  (left + bbox.width, top + bbox.height), TEXT_COLOR, 3)

    # A detection without any category still gets its box, just no label
    # (indexing categories[0] unconditionally would raise IndexError).
    if not detection.categories:
      continue

    # Label format: "<category name> (<score rounded to 2 decimals>)".
    # An f-string also tolerates a category_name of None.
    top_category = detection.categories[0]
    result_text = f'{top_category.category_name} ({round(top_category.score, 2)})'
    text_location = (MARGIN + left, MARGIN + ROW_SIZE + top)
    cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)

  return image

In [77]:
# Path to the TFLite model file that is later passed to the detector's BaseOptions.
model_path = './models/efficientdet_lite0_float16.tflite'

# Load the input image from an image file.
image_file = './dog_and_cat.png'
mp_image = mp.Image.create_from_file(image_file)


In [78]:
BaseOptions = mp.tasks.BaseOptions
ObjectDetector = mp.tasks.vision.ObjectDetector
ObjectDetectorOptions = mp.tasks.vision.ObjectDetectorOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Single-image mode, returning at most 5 detections.
# NOTE(review): no score_threshold is configured, so very low-confidence
# detections (scores around 0.02 in the printed result) are returned and
# drawn as well; consider setting score_threshold if that is unwanted.
options = ObjectDetectorOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    max_results=5,
    running_mode=VisionRunningMode.IMAGE)

# The detector is only needed for inference, so the context manager is kept
# as small as possible and releases it right after detect() returns.
with ObjectDetector.create_from_options(options) as detector:
    # Perform object detection on the provided single image.
    detection_result = detector.detect(mp_image)

# Copy the image data into a fresh NumPy array: visualize() draws in place
# and the copy keeps mp_image's own pixels untouched.
image_copy = np.copy(mp_image.numpy_view())
# Draw the detection boxes and labels onto the copy.
annotated_image = visualize(image_copy, detection_result)

# visualize() works on an RGB image, whereas cv2.imshow interprets arrays as
# BGR, so the channels are swapped RGB -> BGR for display. (COLOR_RGB2BGR is
# the same channel swap the previous COLOR_BGR2RGB performed; only the
# stated direction is corrected.)
bgr_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)
# Backward-compatible alias: the display cell still refers to this name,
# although the array is in BGR channel order.
rgb_annotated_image = bgr_annotated_image

In [81]:
# Print the raw detection result (bounding boxes, categories and scores) for inspection.
print(detection_result)

DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=72, origin_y=162, width=252, height=191), categories=[Category(index=None, score=0.7798683643341064, display_name=None, category_name='cat')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=303, origin_y=27, width=248, height=344), categories=[Category(index=None, score=0.7624295949935913, display_name=None, category_name='dog')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=44, origin_y=277, width=553, height=116), categories=[Category(index=None, score=0.02440771646797657, display_name=None, category_name='dining table')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=106, origin_y=0, width=70, height=8), categories=[Category(index=None, score=0.023307783529162407, display_name=None, category_name='person')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=368, origin_y=9, width=18, height=8), categories=[Category(index=None, score=0.02099349908530712, display_nam

In [80]:
# Display test: show the annotated image in an OpenCV window and block
# until any key is pressed.
try:
    cv2.imshow('test img', rgb_annotated_image)
    cv2.waitKey(0)
finally:
    # Always tear the window down, even if the blocking wait is interrupted,
    # so no orphaned OpenCV window is left behind.
    cv2.destroyAllWindows()