## MediaPipe Simple Usage
https://developers.google.com/mediapipe/solutions/guide

On Ubuntu, `ls /dev/video*` displays available camera devices.

In [1]:
import cv2
import mediapipe as mp
import numpy as np
import time
# Shared drawing helpers used by every demo cell below.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Index of the camera device to open (on Linux, N maps to /dev/videoN).
# Presumably 2, 4 or 6 are alternative indices to try when several cameras
# are attached -- verify with `ls /dev/video*`.
CAMERA_INDEX = 0

# One capture handle is shared by all demo cells in this notebook.
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    # Fail fast here instead of crashing later inside a demo loop.
    raise RuntimeError(f"Cannot open camera device {CAMERA_INDEX}")

# Uncomment and run once you are finished with all demos to free the camera device.
#cap.release()

### Hand Landmarks

In [6]:
# Hand landmark demo: detect hands in each webcam frame and overlay the
# landmarks and their connections. Press ESC or 'q' to stop.
hands = mp.solutions.hands

# Drawing specs do not change between frames, so build them once.
landmark_spec = mp_drawing.DrawingSpec(color=(255, 0, 0), circle_radius=4, thickness=3)
connection_spec = mp_drawing.DrawingSpec(thickness=3, color=(0, 0, 255))

try:
    # NOTE(review): static_image_mode=True is kept from the original; for a
    # video stream static_image_mode=False is usually preferred -- confirm intent.
    # The context manager closes the MediaPipe graph when the loop ends.
    with hands.Hands(static_image_mode=True, min_detection_confidence=0.3) as hands_mesh:
        while cap.isOpened():
            success, bgr = cap.read()
            if not success:
                # A failed grab yields bgr=None; skip it instead of crashing in cvtColor.
                continue

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            results = hands_mesh.process(rgb)

            # multi_hand_landmarks is None when no hand is detected.
            for hand_landmarks in results.multi_hand_landmarks or []:
                mp_drawing.draw_landmarks(
                    bgr, hand_landmarks, hands.HAND_CONNECTIONS,
                    landmark_drawing_spec=landmark_spec,
                    connection_drawing_spec=connection_spec)

            cv2.imshow("Hand Landmarks", bgr)

            # Mask to the low byte so ESC and 'q' are compared the same way
            # even when the backend sets modifier bits in the key code.
            key = cv2.waitKey(5) & 0xFF
            if key in (27, ord('q')):
                break
finally:
    # Also runs on kernel interrupt, so no window is left hanging.
    cv2.destroyAllWindows()


### Face Detection

In [9]:
# Face detection demo: draw each detected face, its confidence score and the
# per-frame processing rate. Press ESC or 'q' to stop.
try:
    # The context manager closes the MediaPipe graph when the loop ends.
    with mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.7) as face_detection:
        while cap.isOpened():
            success, bgr = cap.read()
            if not success:
                # A failed grab yields bgr=None; skip it instead of crashing in cvtColor.
                continue

            # Timing starts after the frame grab, so the displayed FPS reflects
            # processing and drawing only. perf_counter is monotonic and
            # high-resolution, unlike time.time().
            start = time.perf_counter()

            # Convert the BGR image to RGB
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

            # Process the image and find faces
            results = face_detection.process(rgb)

            # If any face was detected, annotate the frame
            if results.detections:
                h, w = bgr.shape[:2]
                for detection in results.detections:
                    mp_drawing.draw_detection(bgr, detection)

                    # The bounding box is relative (0..1); scale to pixels.
                    bBox = detection.location_data.relative_bounding_box
                    x, y = int(bBox.xmin * w), int(bBox.ymin * h)

                    # Put the score slightly above the top-left corner of the box.
                    cv2.putText(bgr, f'{int(detection.score[0]*100)}%', (x, y - 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)

            totalTime = time.perf_counter() - start
            # Guard against a zero interval to avoid ZeroDivisionError.
            fps = 1 / totalTime if totalTime > 0 else 0
            cv2.putText(bgr, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
            cv2.imshow('Face Detection', bgr)

            # Mask to the low byte so ESC and 'q' are compared the same way
            # even when the backend sets modifier bits in the key code.
            key = cv2.waitKey(5) & 0xFF
            if key in (27, ord('q')):
                break
finally:
    # Also runs on kernel interrupt, so no window is left hanging.
    cv2.destroyAllWindows()


### Pose estimation

In [10]:
import cv2
import mediapipe as mp
import numpy as np
# Pose estimation demo: overlay the body pose skeleton on each webcam frame.
# Press ESC or 'q' to stop.
mp_pose = mp.solutions.pose

try:
    # The context manager closes the MediaPipe graph when the loop ends.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            success, bgr = cap.read()
            if not success:
                # A failed grab yields bgr=None; skip it instead of crashing in cvtColor.
                continue

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            results = pose.process(rgb)

            mp_drawing.draw_landmarks(
                bgr,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

            # Flip the image horizontally for a selfie-view display.
            cv2.imshow('MediaPipe Pose', cv2.flip(bgr, 1))

            # Mask to the low byte so ESC and 'q' are compared the same way
            # even when the backend sets modifier bits in the key code.
            key = cv2.waitKey(5) & 0xFF
            if key in (27, ord('q')):
                break
finally:
    # Also runs on kernel interrupt, so no window is left hanging.
    cv2.destroyAllWindows()

### Holistic landmark detection

In [11]:
# Holistic demo: overlay face contours and the body pose skeleton on each
# webcam frame. Press ESC or 'q' to stop.
mp_holistic = mp.solutions.holistic

try:
    # The context manager closes the MediaPipe graph when the loop ends.
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            success, bgr = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            results = holistic.process(rgb)

            # Face: draw only the contour connections, no individual landmark dots.
            mp_drawing.draw_landmarks(
                bgr,
                results.face_landmarks,
                mp_holistic.FACEMESH_CONTOURS,
                landmark_drawing_spec=None,
                connection_drawing_spec=mp_drawing_styles
                .get_default_face_mesh_contours_style())

            # Body: draw pose landmarks with the default pose style.
            mp_drawing.draw_landmarks(
                bgr,
                results.pose_landmarks,
                mp_holistic.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles
                .get_default_pose_landmarks_style())

            # Flip the image horizontally for a selfie-view display.
            cv2.imshow('MediaPipe Holistic', cv2.flip(bgr, 1))

            # Mask to the low byte so ESC and 'q' are compared the same way
            # even when the backend sets modifier bits in the key code.
            key = cv2.waitKey(5) & 0xFF
            if key in (27, ord('q')):
                break
finally:
    # Also runs on kernel interrupt, so no window is left hanging.
    cv2.destroyAllWindows()

### object detection

mediapipe introduction: https://developers.google.com/mediapipe/solutions/vision/object_detector/index

https://developers.google.com/mediapipe/solutions/vision/object_detector/python

Please download this detection model and place it in the same folder as this notebook:
https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite2_uint8.tflite

```
!wget -q -O efficientdet_lite2_uint8.tflite https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite2_uint8.tflite
```

mediapipe image class:
- https://developers.google.com/mediapipe/api/solutions/python/mp/Image
- https://developers.google.com/mediapipe/api/solutions/python/mp/ImageFormat

mediapipe sample code: https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/object_detection/python/object_detector.ipynb

In [48]:
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import ImageFormat

# Object detection demo: run the EfficientDet-Lite2 detector on each webcam
# frame and draw a labelled box per detection. Press ESC or 'q' to stop.
# The model file must sit next to this notebook.
base_options = python.BaseOptions(model_asset_path='efficientdet_lite2_uint8.tflite')
options = vision.ObjectDetectorOptions(base_options=base_options,
                                       score_threshold=0.5)

MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
# Annotations are drawn on the BGR frame, so red is (0, 0, 255);
# the previous (255, 0, 0) rendered as blue.
TEXT_COLOR = (0, 0, 255)  # red (BGR)

try:
    # The context manager closes the detector when the loop ends.
    with vision.ObjectDetector.create_from_options(options) as detector:
        while cap.isOpened():
            success, bgr = cap.read()
            if not success:
                continue

            # MediaPipe expects RGB input wrapped in an mp.Image.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            rgb_mp = mp.Image(image_format=ImageFormat.SRGB, data=rgb)

            detection_result = detector.detect(rgb_mp)

            for detection in detection_result.detections:
                # Draw bounding_box
                bbox = detection.bounding_box
                start_point = bbox.origin_x, bbox.origin_y
                end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
                cv2.rectangle(bgr, start_point, end_point, TEXT_COLOR, 3)

                # Draw label and score of the first listed category
                category = detection.categories[0]
                probability = round(category.score, 2)
                result_text = f'{category.category_name} ({probability})'
                text_location = (MARGIN + bbox.origin_x,
                                 MARGIN + ROW_SIZE + bbox.origin_y)
                cv2.putText(bgr, result_text, text_location, cv2.FONT_HERSHEY_PLAIN,
                            FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)

            cv2.imshow('MediaPipe Object detection', bgr)

            # Mask to the low byte so ESC and 'q' are compared the same way
            # even when the backend sets modifier bits in the key code.
            key = cv2.waitKey(5) & 0xFF
            if key in (27, ord('q')):
                break
finally:
    # Also runs on kernel interrupt, so no window is left hanging.
    cv2.destroyAllWindows()


### image segmentation (foreground detection)

mediapipe introduction: https://developers.google.com/mediapipe/solutions/vision/image_segmenter

Please download this segmentation model and place it in the same folder as this notebook: https://storage.googleapis.com/mediapipe-assets/deeplabv3.tflite

```
!wget -O deeplabv3.tflite -q https://storage.googleapis.com/mediapipe-assets/deeplabv3.tflite
```

mediapipe ImageSegmenterOptions: https://developers.google.com/mediapipe/api/solutions/python/mp/tasks/vision/ImageSegmenterOptions

mediapipe sample code: https://github.com/googlesamples/mediapipe/blob/main/examples/image_segmentation/python/image_segmentation.ipynb

In [47]:
# Image segmentation demo: keep the pixels the model labels with a non-zero
# category and blur everything else. Press ESC or 'q' to stop.
# Relies on `python`, `vision` and `ImageFormat` imported in the object
# detection cell; the model file must sit next to this notebook.
base_options = python.BaseOptions(model_asset_path='deeplabv3.tflite')
# NOTE(review): `output_type` and indexing the result with [0] match the older
# MediaPipe Tasks API; newer releases appear to use `output_category_mask=True`
# and `result.category_mask` -- confirm against the installed version.
options = vision.ImageSegmenterOptions(
    base_options=base_options,
    output_type=vision.ImageSegmenterOptions.OutputType.CATEGORY_MASK)

try:
    # Create the image segmenter; the context manager closes it when the loop ends.
    with vision.ImageSegmenter.create_from_options(options) as segmenter:
        while cap.isOpened():
            success, bgr = cap.read()
            if not success:
                continue

            # MediaPipe expects RGB input wrapped in an mp.Image.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            rgb_mp = mp.Image(image_format=ImageFormat.SRGB, data=rgb)

            segmentation_result = segmenter.segment(rgb_mp)
            category_mask = segmentation_result[0]

            blurred_image = cv2.GaussianBlur(bgr, (55,55), 0)
            # Replicate the mask over 3 channels to match the frame.
            # condition is True where the category is non-zero (presumably the
            # foreground, with 0 = background for this model -- confirm) and
            # False elsewhere.
            condition = np.stack((category_mask.numpy_view(),) * 3, axis=-1) != 0
            # True -> keep the sharp pixel, False -> use the blurred pixel.
            output_image = np.where(condition, bgr, blurred_image)

            # Window title fixed: it was copy-pasted from the object detection cell.
            cv2.imshow('MediaPipe Image segmentation', output_image)

            # Mask to the low byte so ESC and 'q' are compared the same way
            # even when the backend sets modifier bits in the key code.
            key = cv2.waitKey(5) & 0xFF
            if key in (27, ord('q')):
                break
finally:
    # Also runs on kernel interrupt, so no window is left hanging.
    cv2.destroyAllWindows()