In [2]:
import cv2
import mediapipe as mp
import numpy as np
import threading
import tensorflow as tf




In [12]:
# Text overlaid on each displayed frame; detect() overwrites it through
# `global label` once a prediction has been made.
label = "Warmup...."

# Window length in frames.
# NOTE(review): not read by any visible cell - the capture loop defines its
# own `time_steps`; confirm the two are meant to stay equal.
n_time_steps = 10

# Buffer of per-frame landmark feature lists.
lm_list = list()

# Keras model loaded from a path relative to the notebook's working directory.
model = tf.keras.models.load_model("../../LSTM/models/nodwave.h5")

In [13]:
def make_landmark_timestep(results):
    """Flatten the pose landmarks of one frame into a single feature list.

    Args:
        results: Object exposing ``pose_landmarks.landmark``, an iterable of
            landmarks that each carry ``x``, ``y``, ``z`` and ``visibility``.
            ``pose_landmarks`` must not be ``None``; callers are expected to
            check that before calling.

    Returns:
        list: ``[x0, y0, z0, visibility0, x1, y1, z1, visibility1, ...]`` in
        the order the landmarks are iterated.
    """
    features = []
    for point in results.pose_landmarks.landmark:
        # Four values per landmark, always in x, y, z, visibility order.
        features.extend((point.x, point.y, point.z, point.visibility))
    return features

In [14]:
def draw_class_on_image(label, img):
    """Render the class label onto a frame and hand the frame back.

    Args:
        label: Text to draw.
        img: Image passed to ``cv2.putText`` as the drawing target.

    Returns:
        The same ``img`` object that was passed in.
    """
    text_origin = (10, 30)  # bottom-left corner of the text, in pixels
    text_color = (0, 255, 0)
    # Trailing positional arguments: font scale 1, thickness 2, line type 2.
    cv2.putText(img, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX,
                1, text_color, 2, 2)
    return img

In [15]:
# Define labels
# 0 - nodding
# 1 - handwave
def detect(model, lm_list):
    """Classify one window of landmark frames and publish the result.

    The window is converted to an array, given a leading axis of size one and
    passed to ``model.predict``. The first value of the first prediction row
    is compared against 0.5.

    Args:
        model: Object with a ``predict`` method that accepts the batched array.
        lm_list: Sequence of per-frame feature lists forming one window.

    Returns:
        str: ``"WAVING HAND"`` when the score is strictly above 0.5, otherwise
        ``"NODDING"``.

    Side effects:
        Rebinds the module-level ``label`` to the returned string.
    """
    global label
    batch = np.expand_dims(np.array(lm_list), axis=0)
    scores = model.predict(batch)
    label = "WAVING HAND" if scores[0][0] > 0.5 else "NODDING"
    return label

In [16]:
# Webcam loop: run pose estimation on every frame, collect landmark vectors
# into fixed-size windows, classify each full window on a background thread
# and overlay the most recent label on the displayed feed. Press "q" to quit.
mp_drawing = mp.solutions.drawing_utils  # Drawing helpers
mp_pose = mp.solutions.pose  # Mediapipe Solutions
cap = cv2.VideoCapture(0)
lm_list = []
# Frames per classification window (same value as n_time_steps in the
# configuration cell).
time_steps = 10

# Drawing styles do not change between frames, so build them once.
landmark_style = mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4)
connection_style = mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            # A failed grab yields no frame; stop instead of passing None
            # to cvtColor, which would raise.
            if not ret:
                break

            # Recolor Feed
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False

            # Make Detections
            results = pose.process(image)

            # Detect after `time_steps` frames that contain a pose
            if results.pose_landmarks:
                lm_list.append(make_landmark_timestep(results))
                if len(lm_list) == time_steps:
                    # Predict off the UI thread so the preview keeps running.
                    # The full window is handed to the thread and a fresh
                    # list is bound for the next window.
                    t1 = threading.Thread(target=detect, args=(model, lm_list,))
                    t1.start()
                    lm_list = []

            # Recolor image back to BGR for rendering
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Pose Detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                      landmark_style,
                                      connection_style)

            # Prediction
            image = draw_class_on_image(label, image)

            cv2.imshow('Webcam feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when the
    # loop exits through an exception.
    cap.release()
    cv2.destroyAllWindows()

(1, 10, 132)
(1, 10, 132)
[[3.9213322e-05]]
[[4.0022907e-05]]
(1, 10, 132)
[[3.9986782e-05]]
(1, 10, 132)
[[3.986335e-05]]
(1, 10, 132)
[[4.054026e-05]]
(1, 10, 132)
[[3.9860763e-05]]
(1, 10, 132)
[[4.1066138e-05]]
(1, 10, 132)
[[4.0772502e-05]]
(1, 10, 132)
[[0.9999642]]
(1, 10, 132)
[[0.99996704]]
(1, 10, 132)
[[0.99996704]]
(1, 10, 132)
[[0.9999671]]
(1, 10, 132)
[[0.9999671]]
(1, 10, 132)
[[0.9999671]]
(1, 10, 132)
[[0.9999671]]
(1, 10, 132)
[[0.99996716]]
(1, 10, 132)
[[0.99996716]]
(1, 10, 132)
[[0.9999662]]
(1, 10, 132)
[[0.04614886]]
(1, 10, 132)
[[4.45307e-05]]
(1, 10, 132)
[[0.00115431]]
(1, 10, 132)
[[7.359296e-05]]
(1, 10, 132)
[[4.4819684e-05]]
(1, 10, 132)
[[4.0078445e-05]]
(1, 10, 132)
[[3.9505267e-05]]
(1, 10, 132)
[[3.9642215e-05]]
(1, 10, 132)
[[4.162674e-05]]
(1, 10, 132)
[[4.690388e-05]]
(1, 10, 132)
[[5.566313e-05]]
(1, 10, 132)
[[5.1860592e-05]]
(1, 10, 132)
[[4.210668e-05]]
(1, 10, 132)
[[4.042541e-05]]
(1, 10, 132)
[[4.1476243e-05]]
(1, 10, 132)
[[3.958627e-05]]