## I just use this file to test and play with the camera.
## DO NOT USE THIS FILE

In [14]:
import PyQt5
%matplotlib qt
import mediapipe as mp
import cv2
import numpy as np
import os
import pyzed.sl as sl
from IPython.display import clear_output
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import time

In [15]:
# MediaPipe handles: the hands solution module, the landmark drawing
# helpers, and the hand-tracking model instance used in the capture loop.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hand_model = mp_hands.Hands(
    min_detection_confidence=0.6,
    min_tracking_confidence=0.9,
)

In [16]:
# Initialize ZED object (the camera handle; it is opened further down
# with zed.open(init) and polled with zed.grab(runtime) in the main loop)
zed = sl.Camera()

In [17]:
# Camera initialisation settings, passed to zed.open() further down
init = sl.InitParameters()

# Video stream: HD720 at a requested 60 frames per second
init.camera_resolution = sl.RESOLUTION.HD720
init.camera_fps = 60

# Depth sensing: NEURAL mode, measured in metres, limited to 0.1 .. 3
init.depth_mode = sl.DEPTH_MODE.NEURAL
init.coordinate_units = sl.UNIT.METER
init.depth_minimum_distance = 0.1
init.depth_maximum_distance = 3

In [18]:
# Open the ZED Camera.
# zed.open() reports failure through its return code rather than by raising,
# so check it here: otherwise a camera that failed to open only shows up
# later as confusing errors in the calibration / grab cells.
open_status = zed.open(init)
if open_status != sl.ERROR_CODE.SUCCESS:
    raise RuntimeError("Failed to open ZED camera: {}".format(repr(open_status)))

# Sanity check: print the width of the configured resolution
print(zed.get_camera_information().camera_configuration.resolution.width)

1280


In [19]:
# Define Runtime Parameters (passed to every zed.grab() call in the main loop)
runtime = sl.RuntimeParameters()
# NOTE(review): presumably makes the SDK fill holes in the depth data so
# fewer pixels come back invalid -- confirm against the ZED SDK docs.
runtime.enable_fill_mode = True

In [20]:
# Allocate one sl.Mat per camera reading; the main loop passes image, depth
# and point_cloud to the zed.retrieve_* calls on every frame.
image, depth, point_cloud, confidence_map = (sl.Mat() for _ in range(4))

In [21]:
# Get Camera Calibration Parameters
# Query the SDK once and reuse the left-camera calibration object instead of
# repeating the whole get_camera_information() chain for every value.
left_cam = zed.get_camera_information().camera_configuration.calibration_parameters.left_cam

# fx, fy and cx, cy as reported by the SDK for the left camera.
# fx / fy are used by the main loop to scale pixel offsets into 3D offsets.
fx, fy = left_cam.fx, left_cam.fy
cx, cy = left_cam.cx, left_cam.cy


In [22]:
# Matplotlib 3D view that the main loop redraws on every frame with hands
fig = plt.figure()

# Interactive mode, so drawing from inside the loop does not block it
plt.ion()

# Single 3D axes filling the whole figure
ax = fig.add_subplot(1, 1, 1, projection="3d")

In [23]:
# Main capture loop.
# For every grabbed frame: run MediaPipe hand tracking on the left image,
# anchor each detected hand at the 3D point-cloud position of its wrist,
# estimate a 3D position for every landmark relative to that anchor, plot the
# first (wrist) point in the matplotlib 3D axes, and show the annotated image
# in an OpenCV window. Press "q" in the OpenCV window to stop.

# Fixed axis limits for the 3D plot (same units as the point cloud).
PLOT_XLIM = (-0.5, 0.5)
PLOT_YLIM = (-0.5, 0.5)
PLOT_ZLIM = (0.2, 1.0)

# MediaPipe handedness label -> (label drawn on the image, BGR text colour,
# matplotlib point colour). The labels are swapped on purpose: the detection
# is flipped with respect to the un-mirrored camera image.
HAND_STYLES = {
    "Right": ("Left", (0, 0, 255), 'b'),
    "Left": ("Right", (0, 255, 255), 'r'),
}

while True:
    clear_output(wait=False)
    err = zed.grab(runtime)
    if err == sl.ERROR_CODE.SUCCESS:

        # Get Start Time of Each Loop for Calculating FPS
        time0 = time.time()

        # Get An Image From Left
        zed.retrieve_image(image, sl.VIEW.LEFT)

        # Get Depth Data
        zed.retrieve_measure(depth, sl.MEASURE.DEPTH)

        # Get Point Cloud Data
        zed.retrieve_measure(point_cloud, sl.MEASURE.XYZRGBA)

        # Convert the Image So That MediaPipe Can Process
        img = image.get_data()
        img.flags.writeable = False
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Send image to MediaPipe model and get results
        results = hand_model.process(img)

        # And Then Convert Back for OpenCV to Display
        img.flags.writeable = True
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # Number of hands found in this frame (stays 0 when none are detected)
        num_hands = 0

        # get image shape
        h, w = img.shape[:2]

        # Play with the Hand Landmarks
        if results.multi_hand_landmarks:
            ax.clear()

            for num, landmarks in enumerate(results.multi_hand_landmarks):

                # Get the number of hands
                num_hands = 1 + num

                # Get handedness (flipped via HAND_STYLES). The handedness
                # list is indexed directly with the hand index instead of
                # searching the landmark list for the current element.
                handedness = results.multi_handedness[num].classification[0].label
                style = HAND_STYLES.get(handedness)

                # wrist's index is 0
                wrist = landmarks.landmark[0]
                wrist_landmark_coordinates = [wrist.x, wrist.y, wrist.z]

                # convert wrist landmark to pixels
                X, Y = int(wrist.x * w), int(wrist.y * h)

                # Read the point cloud at the wrist pixel. Landmarks can fall
                # outside the image and the point cloud can hold non-finite
                # values, so the reading is validated before use; without this
                # the code below would hit an undefined wrist_position or
                # silently reuse the one from a previous hand / frame.
                wrist_valid = False
                if 0 <= X < w and 0 <= Y < h:
                    pc_err, point_cloud_value = point_cloud.get_value(X, Y)
                    xyz = [point_cloud_value[0],
                           point_cloud_value[1],
                           point_cloud_value[2]]
                    if (pc_err == sl.ERROR_CODE.SUCCESS
                            and np.all(np.isfinite(xyz))
                            and xyz[2] != 0):
                        # wrist_position is only overwritten by a valid reading
                        wrist_position = xyz
                        wrist_valid = True

                # Label the hand in the image at the wrist pixel
                if style is not None:
                    label, text_color, plot_color = style
                    cv2.putText(img, label, (X, Y), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)

                if wrist_valid:
                    hand_data = []

                    for idx, landmark in enumerate(landmarks.landmark):

                        # debug output: MediaPipe z of landmarks 0 (wrist) and 8
                        if idx in (0, 8):
                            print(landmark.z)

                        # calculate more accurate positions: pixel offset from
                        # the wrist, scaled by wrist depth over focal length
                        x_3d = wrist_position[0] + \
                               (landmark.x*w - wrist_landmark_coordinates[0]*w) * \
                               wrist_position[2]/fx

                        y_3d = wrist_position[1] + \
                               (landmark.y*h - wrist_landmark_coordinates[1]*h) * \
                               wrist_position[2]/fy

                        # NOTE(review): unlike x/y, the z offset is divided by
                        # the wrist depth and not scaled by image size or
                        # focal length -- confirm this is the intended model.
                        z_3d = wrist_position[2] + \
                               (landmark.z - wrist_landmark_coordinates[2])/ \
                               wrist_position[2]

                        # this is the calculated 3d position of each landmark (joint)
                        hand_data.append([x_3d, y_3d, z_3d])

                    # convert to numpy
                    hand_data = np.array(hand_data)

                    # Plot in Matplot; only the first entry (the wrist) is drawn
                    if style is not None:
                        ax.set_xlim3d(PLOT_XLIM)
                        ax.set_ylim3d(PLOT_YLIM)
                        ax.set_zlim3d(PLOT_ZLIM)
                        ax.set_xlabel('x')
                        ax.set_ylabel('y')
                        ax.set_zlabel('z')
                        ax.scatter3D(hand_data[0, 0],
                                     hand_data[0, 1],
                                     hand_data[0, 2], color = plot_color)
                        plt.draw()
                        plt.pause(0.0001)

                mp_drawing.draw_landmarks(img, landmarks, mp_hands.HAND_CONNECTIONS)

        # Print Number of Hands
        print(num_hands)

        # Calculate FPS
        time1 = time.time()
        if (time1-time0) > 0:
            frames_per_sec = 1.0/(time1 - time0)
            cv2.putText(img, 'FPS: {}'.format(int(frames_per_sec)),
                        (10, 30),cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 3)

        # Use OpenCV to display the Image
        cv2.imshow("test", img)


    # Press Q to Stop
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

# NOTE(review): only the OpenCV windows are released here; the camera is not
# closed in this cell -- call zed.close() once you are done with it.
cv2.destroyAllWindows()

0


In [24]:
# Scratch check: MediaPipe z of the last landmark left over from the main
# loop, divided by the z of the last wrist position read from the point cloud.
landmark.z/wrist_position[2]

-0.2041457037062253

In [25]:
# Scratch check: z component of the last wrist position read from the point cloud.
wrist_position[2]

0.3160422146320343

In [26]:
# Scratch check: Euclidean length of (h/fy, w/fx), i.e. image height and
# width each divided by the matching focal length.
((h/fy)**2 + (w/fx)**2)**(1/2)

2.7858258388457267

In [27]:
# Scratch check: Euclidean length of (fy/h, fx/w), the per-axis inverse
# ratios of the previous cell.
((fy/h)**2 + (fx/w)**2)**(1/2)

0.8400660748941304

In [8]:
# Scratch check of the "not in" operator on literals only (list membership,
# not a range test).
13 not in [5, 30]

True