In [121]:
import numpy as np
import mediapipe as mp
from PIL import Image
import glob
import json

In [122]:
test_images_paths = glob.glob('test_images/*.jpg')
test_images_filenames = [test_images_path.split('/')[-1].split('.')[0] for test_images_path in test_images_paths]
test_images = [Image.open(path) for path in test_images_paths]

In [123]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2, min_detection_confidence=0.5)

I0000 00:00:1711840269.355897       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 86), renderer: Apple M1 Pro


In [124]:
hand_landmarks_data = []
for i, img in enumerate(test_images):
    # Process the image and get the hand landmarks.
    results = hands.process(np.array(img))

    # Save landmarks to a JSON array.
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = [{'x': landmark.x, 'y': landmark.y, 'z': landmark.z} 
                        for landmark in hand_landmarks.landmark]
            hand_landmarks_data.append(landmarks)

In [125]:
def dict_to_arr(dict):
    return np.array([dict['x'], dict['y'], dict['z']])

In [126]:
hand_landmarks_data_arr = np.zeros(shape=(len(test_images), 21, 3), dtype=float)
for i, hand_data in enumerate(hand_landmarks_data):
    for j, landmark in enumerate(hand_data):
        hand_landmarks_data_arr[i, j] = dict_to_arr(landmark)

In [127]:
results_dicts = []
for i, test_images_filename in enumerate(test_images_filenames):
    # results_dict[test_images_filename] = hand_landmarks_data_arr[i].round(2)
    # results_dict[test_images_filename] = np.around(hand_landmarks_data_arr[i], 2)
    results_dicts.append({"name": test_images_filename, "data": np.around(hand_landmarks_data_arr[i], 4).tolist()})

In [128]:
results_dicts

[{'name': 'top_left',
  'data': [[0.232, 0.5089, 0.0],
   [0.3153, 0.4987, -0.0272],
   [0.3728, 0.4646, -0.0428],
   [0.4134, 0.4167, -0.0519],
   [0.4397, 0.3733, -0.0609],
   [0.3196, 0.3709, -0.0488],
   [0.3588, 0.3005, -0.0602],
   [0.3741, 0.2585, -0.064],
   [0.3822, 0.2278, -0.0669],
   [0.2712, 0.3583, -0.0407],
   [0.2904, 0.2719, -0.0566],
   [0.2986, 0.2185, -0.0663],
   [0.3013, 0.1818, -0.0726],
   [0.2295, 0.3647, -0.032],
   [0.2268, 0.2848, -0.0471],
   [0.2251, 0.2323, -0.0613],
   [0.2246, 0.1926, -0.0709],
   [0.1989, 0.3848, -0.0235],
   [0.1766, 0.331, -0.0383],
   [0.1657, 0.2959, -0.0493],
   [0.1601, 0.2658, -0.0558]]},
 {'name': 'hand_180',
  'data': [[0.5675, 0.326, 0.0],
   [0.4671, 0.3573, -0.0486],
   [0.3999, 0.4181, -0.0745],
   [0.3577, 0.4868, -0.0914],
   [0.3181, 0.5418, -0.1079],
   [0.4821, 0.5337, -0.0715],
   [0.4518, 0.6356, -0.0951],
   [0.44, 0.6978, -0.1099],
   [0.434, 0.748, -0.1212],
   [0.5451, 0.5423, -0.0597],
   [0.5343, 0.651, -0.083

In [129]:
class NumpyArrayEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

encoder = NumpyArrayEncoder
encoder.FLOAT_REPR = lambda o: format(o, '.3f')

In [130]:
# Save to a JSON file.
with open('hand_landmarks.json', 'w') as f:
    # json.dump(results_dict, f, cls=encoder)
    json.dump(results_dicts, f, cls=encoder, indent=2)
    # json.dump(results_dict, f, cls=NumpyArrayEncoder, indent=4)