In [1]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers

In [4]:
# Root folder of the image dataset; it is passed to
# tf.keras.utils.image_dataset_from_directory further down in this notebook.
DATA_DIR = 'data'

The MoveNet output is a tensor of shape 1 x 1 x 17 x 3, where:

- The first dimension (1) represents the batch size, as you are only processing one image at a time.
- The second dimension (1) is not used and can be ignored.
- The third dimension (17) represents the 17 keypoints detected by MoveNet for a single pose.
- The fourth dimension (3) holds, for each keypoint, its y-coordinate and x-coordinate (both normalized to the image frame, i.e. in the range 0 to 1) followed by its confidence score.

In [17]:
class KeypointsExtract(layers.Layer):
    """Keras layer that converts a batch of images into flattened pose keypoints.

    Every image of the batch is run through the pose model's
    ``serving_default`` signature one at a time. The leading ``num_keypoints``
    rows of the resulting keypoint table (3 values per keypoint) are then
    flattened into one feature vector per image.

    Args:
        model_url: TF Hub handle of the pose model to load. Defaults to the
            MoveNet single-pose "thunder" model.
        num_keypoints: Number of leading keypoints to keep for each image.
            Defaults to 13.
        **kwargs: Standard ``layers.Layer`` arguments such as ``name``.

    Raises:
        ValueError: If ``num_keypoints`` is smaller than 1.
    """

    DEFAULT_MODEL_URL = "https://www.kaggle.com/models/google/movenet/TensorFlow2/singlepose-thunder/4"
    # Number of values stored per keypoint in the model output.
    VALUES_PER_KEYPOINT = 3

    def __init__(self, model_url=DEFAULT_MODEL_URL, num_keypoints=13, **kwargs):
        super().__init__(**kwargs)
        if num_keypoints < 1:
            raise ValueError(
                f"num_keypoints must be at least 1, got {num_keypoints}")
        self.model_url = model_url
        self.num_keypoints = num_keypoints
        # Keep a reference to the loaded module next to the signature we call.
        self.model = hub.load(model_url)
        self.movenet = self.model.signatures['serving_default']

    def call(self, images):
        """Return a float32 tensor of shape (batch, num_keypoints * 3)."""
        feature_size = self.num_keypoints * self.VALUES_PER_KEYPOINT

        def process_single_image(image):
            # The signature is invoked on a batch holding exactly one int32 image.
            single_batch = tf.cast(tf.expand_dims(image, axis=0), dtype=tf.int32)
            keypoints = self.movenet(single_batch)['output_0']
            # Drop only the two leading singleton axes; naming them explicitly
            # avoids accidentally squeezing any other size-1 axis.
            keypoints = tf.squeeze(keypoints, axis=[0, 1])
            # Keep the leading keypoints and flatten them into one vector.
            selected = keypoints[:self.num_keypoints, :self.VALUES_PER_KEYPOINT]
            return tf.reshape(selected, [feature_size])

        # map_fn applies the per-image function along the batch axis.
        return tf.map_fn(process_single_image, images,
                         fn_output_signature=tf.float32)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "model_url": self.model_url,
            "num_keypoints": self.num_keypoints,
        })
        return config

In [18]:
# Build the training and held-out datasets in one call: subset="both" returns
# the two splits together, with 10% of the files set aside by validation_split.
# The fixed seed keeps the split reproducible. Images are resized to 256x256,
# cropping instead of distorting them to preserve the aspect ratio.
train_ds, test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=DATA_DIR,
    validation_split=0.1,
    subset="both",
    seed=42,
    image_size=(256, 256),
    crop_to_aspect_ratio=True,
)

Found 1722 files belonging to 2 classes.
Using 1550 files for training.
Using 172 files for validation.


In [20]:
# Pose feature extractor used as the first layer of the classifier.
keypoints_extract = KeypointsExtract()

# Small dense classifier on top of the 39 keypoint features (13 keypoints x 3
# values). The single sigmoid unit outputs one probability for the binary task.
model = tf.keras.Sequential([
    layers.Input(shape=(256, 256, 3)),
    keypoints_extract,
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# A one-unit sigmoid output must be trained with binary cross-entropy.
# 'sparse_categorical_crossentropy' expects one output per class, so with a
# 1-wide output only label 0 is in range and label 1 is invalid.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keypoints_extract_6 (Keypoi  (None, 39)               0         
 ntsExtract)                                                     
                                                                 
 dense_18 (Dense)            (None, 128)               5120      
                                                                 
 dense_19 (Dense)            (None, 64)                8256      
                                                                 
 dense_20 (Dense)            (None, 1)                 65        
                                                                 
Total params: 13,441
Trainable params: 13,441
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train the classifier on the training split for ten epochs.
model.fit(
    x=train_ds,
    epochs=10,
)

Epoch 1/10
