# Summary: Main file for generating the pose-coordinate dataset and for loading the trained model to perform pose estimation

## Structure of the file
 ### 1. Code for Movenet
 
 ### 2. Code for extracting points for model training
 
 ### 3. Yoga Pose Estimation using saved model

# 1. Code From Tensorflow Documentation with Apache 2.0 License ( Required to run Movenet )  [Start]

In [None]:
# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

Apache License : http://www.apache.org/licenses/LICENSE-2.0

## Visualization libraries & Imports

In [13]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

You should consider upgrading via the 'c:\users\mynam\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.
You should consider upgrading via the 'c:\users\mynam\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.
You should consider upgrading via the 'c:\users\mynam\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [14]:
!pip install tensorflow_hub



You should consider upgrading via the 'c:\users\mynam\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [15]:
!pip install PySimpleGUI



You should consider upgrading via the 'c:\users\mynam\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [16]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import numpy as np
import cv2

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

# Some modules to display an animation using imageio.
import imageio
from IPython.display import HTML, display

In [17]:
#@title Helper functions for visualization

# Dictionary that maps from joint names to keypoint indices.
# The names are listed in index order, so each joint's position in the tuple
# below is the keypoint index it is mapped to (0 through 16).
KEYPOINT_DICT = dict(zip(
    (
        'nose',
        'left_eye',
        'right_eye',
        'left_ear',
        'right_ear',
        'left_shoulder',
        'right_shoulder',
        'left_elbow',
        'right_elbow',
        'left_wrist',
        'right_wrist',
        'left_hip',
        'right_hip',
        'left_knee',
        'right_knee',
        'left_ankle',
        'right_ankle',
    ),
    range(17),
))

# Single-letter matplotlib colors used for the skeleton. Going by the joint
# indices in KEYPOINT_DICT, 'm' (magenta) marks bones that touch left-side
# joints, 'c' (cyan) bones that touch right-side joints, and 'y' (yellow) the
# two bones that join a left joint to its right counterpart.
_LEFT_BONE_COLOR = 'm'
_RIGHT_BONE_COLOR = 'c'
_CROSS_BONE_COLOR = 'y'

# Maps bones (pairs of keypoint indices) to a matplotlib color name.
# Entry order is kept as-is because the dict is iterated when edges are built.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): _LEFT_BONE_COLOR,
    (0, 2): _RIGHT_BONE_COLOR,
    (1, 3): _LEFT_BONE_COLOR,
    (2, 4): _RIGHT_BONE_COLOR,
    (0, 5): _LEFT_BONE_COLOR,
    (0, 6): _RIGHT_BONE_COLOR,
    (5, 7): _LEFT_BONE_COLOR,
    (7, 9): _LEFT_BONE_COLOR,
    (6, 8): _RIGHT_BONE_COLOR,
    (8, 10): _RIGHT_BONE_COLOR,
    (5, 6): _CROSS_BONE_COLOR,
    (5, 11): _LEFT_BONE_COLOR,
    (6, 12): _RIGHT_BONE_COLOR,
    (11, 12): _CROSS_BONE_COLOR,
    (11, 13): _LEFT_BONE_COLOR,
    (13, 15): _LEFT_BONE_COLOR,
    (12, 14): _RIGHT_BONE_COLOR,
    (14, 16): _RIGHT_BONE_COLOR,
}

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Collects the confident keypoints and skeleton edges in pixel units.

  Args:
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3]; the last
      axis is read as (y, x, score) with y and x scaled by `height` and
      `width` respectively.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: a keypoint is kept only when its score is strictly
      greater than this value; an edge is kept only when both of its
      endpoints pass.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) tuple containing:
      * the pixel (x, y) coordinates of every kept keypoint;
      * the pixel endpoints of every kept edge, shape [num_edges, 2, 2];
      * the color of each kept edge, in the same order as edges_xy.
  """
  visible_points = []
  segments = []
  segment_colors = []

  # NOTE(review): the loop count is taken from axis 0 while the loop index is
  # applied to axis 1. With the documented [1, 1, 17, 3] input both are 1.
  instance_count, _, _, _ = keypoints_with_scores.shape
  for instance in range(instance_count):
    person = keypoints_with_scores[0, instance]
    scores = person[:, 2]
    # Scale the normalized (y, x) pairs to pixel (x, y) pairs.
    absolute_xy = np.stack(
        [width * np.array(person[:, 1]), height * np.array(person[:, 0])],
        axis=-1)
    is_confident = scores > keypoint_threshold
    visible_points.append(absolute_xy[is_confident, :])

    for (start, end), color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if not (is_confident[start] and is_confident[end]):
        continue
      # Rows `start` and `end` form the [[x0, y0], [x1, y1]] segment.
      segments.append(absolute_xy[[start, end], :])
      segment_colors.append(color)

  keypoints_xy = (np.concatenate(visible_points, axis=0)
                  if visible_points else np.zeros((0, 17, 2)))
  edges_xy = np.stack(segments, axis=0) if segments else np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, segment_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: A dictionary with 'x_min', 'y_min', 'x_max' and 'y_max'
      entries in normalized coordinates. If provided, this function will also
      draw that bounding box on the image.
    close_figure: Accepted for backward compatibility only. It is not
      consulted; the figure is always closed before returning.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy uint8 array with shape [out_height, out_width, 3] representing the
    image overlaid with keypoint predictions.
  """
  height, width, _ = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  try:
    # To remove the huge white borders
    fig.tight_layout(pad=0)
    ax.margins(0)
    # Turn off tick labels
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    plt.axis('off')

    ax.imshow(image)
    line_segments = LineCollection([], linewidths=(4), linestyle='solid')
    ax.add_collection(line_segments)
    scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

    (keypoint_locs, keypoint_edges,
     edge_colors) = _keypoints_and_edges_for_display(
         keypoints_with_scores, height, width)

    # Only push data into the artists when there is something to draw.
    if keypoint_edges.shape[0]:
      line_segments.set_segments(keypoint_edges)
      line_segments.set_color(edge_colors)
    if keypoint_locs.shape[0]:
      scat.set_offsets(keypoint_locs)

    if crop_region is not None:
      xmin = max(crop_region['x_min'] * width, 0.0)
      ymin = max(crop_region['y_min'] * height, 0.0)
      rec_width = min(crop_region['x_max'], 0.99) * width - xmin
      rec_height = min(crop_region['y_max'], 0.99) * height - ymin
      rect = patches.Rectangle(
          (xmin, ymin), rec_width, rec_height,
          linewidth=1, edgecolor='b', facecolor='none')
      ax.add_patch(rect)

    fig.canvas.draw()
    # `tostring_rgb` is deprecated and removed in recent Matplotlib releases.
    # Read the RGBA buffer instead and drop alpha; np.array makes a
    # contiguous copy that stays valid after the figure is closed.
    rgba = np.asarray(fig.canvas.buffer_rgba())
    image_from_plot = np.array(rgba[..., :3])
  finally:
    # Always release the figure, even when drawing fails, so repeated calls
    # (e.g. once per video frame) do not accumulate open figures.
    plt.close(fig)

  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
        interpolation=cv2.INTER_CUBIC)
  return image_from_plot

def to_gif(images, fps):
  """Saves the image sequence to ./animation.gif and returns it embedded.

  Args:
    images: the frames handed to `imageio.mimsave`.
    fps: forwarded to `imageio.mimsave` as its `fps` keyword.

  Returns:
    Whatever `embed.embed_file` returns for the written GIF.
  """
  gif_path = './animation.gif'
  imageio.mimsave(gif_path, images, fps=fps)
  return embed.embed_file(gif_path)

def progress(value, max=100):
  """Builds an HTML <progress> bar for notebook display.

  Args:
    value: the current progress value, written into the `value` attribute and
      the element's fallback text.
    max: the value representing completion, written into the `max` attribute.
      (The name shadows the builtin but is kept for keyword callers.)

  Returns:
    An `HTML` display object wrapping the <progress> element.
  """
  # The stray comma that followed the max attribute was removed: attributes
  # in an HTML start tag are separated by whitespace only.
  return HTML("""
      <progress
          value='{value}'
          max='{max}'
          style='width: 100%'
      >
          {value}
      </progress>
  """.format(value=value, max=max))

## Load Model from TF hub

In [18]:
# Selects one MoveNet variant by name, loads it, and defines
# `movenet(input_image)` for the rest of the notebook. Two module-level names
# are set as a side effect: `input_size` (192 for lightning variants, 256 for
# thunder variants) and `movenet`.
model_name = "movenet_lightning" #@param ["movenet_lightning", "movenet_thunder", "movenet_lightning_f16.tflite", "movenet_thunder_f16.tflite", "movenet_lightning_int8.tflite", "movenet_thunder_int8.tflite"]

# TFLite names are handled first, so the plain "movenet_lightning" /
# "movenet_thunder" substring checks in the else branch only ever see
# non-TFLite names.
if "tflite" in model_name:
  # Each branch downloads the chosen model to ./model.tflite using a notebook
  # shell escape (`!wget`), so this cell only runs inside IPython/Jupyter.
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    # The input/output tensor details are looked up on every call.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
  # SavedModel variants are loaded from TF Hub.
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # The serving signature is looked up on every call.
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores

# Tensorflow Movenet Code [End]

# 2. Code for loading Yoga dataset

### 2.1 Imports 

In [19]:
import os
import imageio as iio
from tqdm import tqdm
import time

import pandas as pd
from PIL import Image

### 2.2 Preprocessing

In [38]:
# Channel modes listed because the dataset images do not all share the same
# channel layout. This list is not read by the function below.
all_channels = ['RGBA','CMYK', 'RGBA']
def image_reader_folder(path):
    """Reads every entry of a folder with OpenCV.

    Args:
        path: folder whose entries are read; each name from `os.listdir` is
            appended to it after a "/" separator.

    Returns:
        A list with one `cv2.imread` result per directory entry, in
        `os.listdir` order. NOTE: `cv2.imread` reports an unreadable file by
        returning None rather than raising, so the list may contain None.
    """
    return [cv2.imread(path + "/" + entry) for entry in os.listdir(path)]

In [39]:
# Runs MoveNet on one image and flattens the result into a labelled row.
def pose_detector(image, label):
    """Returns the keypoint coordinates of `image` followed by `label`.

    The image is given a leading batch axis, resized with padding to
    `input_size` x `input_size`, and passed through `movenet`. For every
    keypoint of the squeezed result, component 0 and then component 1 are
    appended (the third component is dropped), and `label` is appended last.

    Args:
        image: the image array to analyse.
        label: value stored as the final element of the returned list.

    Returns:
        A flat list: two values per keypoint, then `label`.
    """
    batched = tf.expand_dims(image, axis=0)
    resized = tf.image.resize_with_pad(batched, input_size, input_size)
    keypoints = movenet(resized).squeeze()
    feature_list = [
        component
        for keypoint in keypoints
        for component in (keypoint[0], keypoint[1])
    ]
    feature_list.append(label)
    return feature_list
        

In [40]:
# To run this cell, point the paths below at your local copy of the Kaggle
# yoga-poses dataset:
# https://www.kaggle.com/datasets/niharika41298/yoga-poses-dataset
list_coords = []
path_images_test = 'DATASET/TEST/'
path_images_train = 'DATASET/TRAIN/'  # defined here but not read in this cell
pose_names = ['downdog', 'goddess', 'plank', 'tree', 'warrior2']

# Number of images for which pose_detector raised.
count = 0
for pose in pose_names:
    # Only the TEST split is scanned; each pose has its own sub-folder.
    path = f'{path_images_test}{pose}'
    image_list = image_reader_folder(path)
    for image in tqdm(image_list):
        try:
            coords_row = pose_detector(image, pose)
        except Exception as e:
            # Best effort: report the failure, count it, and move on.
            print(e)
            print(image)
            count += 1
        else:
            list_coords.append(coords_row)
print("Images with errors")
print(count)

100%|██████████████████████████████████████████████████████████████████████████████████| 97/97 [00:02<00:00, 39.61it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 80/80 [00:01<00:00, 44.86it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 115/115 [00:02<00:00, 50.83it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 69/69 [00:01<00:00, 45.02it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 109/109 [00:02<00:00, 50.90it/s]

Images with errors
0





In [41]:
# Defining the headers here: each joint name of the human body is mapped to
# its index. The names are listed in index order, so enumerate() yields the
# same name -> index pairs (0 through 16).
headers = {
    joint: position
    for position, joint in enumerate((
        'nose',
        'left_eye',
        'right_eye',
        'left_ear',
        'right_ear',
        'left_shoulder',
        'right_shoulder',
        'left_elbow',
        'right_elbow',
        'left_wrist',
        'right_wrist',
        'left_hip',
        'right_hip',
        'left_knee',
        'right_knee',
        'left_ankle',
        'right_ankle',
    ))
}

In [42]:

# Column names for the dataset: two per joint (suffix `_x`, then `_y`) in
# `headers` order, followed by the 'target' label column.
# NOTE(review): pose_detector appends component 0 and then component 1 of each
# keypoint, and _keypoints_and_edges_for_display reads component 0 as y and
# component 1 as x, so the `_x`/`_y` suffixes look swapped relative to the
# data. Confirm before relying on these names.
col_names = []
for x in headers:
    col_names.extend((x + '_x', x + '_y'))
col_names.append('target')

In [43]:
# Assemble the labelled keypoint rows into a table and save it as CSV.
# `index` is left at its default, so the row index is written as well.
df_final = pd.DataFrame(data=list_coords, columns=col_names)
df_final.head()
df_final.to_csv(path_or_buf='dataset.csv')

Note: Once the 'dataset.csv' file has been generated, please run the file 'custom_model_'

# 3. Yoga Pose Estimation using saved model 

In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# first neural network with keras make predictions
from numpy import loadtxt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from keras.optimizers import SGD, Adam, Adadelta, RMSprop
from keras.models import model_from_json

In [44]:
# Cosine of the angle at (x0, y0), computed directly from coordinates.
def cosine(x1, y1, x0, y0, x2, y2):
    """Returns the cosine of the angle at (x0, y0) between two points.

    The two vectors are (x1, y1) - (x0, y0) and (x2, y2) - (x0, y0). The
    result is their dot product divided by the product of their lengths.

    Args:
        x1, y1: coordinates of the first outer point.
        x0, y0: coordinates of the vertex the angle is measured at.
        x2, y2: coordinates of the second outer point.

    Returns:
        The cosine, in [-1, 1] up to rounding. NaN is returned when either
        outer point coincides with the vertex, because the angle is undefined
        there; every `<` / `>` comparison against NaN is False.
    """
    # Shift the origin to the vertex to get the two vectors.
    x_1, y_1 = x1 - x0, y1 - y0
    x_2, y_2 = x2 - x0, y2 - y0

    dot_product = (x_2 * x_1) + (y_2 * y_1)
    # Parenthesized so the whole dot product is divided by the product of
    # both lengths (the unparenthesized form divided only the y term).
    length_product = (
        ((x_1) ** 2 + (y_1) ** 2) ** 0.5 * ((x_2) ** 2 + (y_2) ** 2) ** 0.5
    )
    if length_product == 0:
        return float('nan')
    return dot_product / length_product

In [48]:
# Loading the prediction model: the architecture comes from a JSON file and
# the weights from an HDF5 file.
json_model = 'model.json'
model_h5 = 'model.h5'
# The context manager closes the file even if read() raises.
with open(json_model, 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(model_h5)
print("Loaded model from disk")

Loaded model from disk


In [49]:
# Compile the restored model. `learning_rate` is the supported name of the
# optimizer argument; the old `lr` alias is deprecated and rejected by newer
# Keras releases.
loaded_model.compile(Adam(learning_rate = 0.01), "sparse_categorical_crossentropy", metrics = ["accuracy"])

In [50]:
# Display names for the classifier's outputs. WebCam indexes this list with
# the argmax of the model prediction, so the order presumably has to match the
# class indices used when the model was trained (TODO confirm).
target_pose_names = ['Downdog', 'Goddess', 'Plank', 'Tree', 'Warrior']

In [51]:
from imutils import resize


In [52]:
def WebCam(chosen):
    """Runs live pose classification on webcam device 0 until 'q' is pressed.

    For every frame: MoveNet keypoints are computed, the keypoints from index
    5 onward are fed to `loaded_model`, the predicted pose name is drawn on
    the frame, and the frame with the skeleton overlay is shown in a window.

    Args:
        chosen: the goal pose (an entry of `target_pose_names`) or 'Manual'.
            For any value other than 'Manual' the goal is drawn in green when
            it matches the detected pose and in red otherwise. For 'Downdog'
            a corrective hint is also drawn while the pose is not detected.

    The loop also stops when the camera cannot be opened or stops delivering
    frames. The capture device and all OpenCV windows are released on exit,
    including when an exception is raised.
    """
    # capture video from webcam
    capture = cv2.VideoCapture(0)

    try:
        # run a loop to get all the frames
        while capture.isOpened():

            # reading the frames one by one
            ret, frame = capture.read()
            # A failed grab yields no usable frame; stop instead of crashing.
            if not ret or frame is None:
                break

            # prepping the frame for input in the model
            input_image = tf.expand_dims(frame.copy(), axis=0)
            input_image = tf.image.resize_with_pad(input_image, input_size, input_size)

            # running the input through movenet to get coordinates of joints
            keypoints_with_scores = movenet(input_image)

            # Components 0 and 1 of every keypoint from index 5 onward, so
            # entries 2*k and 2*k + 1 belong to keypoint 5 + k.
            feature_list = []
            for feature in keypoints_with_scores.squeeze()[5:]:
                feature_list.append(feature[0])
                feature_list.append(feature[1])
            # making it a one-row dataframe for input in the classification model
            input_model = pd.DataFrame(feature_list).T

            # getting the index of the max probability and then using it to get the detected pose
            detected_pose = target_pose_names[np.argmax(loaded_model.predict(input_model))]

            # putting the detected pose as text on the video display
            cv2.putText(frame, 'Detected Yoga Pose:', (350, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.85, (255, 255, 255), 2, cv2.LINE_8)
            cv2.putText(frame, detected_pose, (450, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_4)

            cv2.putText(frame, "Press 'q' to quit.",  (10, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2, cv2.LINE_8)

            # if the option chosen is not manual, keep checking whether the chosen pose is detected
            if chosen != 'Manual':
                goal_reached = chosen == detected_pose
                # green once the goal pose is detected, red until then
                color = (0, 255, 0) if goal_reached else (0, 0, 255)
                cv2.putText(frame, 'Goal: '+str(chosen), (350, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.85, color, 2, cv2.LINE_8)

                # corrective feedback is only given for downdog, and only
                # while the pose has not been achieved
                if chosen == 'Downdog' and not goal_reached:
                    feedback = _downdog_feedback(feature_list)
                    cv2.putText(frame, feedback, (350, 200), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 0), 2, cv2.LINE_8)

            # displaying the joints on the image
            display_image = tf.expand_dims(frame, axis=0)
            display_image = tf.cast(tf.image.resize_with_pad(display_image, 1280, 1280), dtype=tf.int32)
            output_overlay = draw_prediction_on_image(np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

            # displaying live footage with all the overlays and texts
            cv2.imshow('Webcam footage', output_overlay)

            # check to quit the webcam window
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the windows however the loop ended.
        capture.release()
        cv2.destroyAllWindows()


def _downdog_feedback(feature_list):
    """Picks the hint shown while a downward-dog goal is not yet detected.

    Args:
        feature_list: the flat per-frame list built in WebCam, where entries
            2*k and 2*k + 1 are components 0 and 1 of keypoint 5 + k.

    Returns:
        One of the four feedback strings.
    """
    # NOTE(review): the original comment describes this as a "shoulders above
    # the hip" test. By position, entries 1 and 9 belong to keypoints 5 and 9
    # (left_shoulder and left_wrist in KEYPOINT_DICT). Confirm the intended
    # joints.
    if feature_list[1] < feature_list[9]:
        return 'Start by bending down'

    # Angle measured at entries 14/15 (keypoint 12) between entries 6/7
    # (keypoint 8) and entries 18/19 (keypoint 14). It is evaluated once and
    # reused for both threshold checks. The 0.04 / 0.09 thresholds are
    # compared against whatever `cosine` returns, so re-tune them if that
    # helper's formula changes.
    hip_cosine = cosine(feature_list[6], feature_list[7],
                        feature_list[14], feature_list[15],
                        feature_list[18], feature_list[19])
    if hip_cosine < 0.04:
        return 'Bring hands closer to feet'
    if hip_cosine < 0.09:
        return 'Put hands further away from feet'
    return 'Downward dog in progress'

In [62]:
import PySimpleGUI as sg

# Using PySimpleGUI to get input from the user: a one-shot window with a list
# of the pose names plus 'Manual', and OK / Cancel buttons.
event, values = sg.Window('Choose an option', [[sg.Text('Yoga Poses ->'), sg.Listbox(target_pose_names + ['Manual'] , size=(20, 6), key='LB')],
    [sg.Button('OK'), sg.Button('Cancel')]]).read(close=True)

if event == 'OK':
    try:
        chosen = values["LB"][0]
    except (IndexError, KeyError, TypeError):
        # Nothing usable was selected (e.g. OK pressed with no list entry
        # highlighted): fall back to manual mode. Only lookup failures are
        # caught, so interrupts and unrelated errors are not swallowed.
        chosen = 'Manual'
    WebCam(chosen)

else:
    sg.popup_cancel('User aborted')