Mount google drive and count images files in dataset


In [2]:
from google.colab import drive
import os
import shutil

drive_dir = "/content/drive/My Drive/Datasets/"
targz_path = "mpii_human_pose_v1.tar.gz"
extract_dir = "images/"

drive.mount("/content/drive", force_remount=False)

path = drive_dir + extract_dir
file_count = len([f for f in os.listdir(path)if os.path.isfile(os.path.join(path, f))])

print ("There are " + str(file_count) + " files in directory.")

Mounted at /content/drive
There are 5488 files in directory.


Importing utilities and libraries for file systems, datasets, image processing, and learning

In [3]:
!pip install console-progressbar

import time
import itertools
from os.path import join

import numpy as np
import pandas as pd
import scipy.io as sio
from sklearn.utils import shuffle

from PIL import Image
import cv2
from google.colab.patches import cv2_imshow
from console_progressbar import ProgressBar

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
#from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Dense, Layer
from keras.losses import MeanSquaredError
print("\nTF version:", tf.__version__)
print("Hub version:", hub.__version__)
print("GPU is", "available" if tf.test.is_gpu_available() else "NOT AVAILABLE")

Collecting console-progressbar
  Downloading https://files.pythonhosted.org/packages/d5/8d/810e5c5dbdefc92cc1819d0b6ffac2c9c149acece9b3e55e4d9d05d0bb2a/console_progressbar-1.1.2.tar.gz
Building wheels for collected packages: console-progressbar
  Building wheel for console-progressbar (setup.py) ... [?25l[?25hdone
  Created wheel for console-progressbar: filename=console_progressbar-1.1.2-cp36-none-any.whl size=4142 sha256=4c8727b58c1f3a424601762858c28ce452df60d664b1ac6447fdbb8a01ee06e7
  Stored in directory: /root/.cache/pip/wheels/35/48/4c/dfcbbd70b7a1690c7113e01fa2201a809203078d96de82b900
Successfully built console-progressbar
Installing collected packages: console-progressbar
Successfully installed console-progressbar-1.1.2

TF version: 2.3.0
Hub version: 0.9.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU is available



![Convolutional Pose Machine](https://drive.google.com/uc?export=view&id=1WrWE-qi-I5Q5GJOANEFYgUYQsqoiM_zQ)




Define the convolutional pose machine architecture:

 - It has n stages where n > 1

 - Initial stage is comprised of:
     - Interleaved convolutional and pooling layers for feature recognition with translational invariance
     - Two fully connected layers for classification of the most abstract features
 - Each subsequent stage is comprised of 
    - Interleaved convolutional and pooling layers for feature recognition performed on the input image
    - Concatenation of the belief maps from above and the previous stage
    - Three further convolutional layers
    - Two fully connected layers for classification of the most abstract features
    - A intermediate loss calculation to cache
 - The stage loss is aggregated in place of end-of-model loss calculattion to reduce the risk of the vanishing gradient problem encountered in some feed forward neural nets



In [72]:
class ConvPoseMachine (keras.models.Sequential):
  def __init__(self, num_stages, num_parts, loss_function = MeanSquaredError()):
      super(ConvPoseMachine, self).__init__()
      assert num_stages > 1, "There must be at least an initial and subsequent stage"
      self.num_stages = num_stages
      self.num_parts = num_parts
      self.loss_function = loss_function
      self.stage = 0
      self.loss = 0
      self.total_loss = 0
      self.build_model()
      
  def build_model(self):
    self.shared_layers = []
    self.stage_layers  = []

    filters = self.num_parts

    """
        Shared Layers
    """
    # Triple 9x9 conv encoder
    self.shared_layers.append(Conv2D(filters,
                                      name='conv-shared-1-9x9',
                                      kernel_size=9,
                                      activation='relu'))
    
    self.shared_layers.append(MaxPooling2D(name='pool-shared-2',
                                            pool_size=(2, 2)))
    
    self.shared_layers.append(Conv2D(filters,
                                      name='conv-shared-2-9x9',
                                      kernel_size=9,
                                      activation='relu'))
    
    self.shared_layers.append(MaxPooling2D(name='pool-shared-2',
                                            pool_size=(2, 2)))
    
    self.shared_layers.append(Conv2D(filters,
                                      name='conv-shared-3-9x9',
                                      kernel_size=9,
                                      activation='relu'))
    
    self.shared_layers.append(MaxPooling2D(name='pool-shared-3',
                                            pool_size=(2, 2)))

    self.shared_layers.append(Conv2D(filters,
                                      name="conv-shared-4-5x5",
                                      kernel_size=5,
                                      activation='relu'))
    
    """
        Initial Stage
    """

    self.stage_layers.append([])

    """ self.stage_layers[0].append(Conv2D(filters,
                                        name="conv-0-9x9", 
                                        kernel_size=9,
                                        activation='relu')) """
    
    # Fully connnected layer for classification of the most abstract features
    self.stage_layers[0].append(Dense(filters,
                                name="dense-0-1",
                                activation='relu'))
    self.stage_layers[0].append(Dense(filters,
                                name="dense-0-2",
                                activation='relu'))
    

    """
        Subsequent Stages
    """
    for stage in range (1, self.num_stages):
      self.stage_layers.append([])
      for i in range (3):
        self.stage_layers[stage].append(Conv2D(filters,
                                                name="conv-" + str(stage) + "-11x11",
                                                kernel_size=11,
                                                activation='relu'))
        
      # Fully connnected layer for classification of the most abstract features
      self.stage_layers[stage].append(Dense(filters,
                                            name="dense-" + str(stage) + "-1",
                                            activation='relu'))
      self.stage_layers[stage].append(Dense(filters,
                                            name="dense-" + str(stage) +"-2",
                                            activation='relu'))
                        
    

  def call(self, image, y_true):
    for stage in range(self.num_stages):
      x = tf.Variable(image)
      # Run shared layers on image
      for layer in self.shared_layers:
        print(layer.name + " - input shape: " + str(x.shape))
        x = layer(x)

      if stage == 0:
        for layer in self.stage_layers[stage]:
          print(layer.name + " - input shape: " + str(x.shape))
          x = layer(x)

        y_true_pdf = build_heatmaps(y_true.numpy(), x.shape[1])

        self.stage_loss(x, y_true_pdf)

      else: 
        x = tf.math.add(x, x_n_1)

        for layer in self.stage_layers[stage]:
          print(layer.name + " - input shape: " + str(x.shape))
          x = layer(x)

        y_true_pdf = build_heatmaps(y_true.numpy(), x.shape[1])

        self.stage_loss(x, y_true)

      x_n_1 = tf.Variable(x)
    
    return x

  def stage_loss (self, y_true, y_pred):
    loss = self.loss_function(y_true, y_pred).numpy()

    assert loss > 0, "Stage loss less than zero"

    self.loss = loss
    self.total_loss += self.loss

    print("Stage %d loss: %f\n"% (self.stage, self.loss))

    self.stage += 1

Utility functions for drawing gaussian belief maps, normalizing images, plotting joints on the images

In [75]:
def build_heatmaps(batch_keypoints, heatmap_dim):
  print(heatmap_dim)

  scale = 368 // heatmap_dim
  batch_keypoints = batch_keypoints / scale

  num_features = batch_keypoints.shape[1]
  heatmaps = np.zeros((batch_keypoints.shape[0], heatmap_dim, heatmap_dim, num_features), dtype=float)

  for image_keypoints in batch_keypoints: 
    for j in range (num_features):
      heatmaps[ :,:, j] = keypoint_gaussian(heatmaps[:,:,j], image_keypoints[j])

  return heatmaps


def keypoint_gaussian(image, kp, var = 1) :
  y_bound, x_bound = image.shape[:2]

  # Calculate range of gaussian 
  top_left = [int(kp[0] - 3 * var), 
              int(kp[1] - 3 * var)]
  bottom_right = [int(kp[0] + 3 * var + 1), 
                  int(kp[1] + 3 * var + 1)]
  
  # Check if entire gaussian distribution is out of image range
  if top_left[0] > x_bound or top_left[1] > y_bound \
    or bottom_right[0] < 0 or bottom_right[0] < 0:
    return image

  # Compute 2 Dimensional Gaussian (un-normalized)
  range = 6 * var + 1
  x = np.arange(range, dtype=float)
  x_c = x - (range//2)
  y_c = x_c[:, np.newaxis]
  g = np.exp(- (x_c ** 2 + y_c ** 2) / (2 * var ** 2))

  # gaussian range in image
  g_x = max(0, -top_left[0]), min(bottom_right[0], x_bound) - top_left[0]
  g_y = max(0, -top_left[1]), min(bottom_right[1], y_bound) - top_left[1]

  # Image range
  i_x = max(0, top_left[0]), min(bottom_right[0], x_bound)
  i_y = max(0, top_left[1]), min(bottom_right[1], y_bound)

  # Copy gaussian to image
  image [i_y[0]:i_y[1], i_x[0]:i_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
  return image
  

def normalize (image):
  # Reduce magnitude
  image = image / 255.0

  # Center around channel mean
  channel_mean = [0,0,0]
  for i in range(3):
    channel_mean[i] = image[:,:,i].mean(axis=(0, 1))
    image[:, :, i] -= channel_mean[i]

  assert np.max(image) < 1 and np.min(image) > -1, "Problem normalizing image"

  return image.astype(np.float32)


def visualize_heatmaps(heatmaps):
  kron_prod_dim = (368 // heatmaps.shape[0], 368 // heatmaps.shape[1])
  print(kron_prod_dim)
  for f in range (heatmaps.shape[-1]):
    unnormalized_grey_map =  (256 * heatmaps[ :,:, f]).astype(int)
    stretched_map = np.expand_dims(np.kron(unnormalized_grey_map, np.ones(kron_prod_dim)), axis=2)
    cv2_imshow(stretched_map)
    print("\n")


def plot_joints(image, y_pred = [], y_true = []):
  assert image is not None

  for coordinate in y_pred:
    cv2.circle(image, coordinate, 1, (0,0, 255), 2)

  for coordinate in y_true:
    cv2.circle(image, coordinate, 1, (0,255, 0), 2)

  try: 
    cv2_imshow(image)
  except AssertionError as error:
    print(repr(error))

Define bounding box class for redimensioning image centered on the subject

In [8]:
class BoundingBox():
  def __init__(self, points):
    self.top_left =  np.min(points, axis=0)
    self.bottom_right = np.max(points, axis=0)
    self.update_whc()

  def update_whc(self):
    self.width = np.abs(self.bottom_right[0] - self.top_left[0])
    self.height = np.abs(self.bottom_right[1] - self.top_left[1])
    self.center = np.array(self.top_left[0] + self.width/2, self.top_left[1] + self.height/2)

  def rescale(self, scalingFactor):
    # Multiply with unsafe casting
    self.top_left =  scalingFactor * self.top_left
    self.bottom_right = scalingFactor * self.bottom_right
    self.update_whc()

  def expand (self, size):
    expansion_vector = np.array([(size - self.width)/2, (size - self.height)/2]) 
    self.top_left -= expansion_vector
    self.bottom_right += expansion_vector
    self.update_whc()

  def tl_br (self):
    return tuple(self.top_left.astype(int)), tuple(self.bottom_right.astype(int))

Reorganize matlab structure into pandas dataframe

In [9]:
MPII_FIELDS = ['NAME','r ankle_X','r ankle_Y', 'r knee_X','r knee_Y', 'r hip_X',
               'r hip_Y', 'l hip_X','l hip_Y', 'l knee_X','l knee_Y', 'l ankle_X',
               'l ankle_Y','pelvis_X','pelvis_Y','thorax_X','thorax_Y','upper neck_X',
               'upper neck_Y', 'head top_X','head top_Y', 'r wrist_X','r wrist_Y',
               'r elbow_X','r elbow_Y', 'r shoulder_X','r shoulder_Y','l shoulder_X',
               'l shoulder_Y','l elbow_X','l elbow_Y', 'l wrist_X','l wrist_Y','Scale',
               'Activity','Category']

DATASET_SIZE = 24984 

class MpiiDataset():
  def __init__(self, image_dir, annotation_path, train=True, csv=False, dataset_size = None, filter = []):
    self.network_input_dim = 368
    self.train = train
    if dataset_size == None:
      if self.train:
        dataset_size = 17372
      else:
        dataset_size = 7612
    
    self.image_dir = image_dir
    self.bboxes = {}

    print("Loading annotations...")

    pb_sample_percent = dataset_size /100

    set_size = 0

    pb = ProgressBar(total=100, decimals=0, length=50, fill='X', zfill='-')

    release = sio.loadmat(annotation_path, struct_as_record=False)['RELEASE']

    print("Transforming annotations...")

    obj = release[0,0]

    annolist = obj.annolist
    train_flags = obj.img_train
    act = obj.act

    self.labels = pd.DataFrame(columns=MPII_FIELDS)

    # for each annotated image record
    for i in range(0,annolist.shape[1]):

      # Only save training or test images
      if not train_flags[0,i] == self.train:
        continue
        
      temp = []
      obj_list = annolist[0,i]
      obj_act = act[i,0]
      
      rect =obj_list.__dict__['annorect']
      img_d = obj_list.__dict__['image']

      if rect.shape[0] == 0:
        continue
          
      obj_rect = rect[0,0]
      obj_img = img_d[0,0]

      if 'annopoints' not in obj_rect._fieldnames:
        continue

      
      # Write image name to record
      name = obj_img.__dict__['name'][0]
      annopoints = obj_rect.__dict__['annopoints']
      
      if annopoints.shape[0]==0:
        continue
        
      if not filter == [] and not name in filter:
        continue

      points = annopoints[0,0].__dict__['point']

      temp.append(name)
    
      # Set default keypoint coordinate value -1
      for n in range(0,32):
        temp.append(-1)

      keypoints = []
      # Write keypoints to record
      for px in range(0,points.shape[1]):
        point = points[0,px]
        id = point.__dict__['id']
        x = point.__dict__['x']
        y = point.__dict__['y']
        array_index = 2 * id[0][0] + 1
        temp[array_index] = x[0][0]
        temp[array_index+1] = y[0][0]
        keypoints.append((x[0][0], y[0][0]))
      
      # Store bboxes in seperate map from dataframe
      self.bboxes[str(name)] = BoundingBox(keypoints)

      # Write ratio of box size to 200px height
      scale = obj_rect.__dict__['scale'][0][0]
      temp.append(scale)

      # Write activity/category, take the first index if passed list
      activity = act[i,0]
      activity_name = activity.act_name
      category_name = activity.cat_name

      if activity_name.shape[0]==0:
          temp.append(activity_name)
      else:
          temp.append(activity_name[0])
      if category_name.shape[0]==0:
          temp.append(category_name)
      else:
          temp.append(category_name[0])

      self.labels = pd.concat([self.labels, pd.DataFrame([temp],columns=MPII_FIELDS)])

      pb.print_progress_bar(int( set_size / pb_sample_percent)) 

      set_size += 1
      if set_size >= dataset_size:
        break
      
    print("\n" + ("Training" if self.train else "Testing") + " annotations dataframe (size " + str(self.labels.shape) + ") loaded")

    if (csv):
      file_name = "train" if self.train else "test" + '_mpii.csv'
      data.to_csv(file_name)
      print("Dataset written to " + file_name)

  def preprocess_image(self, image_name):

    image_path = join(image_dir, image_name)
    image = cv2.imread(image_path)

    # Cache image keypoints and human's bounding box
    label = self.labels[self.labels['NAME'].str.contains(image_name)]
    bbox = self.bboxes[image_name]

    top_left, bottom_right = bbox.tl_br()

    # Scale image to have human roughly 200 px in height
    targetHeight = 200.0
    scalingFactor = targetHeight / bbox.height
    image = cv2.resize(image, (0, 0), fx=scalingFactor, fy=scalingFactor)
    bbox.rescale((scalingFactor, scalingFactor))

    top_left, bottom_right = bbox.tl_br()

    bbox.expand(self.network_input_dim)

    half_cross = np.full((1, 2), self.network_input_dim / 2).astype(int)[0]
    full_cross = np.add(half_cross, half_cross)
    half_cross_tuple = (self.network_input_dim // 2,  self.network_input_dim // 2)

    # Pad image with black
    pad_image = np.pad(image, (half_cross_tuple, half_cross_tuple, (0, 0)), mode='constant')

    # Add margin to bounding box top left for cropping start and image diagonal for cropping end
    start = np.add(bbox.top_left.astype(int), half_cross)
    end =  np.add(start, full_cross)

    # Crop image to network input dimensions with human centered
    crop_image = pad_image[start[1]:end[1], start[0]:end[0]]

    # Perform similar transformations on laabeled annotations
    labelX = (np.array(label.iloc[:, 1:32:2])* scalingFactor + half_cross[0] - start[0]).astype(np.int32)[0]
    labelY = (np.array(label.iloc[:, 2:33:2])* scalingFactor + half_cross[0] - start[1]).astype(np.int32)[0]
    indices = range(0, len(labelX))
    
    transformed_labels = list(map(lambda x: (labelX[x], labelY[x]), indices))
    #transformed_labels = np.hstack([labelY, labelX])

    return crop_image, transformed_labels

  def get_data(self, validation_split=0.2):
    images = [f for f in os.listdir(self.image_dir)if os.path.isfile(os.path.join(self.image_dir, f))]

    #label_images = self.labels['NAME'].to_list()
    # compute label-image set intersection
    dataset = list (set(self.labels['NAME'].to_list()) & set(images))
    
    print(dataset)

    train_len = int(len(dataset) * (1 - validation_split))
    #for image_name in dataset[:train_len]:
    inputs, y_true = [], []
    for image_name in dataset:
      #yield self.preprocess_image(image_name)
      image, label = self.preprocess_image(image_name)
      inputs.append(normalize(image))
      y_true.append(label)

    return inputs, y_true


Training function (Work in progress)

In [76]:
image_dir = "/content/drive/My Drive/Datasets/images/"
annotation_dir = "/content/drive/My Drive/Datasets/mpii_human_pose_annotation.mat"

num_joints = (len(MPII_FIELDS) - 4) // 2 # Exclude Name, Scale, Activity, Category

optimizer = SGD(learning_rate=1e-3)
loss_function = MeanSquaredError()

cpm = ConvPoseMachine(num_stages = 3, num_parts = num_joints, loss_function = loss_function)

def train (model, batch_size = 500, num_epochs = 2):
  mpii =  MpiiDataset(image_dir, annotation_dir, dataset_size=batch_size)

  x_train, y_train = mpii.get_data()

  train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
  train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

  print (train_dataset)

  for epoch in range(num_epochs):

    print("\nBegin epoch %d" % (epoch,))
    epoch_start = time.time()
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):

      with tf.GradientTape() as tape:
        y_batch_pred = model(x_batch_train, y_batch_train, training=True) 


train (cpm, 6)

Loading annotations...
Transforming annotations...
 |XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX---------| 83% 
Training annotations dataframe (size (6, 36)) loaded
['005808361.jpg']
<BatchDataset shapes: ((None, 368, 368, 3), (None, 16, 2)), types: (tf.float32, tf.int32)>

Begin epoch 0
conv-shared-1-9x9 - input shape: (1, 368, 368, 3)
pool-shared-2 - input shape: (1, 360, 360, 16)
conv-shared-2-9x9 - input shape: (1, 180, 180, 16)
pool-shared-2 - input shape: (1, 172, 172, 16)
conv-shared-3-9x9 - input shape: (1, 86, 86, 16)
pool-shared-3 - input shape: (1, 78, 78, 16)
conv-shared-4-5x5 - input shape: (1, 39, 39, 16)
dense-0-1 - input shape: (1, 35, 35, 16)
dense-0-2 - input shape: (1, 35, 35, 16)
35
Stage 0 loss: 0.000642

conv-shared-1-9x9 - input shape: (1, 368, 368, 3)
pool-shared-2 - input shape: (1, 360, 360, 16)
conv-shared-2-9x9 - input shape: (1, 180, 180, 16)
pool-shared-2 - input shape: (1, 172, 172, 16)
conv-shared-3-9x9 - input shape: (1, 86, 86, 16)
pool-shared-3 - input 

ValueError: ignored

In [92]:
image_dir = "/content/drive/My Drive/Datasets/images/"
annotation_dir = "/content/drive/My Drive/Datasets/mpii_human_pose_annotation.mat"

def annotate(image, bbox, out_labelX, out_labelY):
  anno_image = image.copy()
  tl, br = bbox.tl_br()
  cv2.rectangle(anno_image, tl, br, (255,0,0), 2, 1)
  for i in range (0, len(out_labelX)):
    cv2.circle(anno_image, (out_labelX[i], out_labelY[i]), 2, (0,0,255), 2)
  print(image.shape)
  print(tl, br)
  cv2_imshow(anno_image)

mpii = MpiiDataset(image_dir, annotation_dir, filter=["000001163.jpg", "014378517.jpg"] )

if mpii.shuffle:
    mpii.labels = mpii.labels.sample(frac=1).reset_index(drop=True)

image_name = '014378517.jpg'

# Load Image
image_path = join(image_dir, image_name)
print (image_path)
image = cv2.imread(image_path)

# Cache labels
label = mpii.labels[mpii.labels['NAME'].str.contains(image_name)]

#Cache bounding box
bbox = mpii.bboxes[image_name]

top_left, bottom_right = bbox.tl_br()

print(top_left, bottom_right) 

out_labelX = (np.array(label.iloc[:, 1:32:2])).astype(np.int32)[0]
out_labelY = (np.array(label.iloc[:, 2:33:2])).astype(np.int32)[0]
annotate(image, bbox, out_labelX, out_labelY)

print ("box dim: " + str((bbox.width, bbox.height)))

print (image.shape)

# Bounding box target height, scale image to achieve this sizing
targetHeight = 200.0
scalingFactor = targetHeight / bbox.height

print(scalingFactor)

image = cv2.resize(image, (0, 0), fx=scalingFactor, fy=scalingFactor)
bbox.rescale((scalingFactor, scalingFactor))

out_labelX = (np.array(label.iloc[:, 1:32:2])* scalingFactor).astype(np.int32)[0]  
out_labelY = (np.array(label.iloc[:, 2:33:2])* scalingFactor).astype(np.int32)[0]
annotate(image, bbox, out_labelX, out_labelY)

print ("box dim: " + str((bbox.width, bbox.height)))

top_left, bottom_right = bbox.tl_br()


bbox.expand(mpii.network_input_dim)

half_cross = np.full((1, 2), mpii.network_input_dim / 2).astype(int)[0]
full_cross = np.add(half_cross, half_cross)
half_cross_tuple = ( mpii.network_input_dim // 2,  mpii.network_input_dim // 2)

print(half_cross)
print(full_cross)
print(half_cross_tuple)

start = np.add(bbox.top_left.astype(int), half_cross)
end =  np.add(start, full_cross)

print ("start " + str(start) + "\nend " + str(end))
padded_image = np.pad(image, (half_cross_tuple, half_cross_tuple, (0, 0)), mode='constant')


cv2.rectangle(padded_image, tuple(start), tuple(end), (255,255,255), 2)
cv2_imshow(padded_image)
print(padded_image.shape)

cropped_image = padded_image[start[1]:end[1], start[0]:end[0]]
cv2_imshow(cropped_image)

print(cropped_image.shape)

top_left, _ = bbox.tl_br()

print("top_left " + str(top_left))
print ("bottom_right " + str(_))

out_labelX = (np.array(label.iloc[:, 1:32:2])* scalingFactor + half_cross[0] - start[0]).astype(np.int32)[0]  
out_labelY = (np.array(label.iloc[:, 2:33:2])* scalingFactor + half_cross[0] - start[1]).astype(np.int32)[0]
print(out_labelX)
print(out_labelY)#
out_label = np.hstack([out_labelY, out_labelX])

for i in range (0, len(out_labelX)):
  cv2.circle(cropped_image, (out_labelX[i], out_labelY[i]), 2, (0,0,255), 2)
cv2_imshow(cropped_image)

Loading annotations...


KeyboardInterrupt: ignored