<center style="font-weight:bold;font-size:20px">wbenbihi/hourglasstensorlfow: Stacked Hourglass Network for Human Pose Estimation</center>

<center style="font-weight:bold;font-size:20px">Training</center>

# Setup

## Imports

In [1]:
import os
import sys
sys.path.append(os.path.join('..'))

In [2]:
from config import CFG
import yaml
import pandas as pd
import tensorflow as tf
from src.dataset import *

## Global Variables

In [3]:
# Project root, taken from the project configuration object.
ROOT_FOLDER = CFG.ROOT_FOLDER
# Folder and file names. NOTE(review): these three constants are not
# referenced by the cells shown here — confirm they are still needed.
DATA_FOLDER = 'data'
IMAGE_FOLDER = 'images'
LABELS_FILE = "MPII_DATASET_LABELS.csv"

# Function definition

In [4]:
def extract_coordinates_and_filenames(df: pd.DataFrame, config: dict):
    """Build the image paths and joint coordinates described by a summary frame.

    Args:
        df: Summary DataFrame holding one image-name column plus the joint
            coordinate columns.
        config: The ``dataset`` section of the configuration. The keys read
            here are ``image_column``, ``joint_column_format`` and
            ``data_folder``.

    Returns:
        A ``(filenames, coordinates)`` tuple. ``filenames`` is a list of
        strings built as ``<ROOT_FOLDER>/<data_folder>/<image name>`` and
        ``coordinates`` is the matched joint columns reshaped to
        ``(-1, 16, 2)``.

    Raises:
        KeyError: If a required key is missing from ``config`` or the image
            column is missing from ``df``.
    """
    image_column = config['image_column']
    # Expand the column-name template into a regex matching every x/y column.
    joint_format_regex = config["joint_column_format"].format(
        JOINTNUMBER="[0-9]*", IDVISIBILITY="(x|y)"
    )
    # Read the image column named by the `config` argument instead of going
    # through the notebook-global `yaml_config` and a hard-coded `.image`
    # attribute, so the function only depends on what it is given.
    image_folder = os.path.join(ROOT_FOLDER, config['data_folder'])
    filenames = (image_folder + '/' + df[image_column]).tolist()
    # Assumes the matched columns are laid out as x, y pairs for 16 joints, in
    # joint order — TODO confirm against the summary file.
    coordinates = df.filter(regex=joint_format_regex).values.reshape((-1, 16, 2))
    return filenames, coordinates

In [5]:
def generate_io_dataset(filenames:list, coordinates, config: dict):
    """Assemble the tf.data pipeline that feeds (input, target) pairs.

    The pipeline slices ``(filenames, coordinates)`` element-wise, maps
    ``tf_parse_dataset`` over the pairs, then maps ``tf_preprocess`` with the
    ``input_size`` and ``output_size`` values found under ``config['data']``.

    Args:
        filenames: List of image file paths.
        coordinates: Joint coordinates aligned with ``filenames``.
        config: Full configuration mapping; only its ``data`` section is read.

    Returns:
        The mapped ``tf.data.Dataset``.
    """
    def _apply_preprocess(parsed_first, parsed_second):
        # Sizes are looked up when the mapping function runs, as in a lambda.
        data_section = config.get('data')
        return tf_preprocess(
            parsed_first,
            parsed_second,
            data_section.get('input_size'),
            data_section.get('output_size'),
        )

    sliced = tf.data.Dataset.from_tensor_slices((filenames, coordinates))
    parsed = sliced.map(tf_parse_dataset)
    return parsed.map(_apply_preprocess)

# Main Code

## Open YAML Config File

In [6]:
# Load the default configuration from <ROOT_FOLDER>/config/hpeDefault.yml.
# NOTE(review): FullLoader can build more than plain YAML data types;
# yaml.safe_load would presumably suffice for a plain config file — confirm.
with open(os.path.join(ROOT_FOLDER, 'config', 'hpeDefault.yml'), 'r') as f:
    yaml_config = yaml.load(f, Loader=yaml.FullLoader)

Let's load the dataset summary into a DataFrame and split it into train and test subsets.

In [7]:
# Read the ';'-separated summary file named in the `dataset` config section.
df = pd.read_csv(os.path.join(ROOT_FOLDER, yaml_config.get("dataset").get('summary_file')), sep=";")
# Rows whose split column equals 1 form the train set, rows equal to 0 the test set.
train_df = df.query(f"{yaml_config.get('dataset').get('train_test_split')} == 1")
test_df = df.query(f"{yaml_config.get('dataset').get('train_test_split')} == 0")

In [8]:
# We generate the list of image filenames and the array of joint coordinates for each split
train_filenames, train_coordinates = extract_coordinates_and_filenames(train_df, yaml_config.get('dataset'))
test_filenames, test_coordinates = extract_coordinates_and_filenames(test_df, yaml_config.get('dataset'))

In [9]:
# We instantiate one tf.data pipeline per split from the filenames and coordinates
train_dataset = generate_io_dataset(train_filenames, train_coordinates, yaml_config)
test_dataset = generate_io_dataset(test_filenames, test_coordinates, yaml_config)

In [17]:
# Display the dataset repr to check element shapes and dtypes.
test_dataset

<MapDataset shapes: ((256, 256, 3), (64, 64, 16)), types: (tf.float32, tf.float32)>

In [11]:
# Scalar float32 sample drawn uniformly between -45 and 45.
rand_tensor = tf.random.uniform(shape=[], minval=-45, maxval=45, dtype=tf.float32)

In [17]:
# Display the sampled scalar.
rand_tensor

<tf.Tensor: shape=(), dtype=float32, numpy=42.216187>

In [None]:
@tf.function
def tf_random_rotation(images, heatmaps, rotation_range):
    """Rotate images and heatmaps by one shared random angle.

    A single scalar is drawn uniformly between ``-rotation_range`` and
    ``rotation_range`` and passed to the rotation call for both inputs, so
    images and heatmaps receive the same angle.

    Args:
        images: Input passed to the rotation op.
        heatmaps: Input rotated with the same angle as ``images``.
        rotation_range: Bound of the symmetric sampling interval.

    Returns:
        ``(rotated_images, rotated_heatmaps)``.
    """
    # NOTE(review): the unit of `rotation_range` (degrees vs radians) depends
    # on what the rotation op expects — confirm.
    rand = tf.random.uniform(
        [], minval=-1*rotation_range, maxval=rotation_range, dtype=tf.dtypes.float32, seed=None, name=None
    )
    # NOTE(review): `tf.image.rotate` does not appear to be part of the core
    # `tf.image` module (rotation lives in TensorFlow Addons as
    # `tfa.image.rotate`) — verify this call resolves before relying on it.
    rotated_images = tf.image.rotate(images, rand)
    rotated_heatmaps = tf.image.rotate(heatmaps, rand)
    return rotated_images, rotated_heatmaps

In [22]:
def tf_stacker(inputs, heatmaps, stacks=1):
    """Repeat the heatmaps along a new leading axis, one copy per stack.

    Args:
        inputs: Passed through unchanged.
        heatmaps: Tensor to replicate.
        stacks: Number of copies stacked on axis 0. Defaults to 1 so the
            function can be handed directly to ``Dataset.map``, which calls it
            with the two dataset components only. Bind a different value with
            a lambda or ``functools.partial`` when more stacks are needed.

    Returns:
        ``(inputs, stacked)`` where ``stacked`` is ``heatmaps`` stacked
        ``stacks`` times along a new axis 0.
    """
    return inputs, tf.stack([heatmaps] * stacks, axis=0)

In [23]:
# Apply tf_stacker to every element of the training dataset. Dataset.map
# passes only the two dataset components, so `stacks` is never supplied here.
u = train_dataset.map(tf_stacker)

In [24]:
# Iterate over the first five elements; after the loop `tfsample` holds the
# last one taken.
for tfsample in u.take(5):
    pass

In [25]:
# Display the sample kept from the loop above.
tfsample

(<tf.Tensor: shape=(256, 256, 3), dtype=float32, numpy=
 array([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         ...,
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         ...,
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         ...,
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        ...,
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         ...,
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         ...,
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         ...,
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]], 

First, we check that the input size is an integer multiple of the output size; this ratio determines the number of downsizing steps.

In [None]:
# Fail early unless inputsize / outputsize is a whole number.
assert (CFG.default.HOURGLASS.inputsize / CFG.default.HOURGLASS.outputsize).is_integer(), 'InputSize is not divisible by OutputSize'
# The downsizing count is derived as (inputsize / outputsize) - 1.
# NOTE(review): confirm the "- 1" is intended — a ratio of 4 yields 3 here.
PREPROCESS_DOWNSIZING = int(CFG.default.HOURGLASS.inputsize / CFG.default.HOURGLASS.outputsize) -1

We then check that the input image mode is one of the supported modes and derive the number of channels from it.

In [None]:
# Reject any image mode outside the supported set before deriving CHANNELS.
assert CFG.default.HOURGLASS.inputmode in ["RGB", "BGR", "HSV", "HSL", "GRAY"], 'The input image mode is not recognized'
# Grayscale inputs carry one channel; every supported colour mode carries three.
if CFG.default.HOURGLASS.inputmode in ("GRAY",):
    CHANNELS = 1
elif CFG.default.HOURGLASS.inputmode in ("RGB", "BGR", "HSV", "HSL"):
    CHANNELS = 3