In [None]:
#import the necessary libraries
import os
import sys
import scipy.io
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import numpy as np
import tensorflow as tf
import imageio
import h5py
import tables
from decimal import Decimal
import time
import functools
from functools import reduce
import cv2
%matplotlib inline

In [None]:
# Ask TensorFlow to grow its GPU memory allocation on demand (allow_growth)
# rather than using the default allocation behaviour, then open a session
# with that configuration.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

In [None]:
# All dataset images were resized to 256x256 ahead of time and stored in HDF5
# files so that the data pipeline is not throttled by reading individual
# images from disk.

batch_size = 16

hdf5_path = "../data/image_mpii_data.hdf5"
hdf5_pose_path = "../data/pose_raw.hdf5"

# Both files are opened read-only.
# NOTE(review): neither handle is closed in this part of the notebook --
# confirm that a later cell closes them.
hdf5_file = tables.open_file(hdf5_path, mode="r")
hdf5_pose_file = tables.open_file(hdf5_pose_path, mode="r")

# Slice every axis of the stored nodes; presumably this pulls the complete
# 4-D datasets into memory as arrays -- TODO confirm for very large files.
pose_images = hdf5_file.root.pose_images[:, :, :, :]
pose_data = hdf5_pose_file.root.pose_stick[:, :, :, :]

# Number of samples along the leading axis of the image array.
number_of_images = pose_images.shape[0]

In [None]:
# Show the sample counts of the image and pose arrays side by side so a
# mismatch between the two files is easy to spot.
print(number_of_images, pose_data.shape[0])

# Hard-coded train/test split sizes.
number_train_images = 25936  # 90% of whole set
number_test_images = 2800

In [None]:
# initialize weights
def weight_initializer(weight_input, output_channel_size, filter_size):
    """Create a square convolution kernel variable sized for ``weight_input``.

    Args:
        weight_input: Rank-4 tensor; its last static dimension is used as the
            kernel's input-channel count.
        output_channel_size: Number of output channels of the kernel.
        filter_size: Height and width of the (square) kernel.

    Returns:
        A ``tf.Variable`` of shape
        ``[filter_size, filter_size, input_channels, output_channel_size]``
        whose initial value comes from the Xavier initializer with
        ``uniform=False``.
    """
    # Unpacking into exactly four names enforces the rank-4 requirement here;
    # only the trailing (channel) dimension is actually needed.
    _, _, _, in_channels = weight_input.get_shape().as_list()

    kernel_shape = [filter_size, filter_size, in_channels, output_channel_size]
    xavier = tf.contrib.layers.xavier_initializer(uniform=False)

    return tf.Variable(xavier(kernel_shape))

In [None]:
# convolution block
def conv2d(block_input, num_filters, filter_size = 1, stride_length = 1):
    """Convolve ``block_input`` with a newly created kernel (no bias, no padding).

    Args:
        block_input: Rank-4 input tensor (its last dimension is treated as
            the channel axis by ``weight_initializer``).
        num_filters: Number of output channels.
        filter_size: Side length of the square kernel.
        stride_length: Stride applied along both spatial axes.

    Returns:
        The result of ``tf.nn.conv2d`` with ``'VALID'`` padding.
    """
    kernel = weight_initializer(block_input, num_filters, filter_size)
    # Batch and channel strides stay at 1; only the two middle axes stride.
    spatial_strides = [1, stride_length, stride_length, 1]

    return tf.nn.conv2d(block_input, kernel, spatial_strides, padding='VALID')

In [None]:
def conv_bn_relu(block_input, num_filters, filter_size = 1, stride_length = 1):
    """Convolution followed by batch normalisation with a ReLU activation.

    Args:
        block_input: Rank-4 input tensor.
        num_filters: Number of output channels of the convolution.
        filter_size: Side length of the square kernel.
        stride_length: Stride applied along both spatial axes.

    Returns:
        The batch-normalised (decay 0.9, epsilon 1e-5), ReLU-activated
        convolution output.
    """
    # Same VALID, bias-free convolution that conv2d builds.
    convolved = conv2d(block_input, num_filters, filter_size, stride_length)

    return tf.contrib.layers.batch_norm(
        convolved, 0.9, epsilon=1e-5, activation_fn=tf.nn.relu
    )

In [None]:
def conv_block(block_input, num_filters):
    """Three-convolution branch, each convolution preceded by BN + ReLU.

    The layout is 1x1 -> 3x3 -> 1x1, where the first two convolutions use
    ``int(num_filters / 2)`` channels and the last one restores
    ``num_filters`` channels. The 3x3 input is zero-padded by one pixel on
    each spatial side so the VALID convolution keeps the spatial size.

    Args:
        block_input: Rank-4 input tensor.
        num_filters: Number of output channels of the block.

    Returns:
        The output tensor of the final 1x1 convolution.
    """
    def bn_relu(tensor):
        # Batch norm (decay 0.9) with ReLU applied as its activation.
        return tf.contrib.layers.batch_norm(
            tensor, 0.9, epsilon=1e-5, activation_fn=tf.nn.relu
        )

    half_filters = int(num_filters / 2)
    one_pixel_border = np.array([[0, 0], [1, 1], [1, 1], [0, 0]])

    reduced = conv2d(bn_relu(block_input), half_filters, 1, 1)
    padded = tf.pad(bn_relu(reduced), one_pixel_border)
    mixed = conv2d(padded, half_filters, 3, 1)

    return conv2d(bn_relu(mixed), int(num_filters), 1, 1)

In [None]:
def skip_layer(block_input, num_filters):
    """Shortcut branch: identity when channels already match, else a 1x1 conv.

    Args:
        block_input: Rank-4 input tensor.
        num_filters: Required number of channels on the shortcut output.

    Returns:
        ``block_input`` itself if its fourth static dimension equals
        ``num_filters``; otherwise a 1x1, stride-1 convolution of it with
        ``num_filters`` output channels.
    """
    channels_match = block_input.get_shape()[3] == num_filters
    if channels_match:
        return block_input

    # Project to the requested channel count so the shortcut can be summed.
    return conv2d(block_input, num_filters, 1, 1)

In [None]:
def residual(block_input, num_filters):
    """Residual module: sum of the convolution branch and the shortcut branch.

    Args:
        block_input: Rank-4 input tensor.
        num_filters: Number of output channels of both branches.

    Returns:
        ``tf.add_n`` of ``conv_block(block_input, num_filters)`` and
        ``skip_layer(block_input, num_filters)``.
    """
    # The convolution branch is built before the shortcut branch.
    main_branch = conv_block(block_input, num_filters)
    shortcut = skip_layer(block_input, num_filters)

    return tf.add_n([main_branch, shortcut])

In [None]:
def hourglass_unit(input_data, reduction_factor, num_filters):
    """Recursively build one hourglass: pool, process, upsample, add skip.

    Each level keeps a full-resolution residual branch, halves the spatial
    size with a 2x2 max-pool, processes the pooled features (recursing while
    ``reduction_factor > 0``), upsamples the result by 2 with nearest-neighbour
    interpolation and adds it to the full-resolution branch.

    Args:
        input_data: Rank-4 input tensor. Presumably its spatial size must be
            divisible by ``2 ** (reduction_factor + 1)`` for the final sum to
            line up -- TODO confirm against the inputs used.
        reduction_factor: Number of additional nested levels below this one.
        num_filters: Channel count used by every residual module.

    Returns:
        Sum of the full-resolution branch and the upsampled low-resolution
        branch.
    """
    # Full-resolution branch is created first.
    full_res = residual(input_data, num_filters)

    # Low-resolution branch.
    pooled = tf.contrib.layers.max_pool2d(input_data, [2, 2], [2, 2], 'VALID')
    low_in = residual(pooled, num_filters)

    if reduction_factor <= 0:
        # Innermost level: a plain residual module instead of another hourglass.
        low_mid = residual(low_in, num_filters)
    else:
        low_mid = hourglass_unit(low_in, reduction_factor - 1, num_filters)

    low_out = residual(low_mid, num_filters)

    # Double height and width (dimensions 1 and 2 of the dynamic shape).
    doubled_size = tf.shape(low_out)[1:3] * 2
    upsampled = tf.image.resize_nearest_neighbor(low_out, doubled_size)

    return tf.add_n([full_res, upsampled])

In [None]:
def hourglass_model(input_data, num_blocks, num_filters, reduction_factor, train_model):
    """Build a stack of ``num_blocks`` hourglass units on top of a conv stem.

    Layout:
      * Stem: pad by 2, 6x6 stride-2 convolution (64 channels), residual(128),
        2x2 max-pool, residual(128), residual(num_filters).
      * ``num_blocks - 1`` intermediate blocks. Each runs an hourglass and a
        conv-BN-ReLU, then sums three tensors: a 1x1 convolution with dropout,
        a pair of stacked 1x1 convolutions, and the block's own input.
      * Final block: hourglass, conv-BN-ReLU, dropout, a 1x1 convolution to
        3 channels and a 2x nearest-neighbour upsample.

    Layers are created in a fixed order, so automatically generated variable
    names depend only on the arguments.

    Args:
        input_data: Rank-4 image tensor.
        num_blocks: Number of stacked hourglass units; must be >= 1.
        num_filters: Channel count used inside the hourglass stack.
        reduction_factor: Passed to ``hourglass_unit`` (nested levels per unit).
        train_model: Forwarded as ``training`` to the dropout layers.

    Returns:
        A 3-channel tensor whose height and width are twice those of the
        hourglass feature maps.

    Raises:
        ValueError: If ``num_blocks`` is smaller than 1.
    """
    if num_blocks < 1:
        raise ValueError("num_blocks must be at least 1, got " + str(num_blocks))

    # Stem
    padded_input = tf.pad(input_data, np.array([[0, 0], [2, 2], [2, 2], [0, 0]]))
    stem = conv2d(padded_input, 64, 6, 2)
    res_1 = residual(stem, 128)
    pooled = tf.contrib.layers.max_pool2d(res_1, [2, 2], [2, 2], padding='VALID')
    res_2 = residual(pooled, 128)
    features = residual(res_2, num_filters)

    # Intermediate blocks; `features` carries each block's output to the next.
    # With num_blocks == 1 this loop is skipped and the stem feeds the final
    # block directly, so exactly one hourglass is built.
    for _ in range(num_blocks - 1):
        hourglass = hourglass_unit(features, reduction_factor, num_filters)
        shared = conv_bn_relu(hourglass, num_filters)

        # Branch 1: 1x1 convolution followed by dropout.
        dropped = tf.layers.dropout(
            conv2d(shared, num_filters, 1, 1), rate=0.1, training=train_model
        )
        # Branch 2: two stacked 1x1 convolutions.
        remapped = conv2d(conv2d(shared, num_filters, 1, 1), num_filters, 1, 1)

        features = tf.add_n([dropped, remapped, features])

    # Final block: no residual sum, project to the 3 output channels.
    hourglass = hourglass_unit(features, reduction_factor, num_filters)
    shared = conv_bn_relu(hourglass, num_filters)
    dropped = tf.layers.dropout(shared, rate=0.1, training=train_model)
    projected = conv2d(dropped, 3, 1, 1)

    # Double height and width (dimensions 1 and 2 of the dynamic shape).
    final_output = tf.image.resize_nearest_neighbor(
        projected, tf.shape(projected)[1:3] * 2
    )
    return final_output

In [None]:
# Build the training graph in a fresh tf.Graph and train inside one session.
# The session reuses the GPU `config` defined earlier in the notebook.
with tf.Graph().as_default(), tf.Session(config=config) as sess:

    # ---- Inputs -------------------------------------------------------------
    # Placeholders for one batch of raw images and raw pose targets. They keep
    # their own names so that feed_dict below targets the placeholders; feeding
    # the scaled tensors instead would override them and skip the /255 scaling.
    pose_img_input = tf.placeholder(tf.float32,
                                    shape=(batch_size, pose_images.shape[1], pose_images.shape[2],
                                           pose_images.shape[3]),
                                    name='pose_img_ip')
    pose_stick_input = tf.placeholder(tf.float32,
                                      shape=(batch_size, pose_data.shape[1], pose_data.shape[2],
                                             pose_data.shape[3]),
                                      name='pose_data_ip')

    # ---- Model --------------------------------------------------------------
    # Divide by 255 (presumably mapping 8-bit pixel values to [0, 1] -- confirm
    # the stored dtype/range).
    pose_img_scaled = pose_img_input / 255.0

    # Last argument: True while training, False during inference.
    hg_output = hourglass_model(pose_img_scaled, 4, 256, 3, True)

    # ---- Loss ---------------------------------------------------------------
    # Mean squared error against targets scaled the same way as the inputs.
    pose_stick_scaled = pose_stick_input / 255.0
    total_loss = tf.losses.mean_squared_error(pose_stick_scaled, hg_output)

    # ---- Optimiser ----------------------------------------------------------
    # 20180905 rate = 2.5e-4. losses fell from 580 to 370. 1 epoch training time ~ 1000secs
    # NOTE(review): those loss values were presumably recorded while the feeds
    # bypassed the /255 scaling, so they are not comparable with losses
    # computed on scaled data.
    learning_rate = 2.5e-4
    run = 1

    # tf.contrib.layers.batch_norm registers its moving-average updates in
    # UPDATE_OPS; make the train op depend on them so they actually run.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        training_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    num_epochs = 60
    sess.run(tf.global_variables_initializer())

    # ---- Run switches -------------------------------------------------------
    restore_model = False
    save_model = True
    train_data = True

    # One Saver for both restoring and saving, so that checkpointing does not
    # add new save/restore ops to the graph on every save. max_to_keep=None
    # keeps every checkpoint on disk.
    saver = tf.train.Saver(max_to_keep=None)

    # Restore variable values only; checkpoints below are written without the graph.
    if restore_model:
        saver.restore(sess, '../models/20180905/hg_12_4')

    # ---- Training loop ------------------------------------------------------
    if train_data:

        # Never index past the data that is actually loaded: a short final
        # slice would not match the fixed batch dimension of the placeholders.
        usable_images = min(number_train_images, pose_images.shape[0], pose_data.shape[0])
        num_minibatch = usable_images // batch_size

        # NOTE(review): this runs num_epochs + 1 passes (indices 0..num_epochs),
        # presumably so that the last index lands on a checkpoint -- confirm.
        for i in range(num_epochs + 1):
            print("epoch number is ", i)
            epoch_loss = 0.0  # mean of the minibatch losses of this epoch

            start_time = time.time()

            for j in range(num_minibatch):
                first = j * batch_size
                last = first + batch_size
                image_batch = pose_images[first:last, :, :, :]
                pose_batch = pose_data[first:last, :, :, :]

                _, tl = sess.run([training_step, total_loss],
                                 feed_dict={pose_img_input: image_batch,
                                            pose_stick_input: pose_batch})
                epoch_loss += tl / num_minibatch

            end_time = time.time()
            print("total loss is ", epoch_loss)
            print("epoch time is ", (end_time - start_time))

            # Save the variable values (without the graph) every third epoch.
            if save_model and i % 3 == 0:
                saver.save(sess, "../models/20180905/hg_" + str(i) + "_" + str(run),
                           write_meta_graph=False)

    