## Hand image preprocessing

In [None]:
#HAND IMAGE PREPROCESSING, NOT LINNAN'S APPROACH WHICH USES SURF

import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from keras.preprocessing.image import load_img, img_to_array
from operator import itemgetter
import glob

def getHandMask(image):
    cropped = cv2.imread(image)[:, 20:140]
    rgb = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)
    hsv = cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV)

    # define range of hand color in HSV
    lower_hand = np.array([0,.2*255,50])
    upper_hand = np.array([40,.65*255,255])

    mask = cv2.inRange(hsv, lower_hand, upper_hand)
    mask = cv2.blur(mask, (3, 3))

    return mask

def findHandCountours(image):
    mask = getHandMask(image)
    edges = cv2.Canny(mask, 50,100)

    im2,contours,hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)

    contoursAreaIdx = [(idx, cv2.contourArea(cnt)) for idx, cnt in enumerate(contours)]
    contoursAreaIdx.sort(key=itemgetter(1), reverse=True)

    image = np.zeros(mask.shape, np.uint8)
    cv2.drawContours(image, contours, contoursAreaIdx[0][0], (255,255,255), 1)


def findHandConnectedComponents(image):
    mask = getHandMask(image)

    num, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)

    connectedAreas = sorted([(idx, stat[cv2.CC_STAT_AREA]) for idx, stat in enumerate(stats.tolist())], key=itemgetter(1), reverse=True)
    #plt.imshow(mask)

    maxConnectedAreaLabel = connectedAreas[1][0]
    maxConnected = [[255 if e == maxConnectedAreaLabel  else 0 for e in row] for row in labels]
    
    img = np.array(maxConnected)
    plt.figure()
    plt.imshow(img)
    plt.gray()

    return img

# Neural Network Approach

In [1]:
import sys
import tensorflow as tf
import os, glob
import glob
import csv
import numpy as np
import keras
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, Dropout, MaxPooling2D, Reshape, Activation, Flatten, LeakyReLU, Add, BatchNormalization
from keras.callbacks import ModelCheckpoint, TensorBoard

%matplotlib inline

height = 100
width = 120
n_channels = 3
batch_size = 1
n_train_images = 60
total_images = 74
num_images = 0

train_hands = np.empty((n_train_images, height, width, n_channels), dtype=np.float32)
test_hands = np.empty((total_images - n_train_images, height, width, n_channels), dtype=np.float32)
all_hands = np.empty((total_images, height, width, n_channels), dtype=np.float32)

#f = '../raw_data_small/001_HandPhoto_left_01.jpg'
for x in glob.glob('processed/*.jpg'):
    img = img_to_array(load_img(x))/255
    all_hands[num_images] = img
    if num_images >= n_train_images:
        test_hands[num_images - n_train_images] = img
    else:
        train_hands[num_images] = img
    num_images += 1
    
train_labels = np.empty((n_train_images, 1), dtype=np.float32)
test_labels = np.empty((total_images - n_train_images, 1), dtype=np.float32)
all_labels = np.empty((total_images, 1), dtype=np.float32)

with open('trainTargets.csv', 'rb') as csvfile:
    targets = csv.reader(csvfile, delimiter=',', quotechar='|')
    next(targets)
    i = 0
    for row in targets:
        all_labels[i] = row[1]
        if (i >= n_train_images):
            test_labels[i - n_train_images] = row[1]
        else:
            train_labels[i] = row[1]
        i +=1

def vanilla_conv():
    model = Sequential()

    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape = (height, width, n_channels)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='relu'))
    
    return model

def residual_block(input_layer):
    layer = input_layer
    layer = Conv2D(64, (4, 4), padding='same')(layer)
    layer = BatchNormalization()(layer)
    layer = LeakyReLU()(layer)
    layer = Conv2D(64, (4,4), padding='same')(layer)
    layer = BatchNormalization()(layer)
    layer = Add()([input_layer, layer])
    layer = LeakyReLU()(layer)
    return layer

def residual_conv(input_layer):
    x = Conv2D(64, (4, 4), padding='same', input_shape=(height, width, n_channels))(input_layer)
    #https://arxiv.org/abs/1502.03167
    x = BatchNormalization()(x)
    # Leaky ReLU. Leaky ReLUs are one attempt to fix the “dying ReLU” problem. 
    # Instead of the function being zero when x < 0, a leaky ReLU will instead 
    # have a small negative slope (of 0.01, or so). 
    x = LeakyReLU()(x)
    x = residual_block(x)
    x = residual_block(x)
    x = residual_block(x)
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    return x

#Resnet Approach w/ exploding gradient issue
#input_layer = Input(shape=(height, width, n_channels))
#x = residual_conv(input_layer)
#pred = Dense(1, activation='relu')(x)

#model = Model(inputs=input_layer, outputs=pred)
#print model.summary()

model = vanilla_conv()
model.compile(loss='mse', optimizer='adam', )

tensorboard = TensorBoard(log_dir="/home/ubuntu/logs")

Using TensorFlow backend.


In [None]:
model.fit(all_hands, all_labels, epochs=5000, batch_size=10, validation_split=.1, callbacks=[tensorboard])

Train on 66 samples, validate on 8 samples
Epoch 1/5000
Epoch 2/5000
Epoch 3/5000
Epoch 4/5000
Epoch 5/5000
Epoch 6/5000
Epoch 7/5000
Epoch 8/5000
Epoch 9/5000
Epoch 10/5000
Epoch 11/5000
Epoch 12/5000
Epoch 13/5000
Epoch 14/5000
Epoch 15/5000
Epoch 16/5000
Epoch 17/5000
Epoch 18/5000
Epoch 19/5000
Epoch 20/5000
Epoch 21/5000
Epoch 22/5000
Epoch 23/5000
Epoch 24/5000
Epoch 25/5000
Epoch 26/5000
Epoch 27/5000
Epoch 28/5000
Epoch 29/5000
Epoch 30/5000
Epoch 31/5000
Epoch 32/5000
Epoch 33/5000
Epoch 34/5000
Epoch 35/5000
Epoch 36/5000
Epoch 37/5000
Epoch 38/5000
Epoch 39/5000
Epoch 40/5000
Epoch 41/5000
Epoch 42/5000
Epoch 43/5000
Epoch 44/5000
Epoch 45/5000
Epoch 46/5000
Epoch 47/5000
Epoch 48/5000
Epoch 49/5000
Epoch 50/5000
Epoch 51/5000
Epoch 52/5000
Epoch 53/5000
Epoch 54/5000
Epoch 55/5000
Epoch 56/5000
Epoch 57/5000
Epoch 58/5000
Epoch 59/5000
Epoch 60/5000
Epoch 61/5000
Epoch 62/5000
Epoch 63/5000
Epoch 64/5000
Epoch 65/5000
Epoch 66/5000
Epoch 67/5000
Epoch 68/5000
Epoch 69/5000


In [2]:
# Transfer Learning Approach From Keras

from keras import applications
from keras.layers import Input

model_vgg16_conv = applications.vgg16.VGG16(weights='imagenet', include_top=False)
model_vgg16_conv.summary()

input = Input(shape=(height, width, n_channels))
out = model_vgg16_conv(input)

x = Flatten()(out)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='relu')(x)

model = Model(inputs=input, outputs=x)
model.compile(loss='mse', optimizer='adam')
model.summary()
model.fit(train_hands, train_labels, epochs=1000, batch_size=10)

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________

NameError: name 'height' is not defined

In [9]:
import matplotlib.pyplot as plt
img = np.empty((1, height, width, n_channels))

for idx, image in enumerate(test_hands):
    img[0] = image
    print('predict: ', model.predict(img, batch_size=10)[0][0]), 'actual: ',  test_labels[idx][0]
model.predict(img, batch_size=1)

('predict: ', 7.3440695) actual:  7.33522
('predict: ', 6.7360468) actual:  6.76824
('predict: ', 6.6889181) actual:  6.69317
('predict: ', 6.681716) actual:  6.7571
('predict: ', 8.0478287) actual:  8.03722
('predict: ', 6.5520983) actual:  6.6638
('predict: ', 7.0040674) actual:  8.44109
('predict: ', 7.2573714) actual:  7.1412
('predict: ', 7.3234377) actual:  6.81626
('predict: ', 7.25103) actual:  7.41646
('predict: ', 7.1030378) actual:  7.4375
('predict: ', 7.2781258) actual:  7.68456
('predict: ', 7.306385) actual:  7.42929
('predict: ', 6.736938) actual:  6.36737


array([[ 6.736938]], dtype=float32)

# Manual Approach with a Linear Scan over Images and simple linear model to translate linear scan pixels to wrist widths

## TODO