# Hand Gesture Identification

#### Importing Libraries

In [1]:
import tensorflow as tf
from tensorflow.keras import models
import numpy as np
from PIL import Image
import cv2
import imutils

#### Global Variables

In [2]:
bg = None
temp_image = 'temp.png'

### Resize Image

Resizes the image file given as input to a width of 100 pixels, keeping its aspect ratio, and saves it back to the same path.


In [3]:
def resizeImage(imageName):
    """Shrink the image file at ``imageName`` to 100 px wide, in place.

    The height is scaled by the same factor as the width (truncated to an
    integer), so the aspect ratio is kept. The resized image overwrites
    the original file.

    Args:
        imageName: Path of the image file to read and overwrite.
    """
    basewidth = 100
    # Close the source file before writing back to the same path.
    with Image.open(imageName) as img:
        wpercent = basewidth / float(img.size[0])
        hsize = int(float(img.size[1]) * wpercent)
        # ``Image.ANTIALIAS`` was removed in Pillow 10; ``Image.LANCZOS``
        # is the same filter under its long-standing name.
        resized = img.resize((basewidth, hsize), Image.LANCZOS)
    resized.save(imageName)

## Running Average

In [4]:
def run_avg(image, aWeight):
    """Fold ``image`` into the running-average background held in ``bg``.

    On the first call (``bg`` is still ``None``) the background is seeded
    with a float copy of ``image``. Every later call blends ``image`` into
    ``bg`` in place through ``cv2.accumulateWeighted`` with ``aWeight`` as
    the alpha argument. Nothing is returned.
    """
    global bg
    if bg is not None:
        # argument order is (src, dst, alpha); bg is updated in place
        cv2.accumulateWeighted(image, bg, aWeight)
        return
    # first frame seen: use it as the initial background
    bg = image.copy().astype("float")

## Segmentation

In [5]:
def segment(image, threshold=25):
    """Separate the foreground of ``image`` from the background ``bg``.

    Args:
        image: Current frame region; compared against ``bg`` cast to
            ``uint8``.
        threshold: Minimum absolute difference for a pixel to be treated
            as foreground (set to 255 in the binary mask).

    Returns:
        ``None`` when no contour is found, otherwise a tuple
        ``(thresholded, segmented)`` holding the binary mask and the
        contour with the largest area.
    """
    global bg
    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last element, so index it instead of unpacking.
    cnts = cv2.findContours(thresholded.copy(),
                            cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

    # nothing detected in this frame
    if len(cnts) == 0:
        return None

    # based on contour area, take the largest contour as the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (thresholded, segmented)

## Getting Predicted Class

In [6]:
def getPredictedClass():
    """Run the loaded model on the image stored at ``temp_image``.

    The file is read with OpenCV, converted to greyscale, reshaped to
    (89, 100, 1) and wrapped in a batch of one, giving an array of shape
    (1, 89, 100, 1) that is passed to ``model.predict``. The prediction is
    printed and then returned unchanged.
    """
    frame = cv2.imread(temp_image)
    grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # add the channel axis, then wrap in a one-element batch
    batch = np.array([grey.reshape(89, 100, 1)])
    scores = model.predict(batch)
    print(scores)
    return scores

## Displaying Result

In [7]:
def showStatistics(prediction):
    """Draw the per-gesture scores and the winning gesture in a window.

    One text line is drawn per gesture with its score (two decimals),
    followed by the gesture with the highest score and its confidence,
    i.e. the top score as a percentage of the sum of all scores. The
    image is shown in the OpenCV window named "Statistics ".

    Args:
        prediction: Indexable as ``prediction[0][i]``, with one score per
            gesture in the order fist, palm, swing, vivek.
    """
    gestures = ['fist', 'palm', 'swing', 'vivek']
    x = 30
    line_height = 30
    # one line per gesture, two summary lines, one spare line of padding
    height = (len(gestures) + 3) * line_height
    width = 500
    textImage = np.zeros((height, width, 3), np.uint8)

    def draw_line(text, baseline):
        # white text, scale 1, thickness 2, at the shared left margin
        cv2.putText(textImage,
                    text,
                    (x, baseline),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (255, 255, 255),
                    2)

    scores = prediction[0]
    y = line_height
    for gesture, score in zip(gestures, scores):
        draw_line(gesture + ' : ' + f"{score:.2f}", y)
        y += line_height

    predicted_gesture = gestures[np.argmax(scores)]
    total = float(np.sum(scores))
    # guard against an all-zero prediction instead of dividing by zero
    confidence = (float(np.amax(scores)) / total) * 100 if total else 0.0

    draw_line("Gesture: " + predicted_gesture, y)
    y += line_height
    # two decimals keep the line inside the 500 px wide canvas
    draw_line(f"Confidence: {confidence:.2f}%", y)
    cv2.imshow("Statistics ", textImage)

## Loading Model

In [8]:
# Reset the TF1-compat default graph before loading the model.
# NOTE(review): presumably this clears graph state left over from earlier
# runs of this notebook cell — confirm it is still required.
tf.compat.v1.reset_default_graph()
# Load the trained model from ./TrainedModel/; getPredictedClass() reads it
# through the module-level name `model`.
model = models.load_model('./TrainedModel/')

## Predicting

In [9]:
# get the reference to the webcam (device index 0)
# NOTE(review): index 0 is presumably the system's default camera — confirm
# on the target machine. The capture loop reads from and releases `camera`.
camera = cv2.VideoCapture(0)


In [10]:
# Live prediction loop.
#
# The first 30 frames feed run_avg() to build the background model. After
# that, each frame's region of interest is segmented; once the user has
# pressed "s", the thresholded hand mask is written to `temp_image`,
# resized, classified and the result shown. Pressing "q" quits.

# initialize weight for running average
aWeight = 0.5

# region of interest (ROI) coordinates
# The ROI is 215 rows by 240 columns; resizeImage() scales it to 100 px
# wide and int(215 * 100 / 240) = 89 px high, the (89, 100) shape that
# getPredictedClass() reshapes to.
top, right, bottom, left = 10, 350, 225, 590

# initialize num of frames
num_frames = 0
start_recording = False

# try/finally guarantees that the webcam and the OpenCV windows are
# released even when something inside the loop raises.
try:
    # keep looping, until interrupted
    while True:
        # get the current frame
        grabbed, frame = camera.read()

        if not grabbed:
            print("Error, Please check your camera")
            print(camera)
            break

        # resize the frame
        frame = imutils.resize(frame, width=700)

        # flip the frame horizontally
        frame = cv2.flip(frame, 1)

        # clone the frame; drawing happens on the clone only
        clone = frame.copy()

        # get the ROI
        roi = frame[top:bottom, right:left]

        # convert the roi to grayscale and blur it
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7, 7), 0)

        # to get the background, keep accumulating until 30 frames have
        # been seen so that the running average model gets calibrated
        if num_frames < 30:
            run_avg(gray, aWeight)
        else:
            # segment the hand region
            hand = segment(gray)

            # check whether hand region is segmented
            if hand is not None:
                # unpack the thresholded image and segmented region
                (thresholded, segmented) = hand

                # contour points are relative to the ROI; shift them by
                # the ROI origin to draw on the full frame
                cv2.drawContours(
                    clone, [segmented + (right, top)], -1, (0, 0, 255))
                if start_recording:
                    cv2.imwrite(temp_image, thresholded)
                    resizeImage(temp_image)
                    prediction = getPredictedClass()
                    showStatistics(prediction)

                cv2.imshow("Thesholded", thresholded)

        # outline the region of interest
        cv2.rectangle(clone, (left, top), (right, bottom), (0, 255, 0), 2)

        # increment the number of frames
        num_frames += 1

        # display the frame counter and the annotated frame
        cv2.putText(clone, "frame: " + str(num_frames), (30, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow("Video Feed", clone)

        # observe the keypress by the user
        keypress = cv2.waitKey(1) & 0xFF

        # if the user pressed "q", then stop looping
        if keypress == ord("q"):
            break

        # "s" switches on classification for all following frames
        if keypress == ord("s"):
            start_recording = True
finally:
    # release the resources
    camera.release()
    cv2.destroyAllWindows()


[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1.0000000e+00 2.5442835e-08 0.0000000e+00 0.0000000e+00]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1.000000e+00 9.032058e-20 0.000000e+00 0.000000e+00]]
[[1. 0. 0. 0.]]
[[1.00000e+00 5.98649e-37 0.00000e+00 0.00000e+00]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]


[[1.0000000e+00 0.0000000e+00 2.7989255e-23 0.0000000e+00]]
[[4.213183e-35 0.000000e+00 1.000000e+00 0.000000e+00]]
[[1.0000000e+00 0.0000000e+00 3.1994857e-31 0.0000000e+00]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1.0332534e-04 0.0000000e+00 9.9989665e-01 2.2614786e-26]]
[[1.000000e+00 0.000000e+00 6.883071e-09 4.101505e-30]]
[[1.0000000e+00 0.0000000e+00 1.4788226e-16 3.4154594e-14]]
[[0.84483504 0.01851261 0.12221909 0.01443331]]
[[0.84483504 0.01851261 0.12221909 0.01443331]]
[[0.84483504 0.01851261 0.12221909 0.01443331]]
[[2.8590228e-05 0.0000000e+00 9.5919245e-01 4.0778916e-02]]
[[1.2711245e-16 0.0000000e+00 2.5216316e-18 1.0000000e+00]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[0. 0. 0. 1.]]
[[0. 0. 0. 1.]]
[[0. 1. 0. 0.]]
[[0. 1. 0. 0.]]
[[0. 1. 0. 0.]]
[[0. 0. 0. 1.]]
[[1.0000000e+00 0.0000000e+00 1.4935415e-29 0.0000000e+00]]
[[1. 0. 0. 0.]]
[[1.0000000e+00 0.0000000e+00 2.5379173e-21 0.0000000e+00]]
[[1. 0. 0. 0.]]
[[1. 0. 0. 0.]]
[[1.0000

In [11]:
# remove temporary image file
import os

# The capture loop only writes `temp_image` after "s" has been pressed and
# a hand has been segmented, so the file may not exist; a missing file is
# not an error here.
try:
    os.remove(temp_image)
except FileNotFoundError:
    pass