<a href="https://colab.research.google.com/github/werd0n4/hand-gesture-classification/blob/master/Main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

---


In [42]:
# Mount Google Drive into the Colab runtime; the dataset paths configured
# below live under /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [43]:
# List the current working directory of the runtime (IPython shell escape).
!ls

drive  hand_gesture_classifier.h5  sample_data


In [44]:
import pandas as pd
import numpy as np
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import os
import cv2
import glob
import random
from matplotlib import pyplot as plt

# Constant parameters

---



In [45]:
# Target frame size in pixels: horiz_x is the width, vert_y is the height.
horiz_x, vert_y = 640, 480

# Dataset roots on the mounted Google Drive (one sub-directory per class).
imgs_train_path = '/content/drive/My Drive/Colab Notebooks/Rozszerzony_dataset/Train/'
imgs_test_path = '/content/drive/My Drive/Colab Notebooks/Rozszerzony_dataset/Test/'

In [46]:

# Maps the numeric class id (0-26) to the gesture label shown to the user.
# The labels are the digits 1-5 followed by the letters used in the dataset;
# enumerate() assigns the ids in exactly this order.
class_names = dict(enumerate("12345ABCDEFGHIKLMNOPRSTUWYZ"))


# Auxiliary functions

---


In [47]:

def resize(path):
    """Resize every image below ``path`` to ``horiz_x`` x ``vert_y`` pixels, in place.

    ``path`` is expected to contain one sub-directory per class. Each readable
    image inside those sub-directories is overwritten with its resized version.

    Entries of ``path`` that are not directories, and files OpenCV cannot
    decode, are skipped instead of aborting the whole run. Images that already
    have the target size are left untouched, so re-running the cell does not
    re-encode them.

    Prints the number of readable images found (already-sized ones included).
    """
    img_counter = 0

    for dirname in os.listdir(path):
        class_dir = os.path.join(path, dirname)
        if not os.path.isdir(class_dir):
            # Stray file next to the class folders; nothing to list inside it.
            continue

        for filename in os.listdir(class_dir):
            image_path = os.path.join(class_dir, filename)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals "not an image / unreadable" with None.
                print('Skipping unreadable file: ' + image_path)
                continue

            # OpenCV shapes are (height, width, channels), while cv2.resize
            # takes its target size as (width, height).
            if img.shape[:2] != (vert_y, horiz_x):
                resized_img = cv2.resize(img, (horiz_x, vert_y))
                cv2.imwrite(image_path, resized_img)
            img_counter += 1

    print('Images in set: ' + str(img_counter))


In [48]:

def sanity_check(path):
    """Count the images below ``path`` that do not have the expected shape.

    ``path`` is expected to contain one sub-directory per class. An image is
    counted as bad when OpenCV cannot read it or when its shape differs from
    ``(vert_y, horiz_x, 3)``. OpenCV reports shapes as
    (height, width, channels), so the height comes first.

    Prints the number of bad images; 0 means the whole set is consistent.
    """
    expected_shape = (vert_y, horiz_x, 3)
    counter = 0

    for dirname in os.listdir(path):
        class_dir = os.path.join(path, dirname)
        if not os.path.isdir(class_dir):
            # Stray file next to the class folders; nothing to check inside it.
            continue

        for filename in os.listdir(class_dir):
            img = cv2.imread(os.path.join(class_dir, filename))
            # cv2.imread returns None for files it cannot decode.
            if img is None or img.shape != expected_shape:
                counter += 1

    print('Sanity result: ' + str(counter))

In [49]:

def show_img(index, X, Y):
    """Display image ``X[index]`` and print the label encoded in ``Y[index]``.

    Parameters
    ----------
    index : int
        Position of the sample in ``X`` / ``Y``.
    X : array-like
        Images in OpenCV channel order (BGR); converted to RGB for display.
    Y : array-like
        One-hot encoded labels; the position of the hot entry is looked up in
        ``class_names``.
    """
    plt.imshow(cv2.cvtColor(X[index], cv2.COLOR_BGR2RGB).astype('float32'))
    plt.show()
    # argmax gives the position of the hot entry as a plain scalar. Converting
    # the 1-element array returned by np.where to int is deprecated in NumPy.
    nmb = int(np.argmax(Y[index]))
    print("On image: " + class_names[nmb])

## Load dataset

In [50]:

def _read_normalized(fname):
    """Read one image with OpenCV (BGR) and min-max scale it to float32 in [0, 1]."""
    img = cv2.imread(fname)
    if img is None:
        # cv2.imread returns None instead of raising; fail with the file name.
        raise ValueError(f'Could not read image: {fname}')
    return cv2.normalize(img, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)


def _label_from_path(fname):
    """Return the class id taken from the name of the directory holding ``fname``.

    The id is the part of the directory name before the first underscore.
    """
    class_dir = os.path.basename(os.path.dirname(fname))
    return int(class_dir.split('_')[0])


def load_dataset():
    """Load the train and test images and their integer class labels.

    Every file matching ``<split path>/<class dir>/<file>`` is read. Images
    are min-max normalized to float32 in [0, 1] and keep OpenCV's BGR channel
    order. The label of an image is derived from its parent directory name, so
    it no longer depends on how deep the dataset sits in the file system.

    Returns
    -------
    X_train, Y_train, X_test, Y_test : numpy.ndarray
        Image arrays and the matching integer label vectors (not one-hot).

    Raises
    ------
    ValueError
        If a matched file cannot be decoded as an image.
    """
    trainlist = glob.glob(f'{imgs_train_path}/*/*')
    testlist = glob.glob(f'{imgs_test_path}/*/*')

    X_train = np.array([_read_normalized(fname) for fname in trainlist])
    X_test = np.array([_read_normalized(fname) for fname in testlist])

    Y_train = np.array([_label_from_path(fname) for fname in trainlist], dtype=int)
    Y_test = np.array([_label_from_path(fname) for fname in testlist], dtype=int)

    return X_train, Y_train, X_test, Y_test


## Create network model

In [51]:

def create_model(input_shape=None, num_classes=27):
    """Build and compile the convolutional gesture classifier.

    The network is three Conv2D(3x3, ReLU) + MaxPool2D(2x2) stages with 6, 16
    and 120 filters, followed by dense layers of 128 and 84 units and a
    softmax output.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one input image as (height, width, channels). Defaults to
        ``(vert_y, horiz_x, 3)``.
    num_classes : int, optional
        Number of output classes (size of the softmax layer). Defaults to 27.

    Returns
    -------
    tensorflow.keras.models.Sequential
        Model compiled with categorical cross-entropy, Adam and accuracy, so
        it expects one-hot encoded targets.
    """
    if input_shape is None:
        input_shape = (vert_y, horiz_x, 3)

    # Only the first layer declares the input shape; later layers infer theirs
    # from the previous layer's output.
    model = Sequential([
        Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

        Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

        Conv2D(filters=120, kernel_size=(3, 3), activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),

        Flatten(),
        Dense(128, activation='relu'),
        Dense(84, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    return model


# Loading dataset

---


In [52]:
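# Uncomment before the first run on a new dataset: resize() overwrites every image in place and sanity_check() prints how many images have an unexpected shape.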
# resize(imgs_train_path)
# sanity_check(imgs_train_path)
# resize(imgs_test_path)
# sanity_check(imgs_test_path)


In [None]:
# Read both splits from Drive into memory: image arrays plus integer labels.
X_train, Y_train, X_test, Y_test = load_dataset()

In [None]:
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

In [None]:
# One-hot encode the integer labels of both splits (27 classes), as required
# by the categorical cross-entropy loss the model is compiled with.
Y_cat_train, Y_cat_test = (
    to_categorical(labels, 27) for labels in (Y_train, Y_test)
)

## Data augmentation

In [None]:
# Mild random augmentation; every range is deliberately small.
# No `rescale` is configured, so batches produced by this generator keep the
# pixel range of the image files.
augmentation_params = dict(
    rotation_range=5,         # rotate by up to 5 degrees
    width_shift_range=0.05,   # shift horizontally by up to 5% of the width
    height_shift_range=0.05,  # shift vertically by up to 5% of the height
    shear_range=0.05,         # shear intensity
    zoom_range=0.05,          # zoom in or out by up to 5%
    fill_mode='nearest',      # fill exposed pixels with the nearest valid value
)
image_gen = ImageDataGenerator(**augmentation_params)
                              

In [None]:
# Augmentation sample result

# Original image: pick a random test sample. The upper bound follows the
# actual size of the test set instead of a hard-coded count.
nmb = random.randint(0, len(X_test) - 1)
my_hand = X_test[nmb]
show_img(nmb, X_test, Y_cat_test)

# Generated image: one random transform of the same sample.
gen_img = image_gen.random_transform(my_hand)
print("Generated image")
plt.imshow(cv2.cvtColor(gen_img, cv2.COLOR_BGR2RGB).astype('float32'))
plt.show()

# False is the expected answer whenever the transform changed any pixel.
equal_arrays = np.array_equal(my_hand, gen_img)
print("Images are equal?: " + str(equal_arrays))

In [None]:
# Keras describes image sizes as (height, width, channels). This must match
# the model's input shape (vert_y, horiz_x, 3), because image_shape[:2] is
# passed to flow_from_directory as target_size.
image_shape = (vert_y, horiz_x, 3)
batch_size = 16

print(imgs_train_path)
print(imgs_test_path)

In [None]:
# Stream augmented training batches straight from the class sub-directories.
# Keras interprets target_size as (height, width) and yields one-hot labels
# for class_mode='categorical'.
train_image_gen = image_gen.flow_from_directory(imgs_train_path,
                                               target_size=image_shape[:2],
                                                color_mode='rgb',
                                               batch_size=batch_size,
                                               class_mode='categorical')

In [None]:
# Validation batches should be the untouched test images, so they come from a
# plain generator rather than from the augmenting `image_gen`.
# shuffle=False keeps the batches in a fixed order across epochs.
test_image_gen = ImageDataGenerator().flow_from_directory(imgs_test_path,
                                                          target_size=image_shape[:2],
                                                          color_mode='rgb',
                                                          batch_size=batch_size,
                                                          class_mode='categorical',
                                                          shuffle=False)

In [None]:
# Show the directory-name -> label-index mapping Keras inferred (directory
# names are ordered alphanumerically).
# NOTE(review): confirm this mapping agrees with the ids in `class_names`.
train_image_gen.class_indices

# Create and train model

---


In [None]:
# Build a freshly initialized network and print its layer-by-layer summary.
CNN = create_model()
CNN.summary()

In [None]:
# Stop training as soon as the validation loss fails to improve for one epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=1)
# Display the shape of the one-hot training labels as a quick check.
Y_cat_train.shape

In [None]:
# First training pass: fit on the in-memory arrays, validating on the test split.
CNN.fit(X_train, Y_cat_train, epochs=12, validation_data=(X_test, Y_cat_test), callbacks=[early_stop])

In [None]:
# Second training pass on the same CNN object, this time fed by the directory
# generators, so it continues from whatever weights the model already has.
# NOTE(review): load_dataset() scales images to [0, 1] in OpenCV's BGR order,
# while these generators apply no rescale and use color_mode='rgb'. Confirm
# that training on both inputs in sequence is intended.
results = CNN.fit(train_image_gen,epochs=20,
                              validation_data=test_image_gen,
                             callbacks=[early_stop]
                            )

## Saving model to file

In [None]:
# Persist the trained model as an HDF5 file in the current working directory.
CNN.save('hand_gesture_classifier.h5')

# Results

---


In [None]:
# Tabulate the per-epoch metrics recorded in the model's History object.
# NOTE(review): presumably this holds only the most recent fit() call - confirm.
metrics = pd.DataFrame(CNN.history.history)
metrics

## Single image prediction

In [None]:
# Pick one fixed test sample and display it together with its true label.
nmb = 2
my_hand = X_test[nmb]
show_img(nmb, X_test, Y_cat_test)

In [None]:
# Add the batch dimension the network expects: (1, height, width, channels).
my_hand = my_hand.reshape(1, vert_y, horiz_x, 3)
# Sequential.predict_classes was removed in TensorFlow 2.6. The argmax over
# the softmax output is the equivalent and works on every version.
result = int(np.argmax(CNN.predict(my_hand), axis=-1)[0])
print('CNN says: ' + class_names[result])

## Predictions for individual classes

In [None]:
# Predicted class id for every test image. Sequential.predict_classes was
# removed in TensorFlow 2.6, so take the argmax over the softmax output.
predictions = np.argmax(CNN.predict(X_test), axis=-1)

In [None]:
# Per-class precision, recall and F1 on the test split. Rows are keyed by the
# integer class id; see `class_names` for the corresponding gesture labels.
print(classification_report(Y_test,predictions))