In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import glob, os, sys, shutil

from keras_preprocessing.image import load_img, img_to_array, ImageDataGenerator
from keras.models import Sequential, Input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout
from keras.utils.vis_utils import plot_model

from keras.callbacks import ModelCheckpoint
from keras.models import Model


from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from PIL import Image

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


# Data Collection

In [8]:
def binaryMask(img):
    """Convert a BGR image into a three-channel black-and-white mask.

    Pipeline: grayscale -> 7x7 Gaussian blur -> inverted adaptive Gaussian
    threshold -> inverted Otsu threshold. The single-channel result is then
    replicated onto three channels, matching the ``(128, 128, 3)`` input
    shape the model in this notebook declares.

    Parameters
    ----------
    img : numpy.ndarray
        Colour image in OpenCV's BGR channel order.

    Returns
    -------
    numpy.ndarray
        Image with the same height and width as ``img`` and three identical
        channels containing only the values 0 and 255.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (7, 7), 3)
    adaptive = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    # With THRESH_OTSU the threshold is chosen automatically; the 25 is ignored.
    _, mask = cv2.threshold(adaptive, 25, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # `mask` has a single channel, so it must be expanded with GRAY2RGB;
    # COLOR_BGR2RGB only accepts 3- or 4-channel input.
    return cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)

In [None]:
# Capture training images from the default webcam: every frame, the square
# region marked on screen is binarised with binaryMask() and written to disk.
# Press 'q' in an OpenCV window to stop.
# (cv2 is imported in the first cell of the notebook.)

LABEL = 5  # class label encoded in the file name of every frame saved by this run
OUTPUT_PATTERN = 'D://Data//Counting Finger//data_collection//{}_{}.png'

x0, y0, width = 10, 50, 200  # top-left corner (x, y) and side length of the capture square
count = 0

cam = cv2.VideoCapture(0)
cv2.namedWindow('ROI', cv2.WINDOW_NORMAL)  # create the resizable window once, not per frame

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            # No frame delivered (camera missing, busy or unplugged): stop cleanly.
            print ("Could not read a frame from the camera")
            break

        window = cv2.flip(frame, 1) # mirror

        # Images are indexed [row (y), column (x)]. Crop exactly the square that is
        # drawn below, and copy it first so the guide rectangle is not part of the crop.
        roi = window[y0 : y0 + width, x0 : x0 + width].copy()
        roi = binaryMask(roi)

        cv2.rectangle(window, (x0, y0), (x0 + width, y0 + width), (0, 255, 255), 1)
        cv2.imshow('Image', window)
        cv2.imshow("ROI", roi)

        # imwrite reports failure through its return value (e.g. missing folder),
        # so only count frames that were actually written.
        if cv2.imwrite(OUTPUT_PATTERN.format(str(LABEL), count), roi):
            count += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if an error occurred.
    cam.release()
    cv2.destroyAllWindows()

print ("{} frames stored".format(count))

### Loading the model that was trained on Colab

In [19]:
# Rebuild the network layer by layer so that the saved weights can be loaded
# into it. The order and number of layers must stay exactly as listed: the
# weight file and the layer indices used further down depend on them.
model = Sequential([
    # convolution block 1
    Conv2D(16, (2, 2), input_shape = (128, 128, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size = (3, 3)),

    # convolution block 2
    Conv2D(32, (2, 2)),
    Activation('relu'),
    MaxPooling2D(pool_size = (3, 3)),

    Dropout(0.5),

    # convolution block 3
    Conv2D(64, (2, 2)),
    Activation('relu'),
    MaxPooling2D(pool_size = (3, 3)),

    Flatten(),

    # classifier head: the first two Dense layers have no activation of their own
    Dense(1024),
    Dense(256),
    Dense(64),
    Activation('relu'),
    Dense(6),
    # NOTE(review): sigmoid output combined with categorical_crossentropy is unusual
    # for a 6-way classifier (softmax is the common choice) - left as is; confirm
    # against the training notebook before changing.
    Activation('sigmoid'),
])

model.load_weights("./custom_data_weights_v3.hdf5")

model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

# Predicting via webcam

In [23]:
# Live prediction: crop the marked square from every webcam frame, binarise it,
# resize it to the 128x128 model input and overlay the predicted class on the
# preview. Press 'q' in an OpenCV window to stop.
# (cv2 is imported in the first cell of the notebook.)

x0, y0, width = 10, 50, 200  # top-left corner (x, y) and side length of the capture square
font = cv2.FONT_HERSHEY_SIMPLEX

cam = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            # No frame delivered (camera missing, busy or unplugged): stop cleanly.
            print ("Could not read a frame from the camera")
            break

        window = cv2.flip(frame, 1) # mirror

        # Images are indexed [row (y), column (x)]. Crop exactly the square that is
        # drawn below, and copy it first so the guide rectangle is not part of the crop.
        roi = window[y0 : y0 + width, x0 : x0 + width].copy()
        roi = binaryMask(roi)
        cv2.rectangle(window, (x0, y0), (x0 + width, y0 + width), (0, 255, 255), 1)

        # Resize to the model's input size and scale pixel values to [0, 1].
        resize_roi = np.reshape(np.array(Image.fromarray(roi).resize((128, 128))), (128, 128, 3))
        resize_roi = resize_roi / 255
        cv2.imshow('ROI', resize_roi)

        # argmax over the class axis is what Sequential.predict_classes computed for
        # multi-class outputs; that helper no longer exists in newer Keras releases.
        predict_class = np.argmax(model.predict(np.reshape(resize_roi, (1, 128, 128, 3))), axis = -1)
        cv2.putText(window, '{}'.format(str(predict_class)), (10,450), font, 3, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow('Image', window)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even if an error occurred.
    cam.release()
    cv2.destroyAllWindows()

## Re-reading the collected images and converting them to RGB

In [None]:
# Re-read every collected PNG, swap its channel order with COLOR_BGR2RGB and
# write the result under the same file name into the data_collection_v2 folder.
# NOTE(review): cv2.imwrite treats the array as BGR, so the saved file has its
# red and blue channels swapped relative to the source - confirm this is intended.
converted = 0
for file in glob.glob("D://Data//Counting Finger//data_collection//*.png"):
    name = os.path.basename(file)

    image = cv2.imread(file)
    if image is None:
        # cv2.imread signals an unreadable or corrupt file by returning None.
        print ("Skipping unreadable file: {}".format(file))
        continue

    written = cv2.imwrite(
        "D://Data//Counting Finger//data_collection_v2//{}".format(name),
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    )
    if not written:
        # imwrite reports failure (e.g. missing output folder) via its return value.
        print ("Could not write: {}".format(name))
        continue

    converted += 1
    if converted % 500 == 0:
        print (converted)  # progress indicator

### Visualization

In [None]:
# Run the loaded model on one sample image, then on every PNG in the test
# folder, printing the predicted class index for each.
# NOTE(review): pixel values are fed in the 0-255 range here, whereas the webcam
# prediction cell divides by 255 first - confirm which one matches training.

temp = load_img(
    "D:\\Data\\Counting Finger\\fingers\\test\\000e7aa6-100b-4c6b-9ff0-e7a8e53e4465_5L.png",
    target_size = (128, 128)  # (height, width)
)

temp_arr = img_to_array(temp)
temp_arr = np.reshape(temp_arr, (1, 128, 128, 3))  # add the batch dimension
# argmax over the class axis is what Sequential.predict_classes computed for
# multi-class outputs; that helper no longer exists in newer Keras releases.
print (np.argmax(model.predict(temp_arr), axis = -1))

for image_path in glob.glob("D:\\Data\\Counting Finger\\fingers\\test\\*.png"):
    temp = load_img(
        image_path,
        target_size = (128, 128)
    )

    # temp_arr keeps the last image processed; the activation cell below reads it.
    temp_arr = img_to_array(temp)
    temp_arr = np.reshape(temp_arr, (1, 128, 128, 3))
    print (np.argmax(model.predict(temp_arr), axis = -1))

In [None]:
# Show what the first eight layers of the network produce for the image held
# in temp_arr: one figure per layer, with every channel drawn as a tile.

# A second model that shares the classifier's input and returns every layer's output.
layer_outputs = [layer.output for layer in model.layers]

activation_model = Model(
    inputs = model.input,
    outputs = layer_outputs
)

activations = activation_model.predict(temp_arr)

# Layers 0-7 are the Conv2D / Activation / MaxPooling2D / Dropout layers, whose
# outputs have the (1, size, size, n_features) shape the tiling below relies on.
layer_names = [layer.name for layer in model.layers[:8]]

images_per_row = 16

for layer_name, layer_activation in zip(layer_names, activations): # Displays the feature maps
    n_features = layer_activation.shape[-1] # Number of features in the feature map
    size = layer_activation.shape[1] # The feature map has shape (1, size, size, n_features).
    n_cols = n_features // images_per_row # Number of tile rows in the grid
    display_grid = np.zeros((size * n_cols, images_per_row * size))
    for col in range(n_cols): # Tiles each channel into one big grid
        for row in range(images_per_row):
            channel_image = layer_activation[0, :, :, col * images_per_row + row]
            # Values are clipped to 0-255 and truncated to integers for display.
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size : (col + 1) * size, row * size : (row + 1) * size] = channel_image
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1], scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.axis('off')
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

In [None]:
# Show the outputs of the layers from index 8 onward for the image in temp_arr.
# 4-D outputs (1, size, size, n_features) are tiled channel by channel; 2-D
# outputs (1, n_units), such as those of Flatten and Dense, have no spatial axes
# to tile and are drawn as a curve over the unit index instead.

# Rebuilt from the model rather than appended to, so re-running the cell
# never duplicates entries.
layer_names = [layer.name for layer in model.layers]

images_per_row = 16
first_layer = 8  # layers before this index are plotted by the previous cell

for layer_name, layer_activation in zip(layer_names[first_layer:], activations[first_layer:]):
    if layer_activation.ndim != 4:
        plt.figure(figsize=(images_per_row, 2))
        plt.title(layer_name)
        plt.plot(layer_activation[0])
        plt.xlabel('unit')
        plt.ylabel('activation')
        continue

    n_features = layer_activation.shape[-1] # Number of features in the feature map
    size = layer_activation.shape[1] # The feature map has shape (1, size, size, n_features).
    n_cols = n_features // images_per_row # Number of tile rows in the grid
    display_grid = np.zeros((size * n_cols, images_per_row * size))
    for col in range(n_cols): # Tiles each channel into one big grid
        for row in range(images_per_row):
            channel_image = layer_activation[0, :, :, col * images_per_row + row]
            # Values are clipped to 0-255 and truncated to integers for display.
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size : (col + 1) * size, row * size : (row + 1) * size] = channel_image
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1], scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.axis('off')
    plt.imshow(display_grid, aspect='auto', cmap='viridis')