In [1]:
from pathlib import Path
import random
import re
import cv2
import numpy as np
from imageio import v3 as iio
import parser
# For Visualization in Jupyter
import ipywidgets as widgets
from matplotlib import pyplot as plt
from IPython.display import display, Image, Video
import csv
import pandas as pd
# Get images and video into Jupyter from your webcam
from ipywebrtc import CameraStream, ImageRecorder, VideoRecorder

In [2]:
def get_position(img_dir):
    #result = []
    filename = f'{img_dir}/../annotations.csv'
    df = pd.read_csv(filename)
    return(df)

In [109]:
#Create train, validation and test datasets and labels
img_dir_all = [Path("course_dataset/ASL_letter_A/videos"),
           Path("course_dataset/ASL_letter_B/videos"),
           Path("course_dataset/ASL_letter_C/videos"),
           Path("course_dataset/ASL_letter_L/videos"),
           Path("course_dataset/ASL_letter_R/videos"),
           Path("course_dataset/ASL_letter_U/videos")]

image_size = 128
threshold = 23
data_train = []
data_valid = []
data_test = []
label_train = []
label_valid = []
label_test = []
index = 0
for flag, img_dir in enumerate(img_dir_all): 
    files = [file for file in img_dir.iterdir() if file.suffix == ".mp4"]
    files.sort()
    print(flag)
    if flag == 0:
        row = [1,0,0,0,0,0]
    elif flag == 1:
        row = [0,1,0,0,0,0]
    elif flag == 2:
        row = [0,0,1,0,0,0]
    elif flag == 3:
        row = [0,0,0,1,0,0]
    elif flag == 4:
        row = [0,0,0,0,1,0]
    else:
        row = [0,0,0,0,0,1]
    #positions = get_position(img_dir)
    for index, file in enumerate(files):
        video_no = int(re.findall(r"\d+", str(file))[0])
        #print(video_no)
        frames = iio.imread(file)
        for frame_no, image in enumerate(frames):
            #if random.random() <= threshold:
            if frame_no % threshold == 0:
                image = cv2.resize(image, (image_size, image_size))
            #print(image.shape)
                if index < 23:
                    label_train.append(row)
                    data_train.append(image)
                
                elif index >= 23 and index < 32:
                    label_valid.append(row)
                    data_valid.append(image)
            
                else:
                    label_test.append(row)
                    data_test.append(image)
            #print(len(data), label.reshape(-1,20,2).shape)
data_train = np.stack(data_train, axis=0)
data_valid = np.stack(data_valid, axis=0)
data_test = np.stack(data_test, axis=0)
label_train = np.stack(label_train, axis=0)
label_valid = np.stack(label_valid, axis=0)
label_test = np.stack(label_test, axis=0)
print(data_train.shape, data_valid.shape, data_test.shape)
print(label_train.shape, label_valid.shape, label_test.shape)
#print(len(label_train), len(label_valid), len(label_test))

0
1
2
3
4
5
(1327, 128, 128, 3) (482, 128, 128, 3) (527, 128, 128, 3)
(1327, 6) (482, 6) (527, 6)


In [112]:
# VGG16: Preprocessing and predict
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

model = VGG16(weights = 'imagenet', include_top=False, pooling = 'avg', input_shape=(128, 128, 3))
print(model.summary())

data_train_copy = preprocess_input(data_train)
data_valid_copy = preprocess_input(data_valid)
data_test_copy = preprocess_input(data_test)

vgg16_feature_train = model.predict(data_train_copy)
vgg16_feature_valid = model.predict(data_valid_copy)
vgg16_feature_test = model.predict(data_test_copy)

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0     

In [119]:
#Build and compile fully connected model with 6 outputs for each image
from keras import layers
from keras import models
from keras import optimizers
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D
from keras.constraints import maxnorm
from tensorflow.keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.models import Sequential

model_fc = models.Sequential()
model_fc.add(layers.Dense(2048, activation='relu', input_dim = 1 * 512))
model_fc.add(layers.Dropout(0.5))
model_fc.add(layers.Dense(512, activation='relu', input_dim = 1 * 512))
model_fc.add(layers.Dropout(0.5))
model_fc.add(layers.Dense(128, activation='relu', input_dim = 1 * 512))
model_fc.add(layers.Dropout(0.5))
model_fc.add(layers.Dense(6, activation='softmax'))

sgd = SGD(learning_rate=0.01, momentum=0.5, decay=1e-6, nesterov=False)
model_fc.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model_fc.summary()

In [122]:
#Predict output and evaluate the performance metrics
checkpoint = ModelCheckpoint('cnn_best.h5',  
                             monitor='val_accuracy', 
                             verbose=0, 
                             save_best_only= True, 
                             mode='max') 
    
earlystop = EarlyStopping(monitor='val_accuracy', mode='min', verbose=1, patience=20)
model_fc.fit(vgg16_feature_train, label_train, validation_data=(vgg16_feature_valid, label_valid), epochs=100, batch_size=32, callbacks=[earlystop, checkpoint],
                   verbose=1)

score = model_fc.evaluate(vgg16_feature_test, label_test, verbose=0)
print('Test accuracy:', score[1])

result = model_fc.predict(vgg16_feature_test)

confusion_matrix = [[0 for col in range(6)] for row in range(6)]
label_sum = [0] * 6 
for x in range(result.shape[0]):
    l1 = list(result[x])
    i1 = l1.index(max(l1))
    l2 = list(label_test[x])
    i2 = l2.index(max(l2))
    label_sum[i2] += 1
    confusion_matrix[i2][i1] += 1
    
print(confusion_matrix)
print(label_sum)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 00021: early stopping


<keras.callbacks.History at 0x179d5bd8850>

In [126]:
# Resnet50: Preprocessing and predict
import tensorflow.keras as K
import tensorflow as tf

res_model = K.applications.ResNet50(include_top=False, weights="imagenet", input_shape=(128,128,3), pooling = 'avg')
print(res_model.summary())

data_train_copy = K.applications.resnet50.preprocess_input(data_train)
data_valid_copy = K.applications.resnet50.preprocess_input(data_valid)
data_test_test = K.applications.resnet50.preprocess_input(data_test)

print(data_train_copy.shape, data_valid_copy.shape, data_test_test.shape)

res50_feature_train = res_model.predict(data_train_copy)
res50_feature_valid = res_model.predict(data_valid_copy)
res50_feature_test = res_model.predict(data_test_test)

print(res50_feature_train.shape, res50_feature_valid.shape, res50_feature_test.shape)

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 134, 134, 3)  0           input_7[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 64, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 64, 64, 64)   256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [128]:
#Build and compile fully connected model with 6 outputs for each image
model_res = models.Sequential()
model_res.add(layers.Dense(2048, activation='relu', input_dim = 1 * 2048))
model_res.add(layers.Dropout(0.5))
model_res.add(layers.Dense(512, activation='relu', input_dim = 1 * 2048))
model_res.add(layers.Dropout(0.5))
model_res.add(layers.Dense(256, activation='relu', input_dim = 1 * 2048))
model_res.add(layers.Dropout(0.5))
model_res.add(layers.Dense(6, activation='softmax'))

sgd = SGD(learning_rate=0.01, momentum=0.5, decay=1e-6, nesterov=False)
model_res.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model_res.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_36 (Dense)             (None, 2048)              4196352   
_________________________________________________________________
dropout_27 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_37 (Dense)             (None, 512)               1049088   
_________________________________________________________________
dropout_28 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_38 (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_29 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_39 (Dense)             (None, 6)                

In [None]:
#Predict output and evaluate the performance metrics
model_res.fit(res50_feature_train, label_train, validation_data=(res50_feature_valid, label_valid), epochs=epochs, batch_size=32, callbacks=[earlystop, checkpoint],
                   verbose=1)

score = model_res.evaluate(res50_feature_test, label_test, verbose=0)
print('Test accuracy:', score[1])

result = model_res.predict(res50_feature_test)

confusion_matrix = [[0 for col in range(6)] for row in range(6)]
label_sum = [0] * 6 
for x in range(result.shape[0]):
    l1 = list(result[x])
    i1 = l1.index(max(l1))
    l2 = list(label_test[x])
    i2 = l2.index(max(l2))
    label_sum[i2] += 1
    confusion_matrix[i2][i1] += 1
    
print(confusion_matrix)
print(label_sum)