In [94]:
import csv
import os

import numpy as np
from imageio import imread
import matplotlib.pyplot as plt
# import warnings
# warnings.filterwarnings('ignore')
from keras.models import Sequential, clone_model
from keras.layers import Dense, Flatten, Flatten, BatchNormalization, Dropout
from keras.layers import Conv3D, MaxPooling3D
from keras.callbacks import ReduceLROnPlateau
from keras import optimizers
from sklearn.model_selection import StratifiedKFold

In [95]:
## Checking the GPU configuration
!nvidia-smi

import tensorflow as tf

if tf.test.gpu_device_name():
    print('GPU found')
else:
    print("No GPU found")

Mon Mar 18 16:30:32 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 522.06       Driver Version: 522.06       CUDA Version: 11.8     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   54C    P8     1W /  N/A |   2830MiB /  4096MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [96]:
# Root folder holding the frame folders, and the CSV index that lists the videos.
data_path = './dataset_aug'
csv_path = './dataset_aug.csv'

# Read every CSV row into a 2-D array of strings.
# Downstream code uses column 0 and column 1 as path components and
# column 2 as the integer class label (see getBatchData and the fold split).
# newline='' is the mode the csv module expects so that it can handle
# line endings (including newlines embedded in quoted fields) itself.
with open(csv_path, 'r', newline='') as csvfile:
    data = np.array(list(csv.reader(csvfile)))

# Stratified K-fold splitter shared by all cross-validation cells.
num_folds = 5
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=34)

# Number of output classes (width of the one-hot label vectors).
gesture_num = 6
# Seed NumPy's global RNG, which the batch generator uses for shuffling.
np.random.seed(34)

In [97]:
def normalizeImage(image):
    """Return `image` with every value divided by 255.0."""
    scaled = image / 255.0
    return scaled

In [98]:
def make3dFilter(x):
    """Return the 3-tuple (x, x, x), e.g. a cubic kernel/pool size."""
    return (x, x, x)

def make2dFilter(x):
    """Return the 2-tuple (x, x), e.g. a square kernel/pool size."""
    return (x, x)

In [99]:
def getBatchData(source_path, t, batch, batch_size, img_tensor):
    """Load one batch of videos as a normalized frame tensor plus one-hot labels.

    Parameters
    ----------
    source_path : root directory of the dataset.
    t : indexable collection of rows; row[1]/row[0] form the video folder path
        under `source_path` and row[2] is the integer class index.
    batch : index of the batch; rows batch*batch_size .. +batch_size-1 are read.
    batch_size : number of videos to load.
    img_tensor : [frame_indices, height, width, ...] as built by getImgTensor.

    Returns
    -------
    (batch_data, batch_labels) where batch_data has shape
    (batch_size, n_frames, height, width, 3) and batch_labels has shape
    (batch_size, gesture_num).
    """
    img_idx = img_tensor[0]
    n_frames, height, width = len(img_idx), img_tensor[1], img_tensor[2]
    # 3 is the number of colour channels kept from every frame.
    batch_data = np.zeros((batch_size, n_frames, height, width, 3))
    # One-hot representation of the class of each video.
    batch_labels = np.zeros((batch_size, gesture_num))
    offset = batch * batch_size
    for folder in range(batch_size):
        vid = t[folder + offset]
        vid_path = os.path.join(source_path, vid[1], vid[0])
        # os.listdir returns entries in arbitrary order, so sort them: the
        # positions in img_idx are meant to select frames along the video.
        # NOTE(review): lexicographic order assumes frame file names sort
        # chronologically (e.g. zero-padded numbers) - confirm for this dataset.
        imgs = sorted(os.listdir(vid_path))
        for idx, item in enumerate(img_idx):
            image = imread(os.path.join(vid_path, imgs[item])).astype(np.float32)
            # Frames must already have the (height, width) given in img_tensor;
            # only the first three channels are kept and scaled by normalizeImage.
            batch_data[folder, idx] = normalizeImage(image[:, :, :3])
        batch_labels[folder, int(vid[2])] = 1
    return batch_data, batch_labels

In [100]:
def generator(source_path, folder_list, batch_size, img_tensor):
    """Endlessly yield (batch_data, batch_labels) tuples over shuffled videos.

    Each pass reshuffles `folder_list`, yields every full batch of
    `batch_size` videos and then, if the list length is not a multiple of
    `batch_size`, one final smaller batch holding the leftover videos.

    Parameters
    ----------
    source_path : root directory of the dataset (forwarded to getBatchData).
    folder_list : rows describing the videos (forwarded, shuffled).
    batch_size : number of videos per full batch.
    img_tensor : frame-selection/shape descriptor (forwarded to getBatchData).
    """
    print( 'Source path = ', source_path, '; batch size =', batch_size)
    num_batches = len(folder_list) // batch_size
    # Kept in its own variable: overwriting batch_size here would shrink
    # every batch of all subsequent passes.
    remainder = len(folder_list) % batch_size
    while True:
        t = np.random.permutation(folder_list)
        for batch in range(num_batches):  # full batches
            yield getBatchData(source_path, t, batch, batch_size, img_tensor)
        if remainder:
            # Hand getBatchData only the not-yet-used tail so that its
            # batch*batch_size offset (0 here) addresses exactly those rows.
            leftover = t[num_batches * batch_size:]
            yield getBatchData(source_path, leftover, 0, remainder, img_tensor)

In [101]:
def getImgTensor(n_frames):
    """Build the frame-selection/shape descriptor used by the data pipeline.

    Returns [frame_indices, 90, 160, 3], where frame_indices are `n_frames`
    integer positions spread evenly over 0..29 (rounded to the nearest index).
    """
    positions = np.linspace(0, 29, n_frames)
    frame_indices = np.round(positions).astype(int)
    return [frame_indices, 90, 160, 3]

In [102]:
def plotModelHistory(fold_results):
    """Plot fold-averaged training curves and print mean validation metrics.

    `fold_results` is a list of dicts with keys 'history' (an object whose
    `.history` dict holds per-epoch 'loss', 'val_loss', 'categorical_accuracy'
    and 'val_categorical_accuracy' lists), 'val_loss' and 'val_accuracy'.
    The left panel shows loss, the right panel accuracy; each curve is the
    mean over folds with a +/- one standard deviation band.
    """
    fig, ax = plt.subplots(1, 2, figsize=(15,4))

    metric_keys = ('loss', 'val_loss', 'categorical_accuracy', 'val_categorical_accuracy')
    curves = {key: [] for key in metric_keys}
    total_val_loss = 0
    total_val_acc = 0

    # Collect the final validation metrics and the per-epoch curves of every fold.
    for fold_result in fold_results:
        total_val_loss += fold_result['val_loss']
        total_val_acc += fold_result['val_accuracy']
        per_epoch = fold_result['history'].history
        for key in metric_keys:
            curves[key].append(per_epoch[key])

    def draw_panel(axis, train_key, val_key, title):
        # Mean and spread across folds, computed epoch by epoch.
        mean_train = np.mean(curves[train_key], axis=0)
        mean_val = np.mean(curves[val_key], axis=0)
        spread_train = np.std(curves[train_key], axis=0)
        spread_val = np.std(curves[val_key], axis=0)

        epochs_train = range(1, len(mean_train) + 1)
        epochs_val = range(1, len(mean_val) + 1)

        axis.plot(epochs_train, mean_train)
        axis.plot(epochs_val, mean_val)
        axis.fill_between(epochs_train, mean_train - spread_train, mean_train + spread_train,
                          alpha=0.2, color='blue')
        axis.fill_between(epochs_val, mean_val - spread_val, mean_val + spread_val,
                          alpha=0.2, color='orange')
        axis.legend([train_key, val_key])
        axis.title.set_text(title)

    draw_panel(ax[0], 'loss', 'val_loss', "Train loss vs Validation loss")
    draw_panel(ax[1], 'categorical_accuracy', 'val_categorical_accuracy',
               "Train accuracy vs Validation accuracy")

    plt.show()
    n_folds = len(fold_results)
    print(f"Mean Validation Loss: {total_val_loss / n_folds}")
    print(f"Mean Validation Accuracy: {total_val_acc / n_folds}")

In [103]:
# Experiment 1: 3-D CNN on 15 frames per video.
n_frames = 15
num_epochs = 30
batch_size = 8

img_tensor = getImgTensor(n_frames)

# (frames, height, width, channels) of a single video sample.
inputShape = (len(img_tensor[0]), img_tensor[1], img_tensor[2], img_tensor[3])
# Multiply the learning rate by 0.2 when val_loss has not improved for 4 epochs.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)
callbacks_list = [LR]

model1 = Sequential([
    # Three Conv3D -> MaxPooling3D -> BatchNormalization stages.
    Conv3D(16, make3dFilter(5), activation='relu', input_shape=inputShape),
    MaxPooling3D(make3dFilter(2), padding='same'),
    BatchNormalization(),

    # From here on pooling is (1,2,2): only the two spatial axes are reduced.
    Conv3D(32, make3dFilter(3), activation='relu'),
    MaxPooling3D(pool_size=(1,2,2), padding='same'),
    BatchNormalization(),

    Conv3D(64, make3dFilter(3), activation='relu'),
    MaxPooling3D(pool_size=(1,2,2), padding='same'),
    BatchNormalization(),

    # Classifier head.
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),

    Dense(gesture_num, activation='softmax')
], name="conv_3d1")
model1.compile(optimizer=optimizers.Adam(), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model1.summary()


Model: "conv_3d1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d_18 (Conv3D)          (None, 11, 86, 156, 16)   6016      
                                                                 
 max_pooling3d_18 (MaxPoolin  (None, 6, 43, 78, 16)    0         
 g3D)                                                            
                                                                 
 batch_normalization_30 (Bat  (None, 6, 43, 78, 16)    64        
 chNormalization)                                                
                                                                 
 conv3d_19 (Conv3D)          (None, 4, 41, 76, 32)     13856     
                                                                 
 max_pooling3d_19 (MaxPoolin  (None, 4, 21, 38, 32)    0         
 g3D)                                                            
                                                          

In [104]:
# Stratified K-fold cross-validation of the model1 architecture.
# One dict per fold: training history plus final validation loss/accuracy.
fold_results1 = []

for fold, (train_index, val_index) in enumerate(skf.split(data, data[:, 2])):
    print("fold: ", fold)
    train_doc, val_doc = data[train_index], data[val_index]

    # Train every fold on a freshly initialised copy of the architecture.
    # Re-fitting model1 itself would carry weights learned in earlier folds
    # (whose training sets contain this fold's validation videos) into this
    # fold and inflate its validation metrics.
    fold_model = clone_model(model1)
    fold_model.compile(optimizer=optimizers.Adam(), loss='categorical_crossentropy',
                       metrics=['categorical_accuracy'])

    train_generator = generator(data_path, train_doc, batch_size, img_tensor)
    val_generator = generator(data_path, val_doc, batch_size, img_tensor)

    num_train_sequences = len(train_doc)
    print('# training sequences =', num_train_sequences)
    num_val_sequences = len(val_doc)
    print('# validation sequences =', num_val_sequences)

    # Ceiling division: a partial last batch still needs its own step.
    steps_per_epoch = int(np.ceil(num_train_sequences / batch_size))
    validation_steps = int(np.ceil(num_val_sequences / batch_size))

    fold_history = fold_model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                callbacks=callbacks_list, validation_data=val_generator,
                validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

    # Evaluate with a fresh generator so that exactly one full pass over the
    # validation fold is scored, independent of how far val_generator was
    # consumed during training.
    eval_generator = generator(data_path, val_doc, batch_size, img_tensor)
    validation_results = fold_model.evaluate_generator(eval_generator, steps=validation_steps)
    print("Validation Loss:", validation_results[0])
    print("Validation Accuracy:", validation_results[1])

    fold_results1.append({
        'history': fold_history,
        'val_loss': validation_results[0],
        'val_accuracy': validation_results[1]
    })

fold:  0
# training sequences = 960
# validation sequences = 240
Source path =  ./dataset_aug ; batch size = 8
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
  3/120 [..............................] - ETA: 36s - loss: 0.7272 - categorical_accuracy: 0.7083

In [None]:
# Plot the fold-averaged loss/accuracy curves stored in fold_results1 and
# print the mean validation loss and accuracy across folds.
plotModelHistory(fold_results1)

In [None]:
# Experiment 2: same architecture as model1, but on 30 frames per video.
n_frames = 30
num_epochs = 30
batch_size = 8

img_tensor = getImgTensor(n_frames)

# (frames, height, width, channels) of a single video sample.
inputShape = (len(img_tensor[0]), img_tensor[1], img_tensor[2], img_tensor[3])
# Multiply the learning rate by 0.2 when val_loss has not improved for 4 epochs.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)
callbacks_list = [LR]

model2 = Sequential([
    # Three Conv3D -> MaxPooling3D -> BatchNormalization stages.
    Conv3D(16, make3dFilter(5), activation='relu', input_shape=inputShape),
    MaxPooling3D(make3dFilter(2), padding='same'),
    BatchNormalization(),

    # From here on pooling is (1,2,2): only the two spatial axes are reduced.
    Conv3D(32, make3dFilter(3), activation='relu'),
    MaxPooling3D(pool_size=(1,2,2), padding='same'),
    BatchNormalization(),

    Conv3D(64, make3dFilter(3), activation='relu'),
    MaxPooling3D(pool_size=(1,2,2), padding='same'),
    BatchNormalization(),

    # Classifier head.
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.25),

    Dense(gesture_num, activation='softmax')
], name="conv_3d2")
model2.compile(optimizer=optimizers.Adam(), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model2.summary()


In [None]:
# Stratified K-fold cross-validation of the model2 architecture.
# One dict per fold: training history plus final validation loss/accuracy.
fold_results2 = []

for fold, (train_index, val_index) in enumerate(skf.split(data, data[:, 2])):
    print("fold: ", fold)
    train_doc, val_doc = data[train_index], data[val_index]

    # Train every fold on a freshly initialised copy of the architecture.
    # Re-fitting model2 itself would carry weights learned in earlier folds
    # (whose training sets contain this fold's validation videos) into this
    # fold and inflate its validation metrics.
    fold_model = clone_model(model2)
    fold_model.compile(optimizer=optimizers.Adam(), loss='categorical_crossentropy',
                       metrics=['categorical_accuracy'])

    train_generator = generator(data_path, train_doc, batch_size, img_tensor)
    val_generator = generator(data_path, val_doc, batch_size, img_tensor)

    num_train_sequences = len(train_doc)
    print('# training sequences =', num_train_sequences)
    num_val_sequences = len(val_doc)
    print('# validation sequences =', num_val_sequences)

    # Ceiling division: a partial last batch still needs its own step.
    steps_per_epoch = int(np.ceil(num_train_sequences / batch_size))
    validation_steps = int(np.ceil(num_val_sequences / batch_size))

    fold_history = fold_model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                callbacks=callbacks_list, validation_data=val_generator,
                validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

    # Evaluate with a fresh generator so that exactly one full pass over the
    # validation fold is scored, independent of how far val_generator was
    # consumed during training.
    eval_generator = generator(data_path, val_doc, batch_size, img_tensor)
    validation_results = fold_model.evaluate_generator(eval_generator, steps=validation_steps)
    print("Validation Loss:", validation_results[0])
    print("Validation Accuracy:", validation_results[1])

    fold_results2.append({
        'history': fold_history,
        'val_loss': validation_results[0],
        'val_accuracy': validation_results[1]
    })

In [None]:
# Plot the fold-averaged loss/accuracy curves stored in fold_results2 and
# print the mean validation loss and accuracy across folds.
plotModelHistory(fold_results2)