## Import the necessary libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Conv2D , Conv1D, MaxPool1D, MaxPool2D , Flatten , Dropout , BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix, roc_auc_score, accuracy_score
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
import joblib
import os
from PIL import Image
from sklearn.model_selection import train_test_split, KFold
from tqdm import tqdm
import re
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Dropout, Dense, Flatten, Input, Lambda
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau , EarlyStopping
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras import regularizers
import tensorflow as tf
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from sklearn.metrics import roc_auc_score,f1_score
import numpy as np
from sklearn.preprocessing import StandardScaler

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Offline augmentation pipeline, built with the albumentations library
import albumentations as A

transform = A.Compose(transforms=[
    A.RandomBrightnessContrast(),   # random brightness / contrast jitter
    A.Affine(                       # random scaling, translation and shearing
        scale=[0.8,1.2],
        translate_percent=0.05,
        shear=0.2,
        keep_ratio=True,
        p=0.5,
    ),
    A.Rotate(limit=10),             # random rotation
])

## Without MediaPipe
This section contains the model without applying MediaPipe's hand landmark detection algorithm.


#### Read each image and convert it to a 28x28 matrix

In [None]:
# Define directory path and initialize empty lists
folder_name = '/content/drive/MyDrive/IS4242/data' 
path = folder_name
img_list = []
label_list = []
# Class alphabet: a label's position in this string is its class index.
# It holds 24 letters ('j' and 'z' are absent).
txt = 'abcdefghiklmnopqrstuvwxy'

# List the directory once instead of re-listing it on every iteration.
filenames = os.listdir(path)

for filename in tqdm(filenames):
  # Only .jpg / .png files are treated as images (the match is case-sensitive)
  if not filename.endswith(('.jpg','.png')):
      continue

  # The label is the part of the filename before the first '-' or '_'
  label_name = re.split(r'[-_]', filename)[0].lower()

  # Keep only single-letter labels from the class alphabet. A plain substring
  # test would also accept '' and multi-letter prefixes such as 'hi', which
  # txt.find() would later map to an unintended class.
  if len(label_name) != 1 or label_name not in txt:
      continue

  # Open, resize to 28 x 28 pixels, convert to greyscale, then to np.array.
  # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
  with Image.open(os.path.join(path, filename)) as raw_img:
      img = raw_img.resize((28, 28), Image.LANCZOS).convert('L')
  img_array = np.array(img)

  # Record label and pixels together so both lists stay aligned
  label_list.append(label_name)
  img_list.append(img_array)


100%|██████████| 1221/1221 [02:55<00:00,  6.94it/s]


In [None]:
# Stack the per-image arrays into a single array X
X = np.array(img_list)

# Translate every label letter into its position in `txt`, then one-hot encode
y = to_categorical(np.array([txt.find(letter) for letter in np.array(label_list)]))

#### Train test split

In [None]:
# Persist the image array and the one-hot labels with joblib
output_dir = '/content/drive/MyDrive/IS4242/dump/'
joblib.dump(X, f'{output_dir}X.pkl')
joblib.dump(y, f'{output_dir}y.pkl')

In [None]:
# Restore the image array and the one-hot labels saved earlier
output_dir = '/content/drive/MyDrive/IS4242/dump/'
X, y = (joblib.load(f'{output_dir}{pickle_name}') for pickle_name in ('X.pkl', 'y.pkl'))

In [None]:
# Hold out 20% of the samples as the test set (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# One shape per line: train images, test images, train labels, test labels
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

(634, 28, 28)
(159, 28, 28)
(634, 24)
(159, 24)


#### Augmentation only on train data

In [None]:
# Collect augmented copies of the training images and their labels
augmented_X, augmented_y = [], []

# Every training sample gets 5 independently transformed variants
for sample, target in zip(X_train, y_train):
    for repeat in range(5):
        augmented_X.append(transform(image=sample)['image'])
        augmented_y.append(target)

augmented_X = np.array(augmented_X)
augmented_y = np.array(augmented_y)

# Append the augmented samples after the original ones
X_train = np.concatenate([X_train, augmented_X])
y_train = np.concatenate([y_train, augmented_y])

# Persist only the augmented part (not the originals) as pickle files
output_dir = '/content/drive/MyDrive/IS4242/dump/'
joblib.dump(augmented_X, f'{output_dir}augmented_X.pkl')
joblib.dump(augmented_y, f'{output_dir}augmented_y.pkl')

['/content/drive/MyDrive/IS4242/dump/augmented_y.pkl']

In [None]:
# Load augmented data
output_dir = '/content/drive/MyDrive/IS4242/dump/'
augmented_X = joblib.load(output_dir+'augmented_X.pkl')
augmented_y = joblib.load(output_dir+'augmented_y.pkl')

# Rebuild the un-augmented training split before appending, so this cell is
# idempotent. Appending to the current X_train would add the augmented samples
# a second time whenever X_train already contains them (after the augmentation
# cell has run, or when this cell is re-run).
# NOTE(review): assumes X / y are still the raw arrays loaded from X.pkl / y.pkl
# and that the pickled augmentations were generated from this same split.
original_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = np.concatenate([original_split[0], augmented_X])
y_train = np.concatenate([original_split[2], augmented_y])

In [None]:
# Training images and labels after augmentation, one shape per line
print(X_train.shape, y_train.shape, sep='\n')

(6974, 28, 28)
(6974, 24)


#### Reshape and divide the pixels by 255

In [None]:
# Divide pixel values by 255, then add a trailing channel axis: (N, 28, 28, 1)
X_train = (X_train/255.0).reshape(-1, 28, 28, 1)
X_test = (X_test/255.0).reshape(-1, 28, 28, 1)

In [None]:
# Shapes after normalisation: train images, test images, train labels, test labels
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

(6974, 28, 28, 1)
(159, 28, 28, 1)
(6974, 24)
(159, 24)


#### CNN with augmentation

In [None]:
# On-the-fly augmentation settings for the Keras generator
augmentation_options = dict(
    # Normalisation / whitening steps are all switched off
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # Geometric jitter
    rotation_range=10,         # rotation range in degrees
    zoom_range=0.1,            # random zoom
    width_shift_range=0.1,     # horizontal shift, as a fraction of the width
    height_shift_range=0.1,    # vertical shift, as a fraction of the height
    # No flips
    horizontal_flip=False,
    vertical_flip=False,
)
datagen = ImageDataGenerator(**augmentation_options)

datagen.fit(X_train)

In [None]:
# Learning-rate annealer: halve the rate after 3 epochs without a
# val_accuracy improvement, never going below 1e-5
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
# architecture
# Set the CNN model: three (Conv-Conv-Pool) stages with batch normalisation and
# dropout, followed by a dense classifier over the 24 letter classes.

model = Sequential()

# Stage 1: two 5x5 convolutions with 32 filters
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'same',
                 activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'same',
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))

# Stage 2: two 3x3 convolutions with 64 filters
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.25))

# Stage 3: two 3x3 convolutions with 128 filters
model.add(Conv2D(filters = 128, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(Conv2D(filters = 128, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , strides = 2 , padding = 'same'))
model.add(Dropout(0.25))

# Classifier head
model.add(Flatten())
model.add(Dense(512, activation = "relu"))
model.add(Dropout(0.3))
model.add(Dense(24, activation = "softmax"))
model.summary()

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_36 (Conv2D)          (None, 28, 28, 32)        832       
                                                                 
 conv2d_37 (Conv2D)          (None, 28, 28, 32)        25632     
                                                                 
 max_pooling2d_18 (MaxPoolin  (None, 14, 14, 32)       0         
 g2D)                                                            
                                                                 
 batch_normalization_18 (Bat  (None, 14, 14, 32)       128       
 chNormalization)                                                
                                                                 
 dropout_24 (Dropout)        (None, 14, 14, 32)        0         
                                                                 
 conv2d_38 (Conv2D)          (None, 14, 14, 64)       



In [None]:
epochs = 30
batch_size = 128


# Keep the weights of the epoch with the highest validation accuracy in best.h5
model_checkpoint_callback = ModelCheckpoint(
    filepath="best.h5",
    monitor='val_accuracy', 
    verbose=1, 
    save_best_only=True, 
    mode='max')

# Stop when the validation loss has not improved for 10 consecutive epochs
es = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)

# Fit the model. Model.fit accepts generators directly; fit_generator is
# deprecated and has been removed from newer Keras releases.
# NOTE(review): the test split doubles as validation data here, so checkpoint
# selection and early stopping are tuned on the same data that is reported on.
history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
                    epochs = epochs, validation_data = (X_test, y_test),
                    steps_per_epoch=X_train.shape[0] // batch_size,
                    verbose = 1,
                    callbacks=[model_checkpoint_callback,learning_rate_reduction,es])

  history = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),


Epoch 1/30
Epoch 1: val_accuracy improved from -inf to 0.05031, saving model to best.h5
Epoch 2/30
Epoch 2: val_accuracy did not improve from 0.05031
Epoch 3/30
Epoch 3: val_accuracy did not improve from 0.05031
Epoch 4/30
Epoch 4: val_accuracy did not improve from 0.05031

Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/30
Epoch 5: val_accuracy did not improve from 0.05031
Epoch 6/30
Epoch 6: val_accuracy improved from 0.05031 to 0.06918, saving model to best.h5
Epoch 7/30
Epoch 7: val_accuracy improved from 0.06918 to 0.12579, saving model to best.h5
Epoch 8/30
Epoch 8: val_accuracy improved from 0.12579 to 0.17610, saving model to best.h5
Epoch 9/30
Epoch 9: val_accuracy did not improve from 0.17610
Epoch 10/30
Epoch 10: val_accuracy improved from 0.17610 to 0.25157, saving model to best.h5
Epoch 11/30
Epoch 11: val_accuracy improved from 0.25157 to 0.27673, saving model to best.h5
Epoch 12/30
Epoch 12: val_accuracy improved from 0.27673 to 0.3144

In [None]:
# Class probabilities for the test split, reduced to class indices
results = model.predict(X_test)
y_pred = results.argmax(axis=1)
y_true = y_test.argmax(axis=1)




#### Results

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 on the test split
report_text = classification_report(y_true, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.33      0.40      0.36         5
           1       0.27      0.60      0.37         5
           2       0.75      0.25      0.38        12
           3       0.50      0.20      0.29         5
           4       0.33      0.57      0.42         7
           5       0.50      0.67      0.57         6
           6       0.00      0.00      0.00         5
           7       0.33      0.09      0.14        11
           8       0.60      0.60      0.60         5
           9       0.50      0.33      0.40         6
          10       0.40      0.67      0.50         3
          11       0.00      0.00      0.00         3
          12       0.00      0.00      0.00         5
          13       0.67      0.25      0.36         8
          14       0.33      0.14      0.20         7
          15       0.20      0.17      0.18         6
          16       0.33      0.25      0.29         8
          17       0.33    

In [None]:
# One-vs-rest ROC AUC, reported with micro- and then macro-averaging
for averaging in ('micro', 'macro'):
    roc_auc = roc_auc_score(y_test, results, multi_class='ovr', average=averaging)
    print(f"ROC AUC score ({averaging} average):", roc_auc)

ROC AUC score (micro average): 0.9164469622314748
ROC AUC score (macro average): 0.913476257382485


#### Prediction and evaluation

In [None]:
# Test model prediction using the sign language letter m.
# The image goes through the same preprocessing as the training images:
# 28 x 28 pixels, greyscale, divided by 255, shape (1, 28, 28, 1).
with Image.open('m.png') as raw_img:
    # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10
    img = raw_img.resize((28, 28), Image.LANCZOS).convert('L')
img_array = np.array(img)
img_array = img_array/255.0
img_array = img_array.reshape(-1, 28, 28, 1)

In [None]:
# Predict the sample image and translate the winning class index to its letter
results = model.predict(img_array)
y_pred = results.argmax(axis=1)
txt[y_pred[0]]



'm'

#### After hyperparameter tuning, train it with the whole data

In [None]:
# Reload the complete (un-split) image array and one-hot labels
output_dir = '/content/drive/MyDrive/IS4242/dump/'
X = joblib.load(f'{output_dir}X.pkl')
y = joblib.load(f'{output_dir}y.pkl')

In [None]:
# Collect augmented copies of every image together with its label
augmentX, augmenty = [], []

# Each sample contributes 10 randomly transformed variants
for sample, target in zip(X, y):
    for repeat in range(10):
        augmentX.append(transform(image=sample)['image'])
        augmenty.append(target)

augmentX = np.array(augmentX)
augmenty = np.array(augmenty)

# Original data first, augmented data after it
X = np.concatenate([X, augmentX])
y = np.concatenate([y, augmenty])

In [None]:
# Divide pixel values by 255 and add a trailing channel axis: (N, 28, 28, 1)
X = (X/255.0).reshape(-1, 28, 28, 1)

In [None]:
# On-the-fly augmentation for the final training run.
# brightness_range has been dropped: X is already divided by 255 before it
# reaches the generator, and Keras applies brightness shifts through an 8-bit
# PIL image, so generated batches would no longer be on the same [0, 1] scale
# as the images passed to model.predict. Brightness jitter is already part of
# the albumentations `transform` pipeline used to build X.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # no horizontal flips
        vertical_flip=False)  # no vertical flips


datagen.fit(X)

In [None]:
# Final CNN trained on the whole data set: three (Conv-Conv-Pool) stages with
# batch normalisation and dropout, then two dense layers and a 24-way softmax.
model = Sequential()

# Stage 1: two 5x5 convolutions with 32 filters
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'same',
                 activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 32, kernel_size = (5,5), padding = 'same',
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2) , padding = 'same'))
model.add(BatchNormalization())
model.add(Dropout(0.25))

# Stage 2: two 3x3 convolutions with 64 filters
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(Conv2D(filters = 64, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2) , padding = 'same'))
model.add(BatchNormalization())
model.add(Dropout(0.25))

# Stage 3: two 3x3 convolutions with 128 filters
model.add(Conv2D(filters = 128, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(Conv2D(filters = 128, kernel_size = (3,3), padding = 'same',
                 activation ='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D((2,2) , padding = 'same'))
model.add(Dropout(0.25))

# Classifier head
model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dense(512, activation = "relu"))
model.add(Dense(24, activation = "softmax"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])



In [None]:
epochs = 30
batch_size = 64

# Compile with a freshly constructed Adam optimizer so that re-running this
# cell does not reuse a previous optimizer instance. `learning_rate` replaces
# the deprecated `lr` argument.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Fit the model on the whole data set (no validation split).
# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from newer Keras releases.
history = model.fit(datagen.flow(X, y, batch_size=batch_size), epochs = epochs)

#### Prediction and evaluation

In [None]:
# Test model prediction using the sign language letter m.
# The image goes through the same preprocessing as the training images:
# 28 x 28 pixels, greyscale, divided by 255, shape (1, 28, 28, 1).
with Image.open('m.png') as raw_img:
    # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10
    img = raw_img.resize((28, 28), Image.LANCZOS).convert('L')
img_array = np.array(img)
img_array = img_array/255.0
img_array = img_array.reshape(-1, 28, 28, 1)

In [None]:
# Predict the sample image and translate the winning class index to its letter
results = model.predict(img_array)
y_pred = results.argmax(axis=1)
txt[y_pred[0]]



'm'

## Using MediaPipe
This section contains the model using MediaPipe's hand landmark detection algorithm.

In [None]:
# Install MediaPipe
!pip install mediapipe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mediapipe
  Downloading mediapipe-0.9.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.6/33.6 MB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mediapipe
Successfully installed mediapipe-0.9.2.1


In [None]:
# For capturing hand coordinates
import cv2
import tensorflow as tf
import mediapipe as mp

In [None]:
# For processing data
import csv
import os
import numpy as np
import pandas as pd
from tqdm import tqdm 
import re

#### Convert image data to matrix

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

Mounted at /content/drive


In [None]:
# Offline augmentation pipeline, built with the albumentations library
# (defined again here so this section can be run on its own)
import albumentations as A

transform = A.Compose(transforms=[
    A.RandomBrightnessContrast(),   # random brightness / contrast jitter
    A.Affine(                       # random scaling, translation and shearing
        scale=[0.8,1.2],
        translate_percent=0.05,
        shear=0.2,
        keep_ratio=True,
        p=0.5,
    ),
    A.Rotate(limit=10),             # random rotation
])

In [None]:
# Define directory path and initialize empty lists
folder_name = '/content/drive/MyDrive/IS4242/data'
path = folder_name
img_list = []
label_list = []
# Class alphabet: a label's position in this string is its class index.
# It holds 24 letters ('j' and 'z' are absent).
txt = 'abcdefghiklmnopqrstuvwxy'

# List the directory once instead of re-listing it on every iteration.
filenames = os.listdir(path)

for filename in tqdm(filenames):
  # Only .jpg / .png files are treated as images (the match is case-sensitive)
  if not filename.endswith(('.jpg','.png')):
      continue

  # The label is the part of the filename before the first '-' or '_'
  label_name = re.split(r'[-_]', filename)[0].lower()

  # Keep only single-letter labels from the class alphabet. A plain substring
  # test would also accept '' and multi-letter prefixes such as 'hi', which
  # txt.find() would later map to an unintended class.
  if len(label_name) != 1 or label_name not in txt:
      continue

  # Open, resize to 224 x 224 pixels (bigger size than the 28 x 28 section),
  # convert to greyscale, then to np.array.
  # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
  with Image.open(os.path.join(path, filename)) as raw_img:
      img = raw_img.resize((224, 224), Image.LANCZOS).convert('L')
  img_array = np.array(img)

  # Record label and pixels together so both lists stay aligned
  label_list.append(label_name)
  img_list.append(img_array)

# Convert img_list into np.array and store as X 
X = np.array(img_list)

# Map each label letter to its index in `txt`, then one-hot encode
y = np.array(label_list)
y = np.array(list(map(lambda x:txt.find(x), y)))
y = to_categorical(y)


100%|██████████| 1221/1221 [02:59<00:00,  6.80it/s]


In [None]:
# Persist the 224x224 image array and its one-hot labels
output_dir = '/content/drive/MyDrive/IS4242/dump/'
joblib.dump(X, f'{output_dir}X_224.pkl')
joblib.dump(y, f'{output_dir}y_224.pkl')

In [None]:
# Restore the 224x224 image array and its one-hot labels
output_dir = '/content/drive/MyDrive/IS4242/dump/'
X = joblib.load(f'{output_dir}X_224.pkl')
y = joblib.load(f'{output_dir}y_224.pkl')

In [None]:
len(X)

817

#### Train test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Augmentation only on train data

In [None]:
# Collect augmented copies of the training images and their labels
augmented_X, augmented_y = [], []

# Every training sample gets 10 independently transformed variants
for sample, target in zip(X_train, y_train):
    for repeat in range(10):
        augmented_X.append(transform(image=sample)['image'])
        augmented_y.append(target)

# Original samples first, augmented samples after them
augmented_X = np.array(augmented_X)
augmented_y = np.array(augmented_y)
X_train = np.concatenate([X_train, augmented_X])
y_train = np.concatenate([y_train, augmented_y])

In [None]:
# Persist the training split (originals plus augmentations) as pickle files
output_dir = '/content/drive/MyDrive/IS4242/dump/'
joblib.dump(X_train, f'{output_dir}X_224_augmented.pkl')
joblib.dump(y_train, f'{output_dir}y_224_augmented.pkl')

['/content/drive/MyDrive/IS4242/dump/y_224_augmented.pkl']

In [None]:
# Restore the training split (originals plus augmentations) from disk
output_dir = '/content/drive/MyDrive/IS4242/dump/'
X_train = joblib.load(f'{output_dir}X_224_augmented.pkl')
y_train = joblib.load(f'{output_dir}y_224_augmented.pkl')

#### Generate dataset of hand points

In [None]:
# Initialize MediaPipe drawing utils and hand detection models
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Initialize lists to store features and labels
data = []
labels = []
count = 0 # keeps track of failed detections

with mp_hands.Hands(static_image_mode =True, max_num_hands = 2, min_detection_confidence=0.5) as hands:
    for i in tqdm(range(len(X_train))):
        # Mirror the image horizontally before detection
        image = cv2.flip(X_train[i], 1)

        # 2-D arrays are single-channel (greyscale) and need GRAY2RGB;
        # COLOR_BGR2RGB only accepts 3- or 4-channel input.
        color_code = cv2.COLOR_GRAY2RGB if image.ndim == 2 else cv2.COLOR_BGR2RGB
        results = hands.process(cv2.cvtColor(image, color_code))

        # No hand found: count it and skip, instead of relying on a caught exception
        if not results.multi_hand_landmarks:
            count += 1
            continue

        # When several hands are detected, keep the last one (as before)
        right_hand = results.multi_hand_landmarks[-1].landmark

        # Flatten the landmarks into one row: x0, y0, x1, y1, ...
        row = [coord for landmark in right_hand for coord in (landmark.x, landmark.y)]

        data.append(row)
        labels.append(y_train[i])

# Report how many samples were dropped because no hand was detected
print(f"No hand detected in {count} of {len(X_train)} images")


100%|██████████| 7183/7183 [05:13<00:00, 22.90it/s]


In [None]:
pd.DataFrame(data)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,0.570230,0.700385,0.460918,0.601834,0.385230,0.526549,0.322623,0.470859,0.301588,0.408549,...,0.434779,0.504095,0.549578,0.469310,0.416643,0.467967,0.423916,0.518278,0.466331,0.533848
1,0.446183,0.796893,0.341452,0.681229,0.304357,0.500339,0.314089,0.366598,0.315794,0.253817,...,0.488412,0.601652,0.734732,0.564161,0.699286,0.469736,0.620621,0.544259,0.574593,0.615079
2,0.554823,0.467337,0.477951,0.500206,0.370337,0.497704,0.303738,0.458727,0.319072,0.396847,...,0.466813,0.410920,0.478718,0.277710,0.441688,0.218711,0.420079,0.179862,0.393535,0.143242
3,0.421717,0.593616,0.332333,0.543942,0.278878,0.457589,0.282298,0.384428,0.314716,0.334537,...,0.391403,0.521059,0.528068,0.395230,0.475645,0.423229,0.459146,0.468935,0.459199,0.488524
4,0.651359,0.619728,0.656911,0.514619,0.602316,0.457691,0.539696,0.429281,0.491382,0.435781,...,0.391995,0.521948,0.437633,0.637292,0.367256,0.589515,0.384333,0.575371,0.412115,0.575475
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3156,0.583231,0.452619,0.539714,0.455225,0.495395,0.492904,0.479851,0.551855,0.483757,0.602373,...,0.545168,0.576425,0.487555,0.524171,0.515908,0.578480,0.545968,0.580548,0.566784,0.575146
3157,0.562530,0.498690,0.511731,0.506001,0.454501,0.544319,0.426224,0.600555,0.422746,0.650416,...,0.465353,0.691858,0.439111,0.577353,0.453883,0.642066,0.476830,0.662664,0.497206,0.675142
3158,0.563214,0.502042,0.515475,0.514002,0.460635,0.550754,0.430844,0.599683,0.423958,0.642966,...,0.467354,0.680093,0.444119,0.574531,0.456100,0.633061,0.477790,0.649635,0.497778,0.658029
3159,0.583231,0.452619,0.539714,0.455225,0.495395,0.492904,0.479851,0.551855,0.483757,0.602373,...,0.545168,0.576425,0.487555,0.524171,0.515908,0.578480,0.545968,0.580548,0.566784,0.575146


In [None]:
# Persist the MediaPipe landmark rows and labels of the training split
output_dir = "/content/drive/MyDrive/IS4242/dump/"
joblib.dump(data, f'{output_dir}data_cnn_train.pkl')
joblib.dump(labels, f'{output_dir}labels_cnn_train.pkl')

['/content/drive/MyDrive/IS4242/dump/labels_cnn_train.pkl']

In [None]:
# Restore the MediaPipe landmark rows and labels of the training split
data = joblib.load(f'{output_dir}data_cnn_train.pkl')
labels = joblib.load(f'{output_dir}labels_cnn_train.pkl')

In [None]:
# MediaPipe conversion into landmark points for test data
test_data = []
test_label = []
count = 0 # keeps track of failed detections
with mp_hands.Hands(static_image_mode =True, max_num_hands = 2, min_detection_confidence=0.5) as hands:
    for i in tqdm(range(len(X_test))):
        # Mirror the image horizontally before detection
        image = cv2.flip(X_test[i], 1)

        # 2-D arrays are single-channel (greyscale) and need GRAY2RGB;
        # COLOR_BGR2RGB only accepts 3- or 4-channel input.
        color_code = cv2.COLOR_GRAY2RGB if image.ndim == 2 else cv2.COLOR_BGR2RGB
        results = hands.process(cv2.cvtColor(image, color_code))

        # No hand found: count it and skip, instead of relying on a caught exception
        if not results.multi_hand_landmarks:
            count += 1
            continue

        # When several hands are detected, keep the last one (as before)
        right_hand = results.multi_hand_landmarks[-1].landmark

        # Flatten the landmarks into one row: x0, y0, x1, y1, ...
        row = [coord for landmark in right_hand for coord in (landmark.x, landmark.y)]

        test_data.append(row)
        test_label.append(y_test[i])

# One summary line replaces the per-row debug print
print(f"No hand detected in {count} of {len(X_test)} images")

In [None]:
# Persist the MediaPipe landmark rows and labels of the test split
joblib.dump(test_data, f'{output_dir}data_cnn_test.pkl')
joblib.dump(test_label, f'{output_dir}labels_cnn_test.pkl')

['/content/drive/MyDrive/IS4242/dump/labels_cnn_test.pkl']

In [None]:
# Restore the MediaPipe landmark rows and labels of the test split
test_data = joblib.load(f'{output_dir}data_cnn_test.pkl')
test_label = joblib.load(f'{output_dir}labels_cnn_test.pkl')

In [None]:
# Turn the train and test label lists into NumPy arrays
labels, test_label = np.array(labels), np.array(test_label)

In [None]:
# Landmark rows as a 2-D feature matrix
train_features = np.array(data)

# Standardise with statistics fitted on the training features only
scaler = StandardScaler().fit(train_features)
x_train = scaler.transform(train_features)
x_test = scaler.transform(np.array(test_data))

# Add a trailing channel axis so each sample has shape (n_features, 1) for Conv1D
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
print(x_train.shape, x_test.shape, sep='\n')

(2754, 42, 1)
(51, 42, 1)


#### Define CNN architecture

In [None]:
# define function to generate CNN model
def CNN_model(input_shape, conv_layers, num_classes = 24) :
  """Build and compile a 1-D convolutional classifier.

  Each entry of `conv_layers` adds one stage made of two Conv1D layers
  (stride 1, "same" padding, ReLU) followed by MaxPooling1D(pool_size=2).
  The stages are followed by Dropout(0.2), Flatten, Dense(512, ReLU) and a
  softmax output layer.

  Args:
    input_shape: shape of a single sample, passed to the first Conv1D layer.
    conv_layers: sequence of (filters, kernel_size) pairs, one per stage.
    num_classes: number of units in the softmax output layer (default 24).

  Returns:
    The model, compiled with categorical cross-entropy loss, the 'adam'
    optimizer and the accuracy metric.
  """
  # Initialize model
  model = tf.keras.models.Sequential()

  # Add convolutional stages. The stage position comes from enumerate();
  # looking the pair up with list.index() would match every repeat of the
  # first pair and fail for entries given as lists instead of tuples.
  for stage, (filters, kernel_size) in enumerate(conv_layers) :
    # Only the very first layer carries the input shape
    first_layer_kwargs = {"input_shape": input_shape} if stage == 0 else {}
    model.add(tf.keras.layers.Conv1D(filters = filters, kernel_size = kernel_size, strides = 1, padding = "same", activation = "relu", **first_layer_kwargs))
    model.add(tf.keras.layers.Conv1D(filters = filters, kernel_size = kernel_size, strides = 1, padding = "same", activation = "relu"))
    model.add(tf.keras.layers.MaxPooling1D(pool_size = 2))

  # Add dropout layer to prevent overfitting
  model.add(tf.keras.layers.Dropout(rate = 0.2))

  # Flatten layer to 1D array
  model.add(tf.keras.layers.Flatten())

  # Add dense layers for classification
  model.add(tf.keras.layers.Dense(512, activation = "relu"))
  model.add(tf.keras.layers.Dense(num_classes, activation = "softmax"))

  # Compile and return model
  model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

In [None]:
# Three stages of (filters, kernel_size); each input sample has shape (42, 1)
conv_layers = [(32, 3), (64, 3), (128, 3)]
model = CNN_model(input_shape=(42,1), conv_layers=conv_layers)
model.summary()

Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_180 (Conv1D)         (None, 42, 32)            128       
                                                                 
 conv1d_181 (Conv1D)         (None, 42, 32)            3104      
                                                                 
 max_pooling1d_90 (MaxPoolin  (None, 21, 32)           0         
 g1D)                                                            
                                                                 
 conv1d_182 (Conv1D)         (None, 21, 64)            6208      
                                                                 
 conv1d_183 (Conv1D)         (None, 21, 64)            12352     
                                                                 
 max_pooling1d_91 (MaxPoolin  (None, 10, 64)           0         
 g1D)                                                

#### Cross Validation

Check model performance with cross-validation and perform hyperparameter tuning.

In [None]:
# Hold out 20% of the samples as the test set (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
X_train.shape

(653, 224, 224)

In [None]:
y_train.shape

(653, 24)

In [None]:
# Define function that converts data points into mediapipe landmark points
def convert_mediapipe(X, y) :
    """Convert images into flattened MediaPipe hand-landmark rows.

    Each image is mirrored horizontally, converted to RGB and passed to
    MediaPipe Hands in static-image mode. Images in which no hand is detected
    are dropped together with their label, so the result can be shorter than
    the input.

    Args:
        X: indexable collection of images, either 2-D (single channel) or
            multi-channel arrays.
            NOTE(review): multi-channel input is treated as BGR, as in the
            original code - confirm the channel order with the caller.
        y: labels, indexable in step with X.

    Returns:
        (data, labels): `data` is a list of rows [x0, y0, x1, y1, ...] built
        from the landmarks of the last detected hand; `labels` holds the
        matching entries of `y`.
    """
    mp_hands = mp.solutions.hands
    count = 0  # images in which no hand was detected
    labels = []
    data = []
    with mp_hands.Hands(static_image_mode =True, max_num_hands = 2, min_detection_confidence=0.5) as hands:
        for i in tqdm(range(len(X))):
            # Mirror the image horizontally before detection
            image = cv2.flip(X[i], 1)

            # 2-D arrays are single-channel (greyscale) and need GRAY2RGB;
            # COLOR_BGR2RGB only accepts 3- or 4-channel input.
            color_code = cv2.COLOR_GRAY2RGB if image.ndim == 2 else cv2.COLOR_BGR2RGB
            results = hands.process(cv2.cvtColor(image, color_code))

            # No hand found: count it and skip, instead of relying on a caught exception
            if not results.multi_hand_landmarks:
                count += 1
                continue

            # When several hands are detected, keep the last one (as before)
            right_hand = results.multi_hand_landmarks[-1].landmark

            # Flatten the landmarks into one row: x0, y0, x1, y1, ...
            row = [coord for landmark in right_hand for coord in (landmark.x, landmark.y)]

            data.append(row)
            labels.append(y[i])

    # Surface the number of dropped samples; it was counted but never reported
    print(f"No hand detected in {count} of {len(X)} images")
    return data, labels

In [None]:
from sklearn.model_selection import StratifiedKFold

# Define function for cross validation
def cross_validate(model, X, y, n_splits = 5, n_augment = 10, epochs = 30, batch_size = 32, conv_layers = None) :
    """Estimate model performance with stratified k-fold cross validation.

    For every fold the training images are augmented, all images are converted
    to MediaPipe landmark features, the features are standardised with a
    scaler fitted on the training part only, and a freshly built CNN is
    trained on the training part and scored on the validation part.

    Parameters
    ----------
    model : object
        Ignored. Kept only so that existing calls keep working: a new network
        is built for every fold, because reusing one fitted model across folds
        would carry weights learned on one fold's validation data into the next.
    X : np.ndarray
        Images; must support indexing with integer index arrays.
    y : np.ndarray
        Label matrix aligned with ``X``; the class used for stratification and
        scoring is ``argmax`` along axis 1.
    n_splits : int, default 5
        Number of folds.
    n_augment : int, default 10
        Number of augmented copies generated per training image.
    epochs : int, default 30
        Training epochs per fold.
    batch_size : int, default 32
        Training batch size.
    conv_layers : list of tuple, optional
        Architecture passed to ``CNN_model``; defaults to
        ``[(32, 3), (64, 3), (128, 3)]``.

    Returns
    -------
    tuple
        ``(mean_accuracy, mean_roc_micro, mean_f1)`` averaged over all folds.
    """
    if conv_layers is None:
        conv_layers = [(32, 3), (64, 3), (128, 3)]

    kf = StratifiedKFold(n_splits = n_splits, shuffle = True, random_state=42)
    accuracy_scores = []
    auc_scores_micro = []
    f1_scores = []
    # Stratify on the class index recovered from the label matrix
    class_ids = np.argmax(y, axis=1)
    for fold, (train_index, test_index) in enumerate(kf.split(X, class_ids)):
        print(f"Fold {fold+1}/{n_splits}:")

        # Split into train and validation sets
        X_train, X_val = X[train_index], X[test_index]
        y_train, y_val = y[train_index], y[test_index]

        # Augment the training part only; validation images stay untouched
        augmented_X = []
        augmented_y = []
        for image, label in zip(X_train, y_train):
            for _ in range(n_augment):
                augmented_X.append(transform(image=image)['image'])
                augmented_y.append(label)

        # Skip the concatenation when no augmentation was requested
        if augmented_X:
            X_train = np.concatenate([X_train, np.array(augmented_X)])
            y_train = np.concatenate([y_train, np.array(augmented_y)])

        # Convert data into mediapipe points (images without a detected hand are dropped)
        X_train, y_train = convert_mediapipe(X_train, y_train)
        X_val, y_val = convert_mediapipe(X_val, y_val)
        X_train, y_train = np.array(X_train), np.array(y_train)
        X_val, y_val = np.array(X_val), np.array(y_val)

        # Fit the scaler on the training features only, then apply it to both parts
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_val = scaler.transform(X_val)
        # Add a trailing axis of size 1 so samples match the Conv1D input layout
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
        X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], 1))

        # Build a fresh, untrained network for this fold
        fold_model = CNN_model(X_train.shape[1:], conv_layers)
        # Train the model on the training data
        fold_model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size, verbose = 0)

        # Evaluate the model on the validation data
        res = fold_model.predict(X_val)
        y_pred = np.argmax(res, axis = 1)
        y_t = np.argmax(y_val, axis = 1)
        accuracy_scores.append(accuracy_score(y_t, y_pred))
        # Use micro avg since it is sensitive to class imbalance
        auc_scores_micro.append(roc_auc_score(y_val,res, multi_class='ovr', average='micro'))
        f1_scores.append(f1_score(y_t, y_pred, average='weighted'))
        print("ROC AUC score:", auc_scores_micro[-1])
        print(f"Validation accuracy: {accuracy_scores[-1]}")
        print(f"Validation F1: {f1_scores[-1]}")

    # Compute the mean accuracy, F1 and AUC ROC across all folds
    mean_accuracy = np.mean(accuracy_scores)
    mean_f1 = np.mean(f1_scores)
    mean_roc_micro = np.mean(auc_scores_micro)
    print(f"\nMean cross-validation accuracy: {mean_accuracy:.3f}")
    print(f"\nMean cross-validation F1: {mean_f1:.3f}")
    print(f"\nMean cross-validation AUC-ROC (micro): {mean_roc_micro:.3f}")

    return mean_accuracy, mean_roc_micro, mean_f1


In [None]:
# Three convolutional stages with 32, 64 and 128 filters, each with kernel size 3
conv_layers = [(n_filters, 3) for n_filters in (32, 64, 128)]
model = CNN_model((42, 1), conv_layers)
# The returned (accuracy, AUC-ROC, F1) means are shown as the cell output
cross_validate(model, X_train, y_train)

Fold 1/5:


100%|██████████| 5742/5742 [04:13<00:00, 22.67it/s]
100%|██████████| 131/131 [00:04<00:00, 27.43it/s]


ROC AUC score: 0.9631233362910382
Validation accuracy: 0.7142857142857143
Validation F1: 0.6800680272108843
Fold 2/5:


100%|██████████| 5742/5742 [04:01<00:00, 23.77it/s]
100%|██████████| 131/131 [00:04<00:00, 26.57it/s]


ROC AUC score: 0.981625258799172
Validation accuracy: 0.8095238095238095
Validation F1: 0.7913832199546486
Fold 3/5:


100%|██████████| 5742/5742 [04:05<00:00, 23.43it/s]
100%|██████████| 131/131 [00:04<00:00, 26.40it/s]


ROC AUC score: 0.9720404521118382
Validation accuracy: 0.6341463414634146
Validation F1: 0.6146341463414634
Fold 4/5:


100%|██████████| 5753/5753 [04:02<00:00, 23.70it/s]
100%|██████████| 130/130 [00:05<00:00, 21.88it/s]


ROC AUC score: 0.9621311744883702
Validation accuracy: 0.717391304347826
Validation F1: 0.7185300207039339
Fold 5/5:


100%|██████████| 5753/5753 [04:00<00:00, 23.96it/s]
100%|██████████| 130/130 [00:05<00:00, 25.54it/s]


ROC AUC score: 0.9463708228530364
Validation accuracy: 0.6590909090909091
Validation F1: 0.6598484848484849

Mean cross-validation accuracy: 0.707

Mean cross-validation F1: 0.693

Mean cross-validation AUC-ROC (micro): 0.965


(0.7068876157423347, 0.965058208908691, 0.6928927798118831)

#### Check performance with test set

In [None]:
# Learning-rate annealer: halve the learning rate whenever validation accuracy
# has not improved for 3 epochs, never going below 1e-5
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
epochs = 30
batch_size = 32

conv_layers = [(32, 3), (64, 3), (128, 3)]

# Write the weights of the epoch with the highest validation accuracy to best.h5
model_checkpoint_callback = ModelCheckpoint(
    filepath="best.h5",
    monitor='val_accuracy', 
    verbose=1, 
    save_best_only=True, 
    mode='max')

# Stop training once validation loss has not improved for 20 epochs
es = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=20)

# Build the model once, taking the input shape from the training features
# (a second, immediately discarded CNN_model(...) build was removed)
model = CNN_model(x_train.shape[1:3], conv_layers)
# Fit the model
# NOTE(review): x_test / test_label drive checkpointing and the learning-rate
# schedule here and are also the data scored afterwards — confirm that this
# reuse is intended.
history = model.fit(x_train, labels, batch_size=batch_size,
                              epochs = epochs, validation_data = (x_test, test_label),steps_per_epoch=x_train.shape[0] // batch_size,
                              verbose = 1,   callbacks=[model_checkpoint_callback,learning_rate_reduction,es])

Epoch 1/30
Epoch 1: val_accuracy improved from -inf to 0.45098, saving model to best.h5
Epoch 2/30
Epoch 2: val_accuracy improved from 0.45098 to 0.68627, saving model to best.h5
Epoch 3/30
Epoch 3: val_accuracy did not improve from 0.68627
Epoch 4/30
Epoch 4: val_accuracy improved from 0.68627 to 0.74510, saving model to best.h5
Epoch 5/30
Epoch 5: val_accuracy improved from 0.74510 to 0.76471, saving model to best.h5
Epoch 6/30
Epoch 6: val_accuracy did not improve from 0.76471
Epoch 7/30
Epoch 7: val_accuracy did not improve from 0.76471
Epoch 8/30
Epoch 8: val_accuracy did not improve from 0.76471

Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/30
Epoch 9: val_accuracy did not improve from 0.76471
Epoch 10/30
Epoch 10: val_accuracy did not improve from 0.76471
Epoch 11/30
Epoch 11: val_accuracy did not improve from 0.76471

Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 12/30
Epoch 12: val_accuracy did not imp

#### Prediction and evaluation

In [None]:
# Model outputs for the held-out samples
# NOTE(review): `model` carries the weights from the last trained epoch, not
# the best.h5 checkpoint — confirm which of the two should be evaluated.
results = model.predict(x_test)
# Reduce the label matrix and the model outputs to class indices
y_true = np.argmax(test_label, axis=1)
y_pred = results.argmax(axis=1)




In [None]:
# classification_report is already imported in the notebook's import cell.
# zero_division=0 reports 0.0 for classes with no predicted samples, which is
# the value the default setting also reports, but without emitting an
# UndefinedMetricWarning for each such class.
print(classification_report(y_true, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       1.00      1.00      1.00         3
           2       0.00      0.00      0.00         2
           3       1.00      0.50      0.67         2
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         2
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2
           9       0.50      1.00      0.67         2
          10       1.00      1.00      1.00         3
          11       0.00      0.00      0.00         1
          12       0.50      1.00      0.67         1
          13       0.50      1.00      0.67         1
          14       0.67      1.00      0.80         2
          15       1.00      1.00      1.00         2
          16       0.67      1.00      0.80         2
          17       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Report ROC AUC on the test predictions under micro and macro averaging
for averaging in ('micro', 'macro'):
    roc_auc = roc_auc_score(test_label, results, multi_class='ovr', average=averaging)
    print(f"ROC AUC score ({averaging} average):", roc_auc)


ROC AUC score (micro average): 0.9835681928355315
ROC AUC score (macro average): 0.9788832752368086


### Train on whole dataset

In [None]:
# Collect augmented images and their labels
augmented_X = []
augmented_y = []

for image, label in zip(X, y):
  # Ten randomly transformed copies per image, each keeping the source label
  for _copy in range(10):
      augmented_X.append(transform(image=image)['image'])
      augmented_y.append(label)

# Stack the augmented samples behind the original ones
# NOTE(review): X and y are rebound below, so running this cell a second time
# augments data that already contains augmented samples.
augmented_X = np.array(augmented_X)
augmented_y = np.array(augmented_y)
X = np.concatenate([X, augmented_X])
y = np.concatenate([y, augmented_y])

In [None]:
# Convert points using mediapipe
# Reuse the convert_mediapipe helper instead of repeating its loop inline.
# Images in which no hand is detected are dropped, so `data` and `labels`
# can be shorter than `X` and `y`.
data, labels = convert_mediapipe(X, y)


100%|██████████| 8987/8987 [06:38<00:00, 22.58it/s]


In [None]:
# Persist the landmark features and labels of the complete augmented dataset
joblib.dump(data, f"{output_dir}data_cnn_full.pkl")
joblib.dump(labels, f"{output_dir}labels_cnn_full.pkl")

['/content/drive/MyDrive/IS4242/dump/labels_cnn_full.pkl']

In [None]:
# Reload the landmark features and labels of the complete augmented dataset
labels = joblib.load(f"{output_dir}labels_cnn_full.pkl")
data = joblib.load(f"{output_dir}data_cnn_full.pkl")


In [None]:
# Turn the feature rows and the labels into NumPy arrays
data, labels = np.array(data), np.array(labels)

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit a StandardScaler on the landmark features and standardise them
scaler = StandardScaler()
scaler.fit(data)
# Append a trailing axis of size 1 so each sample has shape (n_features, 1)
x = scaler.transform(data)[:, :, np.newaxis]
print(x.shape)

(2946, 42, 1)


In [None]:
# Inspect the dimensions of the label array
labels.shape

(2946, 24)

#### CNN Architecture with augmentation

In [None]:
# Build the CNN (32, 64 and 128 filters, kernel size 3) and print its layer summary
conv_layers = [(n_filters, 3) for n_filters in (32, 64, 128)]
model = CNN_model((42, 1), conv_layers)
model.summary()

Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_135 (Conv1D)         (None, 42, 32)            128       
                                                                 
 conv1d_136 (Conv1D)         (None, 42, 32)            3104      
                                                                 
 max_pooling1d_102 (MaxPooli  (None, 21, 32)           0         
 ng1D)                                                           
                                                                 
 conv1d_137 (Conv1D)         (None, 21, 64)            6208      
                                                                 
 conv1d_138 (Conv1D)         (None, 21, 64)            12352     
                                                                 
 max_pooling1d_103 (MaxPooli  (None, 10, 64)           0         
 ng1D)                                               

In [None]:
epochs = 30
batch_size = 32

# Train on the complete augmented dataset; no validation data is passed here
history = model.fit(
    x,
    labels,
    batch_size=batch_size,
    epochs=epochs,
    steps_per_epoch=x.shape[0] // batch_size,
    verbose=1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Save the trained model to cnn.h5 (path is relative to the working directory)
model.save('cnn.h5')

#### Prediction and evaluation

In [None]:
# NOTE: `x` is the same data the model was fitted on, so the scores derived
# from these predictions are training-set scores, not held-out performance.
results = model.predict(x)
# Reduce the label matrix and the model outputs to class indices
y_true = np.argmax(labels, axis=1)
y_pred = results.argmax(axis=1)




In [None]:
from sklearn.metrics import classification_report
# Per-class precision, recall, F1 and support for the predictions
print(classification_report(y_true=y_true, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       134
           1       0.99      1.00      1.00       127
           2       1.00      0.99      0.99       139
           3       1.00      0.97      0.99       111
           4       1.00      0.99      1.00       134
           5       1.00      1.00      1.00       138
           6       1.00      1.00      1.00       142
           7       1.00      1.00      1.00       129
           8       1.00      0.99      1.00       103
           9       1.00      1.00      1.00       113
          10       1.00      0.99      1.00       126
          11       0.98      1.00      0.99        96
          12       1.00      0.99      0.99        98
          13       0.98      1.00      0.99        93
          14       1.00      1.00      1.00       130
          15       0.99      1.00      1.00       103
          16       1.00      0.99      1.00       109
          17       1.00    

In [None]:
# roc_auc_score and numpy are already imported in the notebook's import cell.
# Calculate the ROC AUC using the micro-averaging method.
# `labels` is a one-hot matrix, which scikit-learn scores as a
# multilabel-indicator target, so `multi_class` does not change the value;
# 'ovr' is used to match the other ROC AUC computations in this notebook.
roc_auc = roc_auc_score(labels, results, multi_class='ovr', average='micro')
print("ROC AUC score:", roc_auc)


ROC AUC score: 0.9999623475318006
