# Gesture Recognition
In this group project, you are going to build a 3D Conv model that will be able to predict the 5 gestures correctly. Please import the following libraries to get started.

In [29]:
!pip install imageio



In [1]:
import numpy as np
import os
import imageio
from PIL import Image
#resize
import datetime
import os

We set the random seed so that the results don't vary drastically.

In [2]:
np.random.seed(30)
import random as rn
rn.seed(30)
from keras import backend as K
import tensorflow as tf
tf.random.set_seed(30)

In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

In [3]:
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
batch_size = 40

## Generator
This is one of the most important part of the code. The overall structure of the generator has been given. In the generator, you are going to preprocess the images as you have images of 2 different dimensions as well as create a batch of video frames. You have to experiment with `img_idx`, `y`,`z` and normalization such that you get high accuracy.

In [4]:
def crop_img(image, crop_fraction):
    """
    Crop the image by a certain fraction of its width, maintaining the center.
    The crop_fraction should be a float representing the fraction of the width to crop.
    The image's RGB channels will be preserved.
    
    Parameters:
    - image: numpy array of shape (height, width, channels), representing the image.
    - crop_fraction: float, the fraction of the image's width to crop (0.10 means cropping 10% from both sides).
    
    Returns:
    - cropped_image: numpy array of the cropped image.
    """
    # Ensure the image has 3 channels (RGB)
    if image.shape[2] != 3:
        raise ValueError("Image should have 3 channels (RGB).")

    # Get the height, width, and channels of the image
    h, w, c = image.shape

    # Calculate the width to crop from each side
    crop_w = int(crop_fraction * w)

    # Determine the start and end indices for cropping
    start_w = crop_w // 2
    end_w = w - crop_w // 2

    # Crop the image, preserving all RGB channels
    cropped_image = image[:, start_w:end_w, :]

    return cropped_image


In [5]:
def generator(source_path, folder_list, batch_size):
    print('Source path = ', source_path, '; batch size =', batch_size)
    img_idx = list(range(30))  # We will use 30 frames per video (adjustable)
    x, y, z = len(img_idx), 160, 160  # x is the number of images, y and z are the target dimensions

    while True:
        t = np.random.permutation(folder_list)  # Shuffle the list of folders (videos)
        num_batches = len(folder_list) // batch_size  # Calculate number of full batches
        print('num_batches = ', num_batches)

        for batch in range(num_batches):  # Iterate over the number of batches
            batch_data = np.zeros((batch_size, x, y, z, 3))  # Initialize batch data (40 samples, 30 frames, 160x160, 3 channels)
            batch_labels = np.zeros((batch_size, 5))  # Initialize batch labels (one-hot encoding for 5 classes)
            
            for folder in range(batch_size):  # Iterate over the batch size (40 samples)
                folder_path = source_path + '/' + t[folder + (batch * batch_size)].split(';')[0]
                imgs = os.listdir(folder_path)  # Read all image files in the folder

                for idx, item in enumerate(img_idx):  # Iterate over the frames/images (using img_idx)
                    img_path = os.path.join(folder_path, imgs[item])  # Get the image path
                    image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio
                    
                    # Crop and resize images to ensure uniformity in shape
                    image = crop_img(image, 0.10)  # Crop 10% from both sides
                    
                    # Resize the image to 160x160 using PIL 
                    image = Image.fromarray(image.astype(np.uint8))  # Convert numpy array to PIL image
                    image = image.resize((y, z), Image.Resampling.LANCZOS)  # Resize using Pillow
                    image = np.array(image).astype(np.float32)  # Convert back to numpy array
                    
                    # Normalize the image by mean normalization (subtract mean and divide by standard deviation)
                    image = (image - np.mean(image)) / np.std(image)  # Mean normalization

                    # Load the image into the 3 RGB channels
                    # Ensure we only store each colour channel separately
                    batch_data[folder, idx, :, :, 0] = image[:, :, 0]  # Red channel
                    batch_data[folder, idx, :, :, 1] = image[:, :, 1]  # Green channel
                    batch_data[folder, idx, :, :, 2] = image[:, :, 2]  # Blue channel

                # Set the one-hot encoding for the label (class)
                class_idx = int(t[folder + (batch * batch_size)].strip().split(';')[2])  # Get the class index from the CSV file
                batch_labels[folder, class_idx] = 1  # One-hot encoding for the class label

            yield batch_data, batch_labels  # Yield the batch of data and labels

        # Handle remaining data points (if not a perfect multiple of batch_size)
        if len(folder_list) % batch_size != 0:
            remaining_samples = len(folder_list) % batch_size
            batch_data_remaining = np.zeros((remaining_samples, x, y, z, 3))  # Remaining batch data
            batch_labels_remaining = np.zeros((remaining_samples, 5))  # Remaining batch labels

            for folder in range(remaining_samples):  # Process the remaining samples
                folder_path = source_path + '/' + t[folder + (num_batches * batch_size)].split(';')[0]
                imgs = os.listdir(folder_path)  # Read all image files in the folder

                for idx, item in enumerate(img_idx):  # Iterate over frames/images of a folder
                    img_path = os.path.join(folder_path, imgs[item])  # Get the image path
                    image = imageio.imread(img_path).astype(np.float32)  # Read the image

                    # Crop and resize the images to ensure uniform shape
                    image = crop_img(image, 0.10)  # Crop 10% from both sides
                    
                    # Resize the image to 160x160 using PIL
                    image = Image.fromarray(image.astype(np.uint8))  # Convert numpy array to PIL image
                    image = image.resize((y, z), Image.Resampling.LANCZOS)  # Resize using Pillow
                    image = np.array(image).astype(np.float32)  # Convert back to numpy array
                    
                    # Normalize the image
                    image = (image - np.mean(image)) / np.std(image)  # Mean normalization

                    # Load the image into the 3 RGB channels
                    # Ensure we only store each colour channel separately
                    batch_data[folder, idx, :, :, 0] = image[:, :, 0]  # Red channel
                    batch_data[folder, idx, :, :, 1] = image[:, :, 1]  # Green channel
                    batch_data[folder, idx, :, :, 2] = image[:, :, 2]  # Blue channel

                # Set the one-hot encoding for the class
                class_idx = int(t[folder + (num_batches * batch_size)].strip().split(';')[2])
                batch_labels_remaining[folder, class_idx] = 1  # One-hot encoding for the class label

            yield batch_data_remaining, batch_labels_remaining  # Yield the final batch with the remaining data


Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

In [6]:
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
num_epochs = 30
print ('# epochs =', num_epochs)

# training sequences = 663
# validation sequences = 100
# epochs = 30


## Model
Here you make the model using different functionalities that Keras provides. Remember to use `Conv3D` and `MaxPooling3D` and not `Conv2D` and `Maxpooling2D` for a 3D convolution model. You would want to use `TimeDistributed` while building a Conv2D + RNN model. Also remember that the last layer is the softmax. Design the network in such a way that the model is able to give good accuracy on the least number of parameters so that it can fit in the memory of the webcam.

#### Experiment 1

Conv 3D Model with 30 epochs, 40 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 160x160 , adam optimiser with learning rate 0.0001, 30 images as input out of 30

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv3D, MaxPooling3D, Flatten, BatchNormalization, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# Initialize the model
model = Sequential()

# Add 3D Convolutional Layers and Max Pooling Layers
# Input shape is (30, 160, 160, 3) -> (number of frames, height, width, channels)

# First Conv3D layer
model.add(Conv3D(16, (3, 3, 3), padding='same', input_shape=(30, 160, 160, 3)))  
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Second Conv3D layer
model.add(Conv3D(32, (3, 3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Third Conv3D layer
model.add(Conv3D(64, (3, 3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Flatten the output of Conv3D layers before passing to fully connected layers
model.add(Flatten())

# Dense Layer for Classification
model.add(Dense(128))
model.add(Activation('relu'))

# Output Layer (5 classes for classification)
model.add(Dense(5, activation='softmax'))  # Assuming there are 5 classes for classification

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [8]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(model.summary())

None


Let us create the `train_generator` and the `val_generator` which will be used in `.fit_generator`.

In [9]:
train_generator = generator(train_path, train_doc, batch_size)
val_generator = generator(val_path, val_doc, batch_size)

In [10]:
from keras.callbacks import ReduceLROnPlateau

model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'
    
if not os.path.exists(model_name):
    os.mkdir(model_name)
        
# Save the model in .keras format during training
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.weights.h5'

checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, 
                             save_best_only=False, save_weights_only=True, mode='auto',  save_freq='epoch')

# Define the ReduceLROnPlateau callback
LR = ReduceLROnPlateau(monitor='val_loss', 
                       factor=0.2, 
                       patience=5, 
                       verbose=1, 
                       min_lr=0.0001)

callbacks_list = [checkpoint, LR]

The `steps_per_epoch` and `validation_steps` are used by `fit_generator` to decide the number of next() calls it need to make.

In [11]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 17
Validation steps: 3


Let us now fit the model. This will start training the model and with the help of the checkpoints, you'll be able to save the model at the end of each epoch.

In [13]:
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data/train ; batch size = 40
num_batches =  16


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/30
[1m15/17[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m59s[0m 30s/step - categorical_accuracy: 0.2098 - loss: 6.0074 

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m16/17[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m29s[0m 29s/step - categorical_accuracy: 0.2136 - loss: 5.8725num_batches =  16
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28s/step - categorical_accuracy: 0.2166 - loss: 5.7489 Source path =  Project_data/val ; batch size = 40
num_batches =  2
num_batches =  2

Epoch 1: saving model to model_init_2024-12-2608_18_45.693183/model-00001-3.77187-0.26546-1.53121-0.31000.keras
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m504s[0m 30s/step - categorical_accuracy: 0.2193 - loss: 5.6391 - val_categorical_accuracy: 0.3100 - val_loss: 1.5312 - learning_rate: 1.0000e-04
Epoch 2/30
[1m16/17[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m57s[0m 57s/step - categorical_accuracy: 0.5335 - loss: 1.1130 num_batches =  16
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54s/step - categorical_accuracy: 0.5356 - loss: 1.1121 num_batches =  2

Epoch 2: saving model to model_init_2024-12-2608_18_45.693

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m509s[0m 30s/step - categorical_accuracy: 0.9970 - loss: 0.0560 - val_categorical_accuracy: 0.4917 - val_loss: 1.3587 - learning_rate: 1.0000e-04
Epoch 15/30
[1m16/17[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m29s[0m 30s/step - categorical_accuracy: 1.0000 - loss: 0.0447num_batches =  16
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - categorical_accuracy: 0.9987 - loss: 0.0479 num_batches =  2

Epoch 15: saving model to model_init_2024-12-2608_18_45.693183/model-00015-0.09852-0.97738-1.48156-0.49167.keras
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m502s[0m 29s/step - categorical_accuracy: 0.9975 - loss: 0.0507 - val_categorical_accuracy: 0.4917 - val_loss: 1.4816 - learning_rate: 1.0000e-04
Epoch 16/30
[1m16/17[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m30s[0m 30s/step - categorical_accuracy: 1.0000 - loss: 0.0394 num_batches =  16
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x304be8dd0>

So we have got the training accuarcy of 99 % and validation accuracy of 47 % in the base model

#### Experiment - 2
Conv 3D Model with 15 epochs, 25 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 120x120 , adam optimiser with learning rate 0.0001, 18 images as input out of 30

In [15]:
def generator_2(source_path, folder_list, batch_size):
    print('Source path = ', source_path, '; batch size =', batch_size)
    img_idx = [0,1,2,4,6,8,10,12,14,16,18,20,22,24,26,27,28,29]
    #list(range(30))  # We will use 30 frames per video (adjustable)
    x, y, z = len(img_idx), 120, 120  # x is the number of images, y and z are the target dimensions

    while True:
        t = np.random.permutation(folder_list)  # Shuffle the list of folders (videos)
        num_batches = len(folder_list) // batch_size  # Calculate number of full batches
        print('num_batches = ', num_batches)

        for batch in range(num_batches):  # Iterate over the number of batches
            batch_data = np.zeros((batch_size, x, y, z, 3))  # Initialize batch data (40 samples, 30 frames, 160x160, 3 channels)
            batch_labels = np.zeros((batch_size, 5))  # Initialize batch labels (one-hot encoding for 5 classes)
            
            for folder in range(batch_size):  # Iterate over the batch size (40 samples)
                folder_path = source_path + '/' + t[folder + (batch * batch_size)].split(';')[0]
                imgs = os.listdir(folder_path)  # Read all image files in the folder

                for idx, item in enumerate(img_idx):  # Iterate over the frames/images (using img_idx)
                    img_path = os.path.join(folder_path, imgs[item])  # Get the image path
                    image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio
                    
                    # Crop and resize images to ensure uniformity in shape
                    image = crop_img(image, 0.10)  # Crop 10% from both sides
                    
                    # Resize the image to 160x160 using PIL 
                    image = Image.fromarray(image.astype(np.uint8))  # Convert numpy array to PIL image
                    image = image.resize((y, z), Image.Resampling.LANCZOS)  # Resize using Pillow
                    image = np.array(image).astype(np.float32)  # Convert back to numpy array
                    
                    # Normalize the image by mean normalization (subtract mean and divide by standard deviation)
                    image = (image - np.mean(image)) / np.std(image)  # Mean normalization

                    # Load the image into the 3 RGB channels
                    # Ensure we only store each colour channel separately
                    batch_data[folder, idx, :, :, 0] = image[:, :, 0]  # Red channel
                    batch_data[folder, idx, :, :, 1] = image[:, :, 1]  # Green channel
                    batch_data[folder, idx, :, :, 2] = image[:, :, 2]  # Blue channel

                # Set the one-hot encoding for the label (class)
                class_idx = int(t[folder + (batch * batch_size)].strip().split(';')[2])  # Get the class index from the CSV file
                batch_labels[folder, class_idx] = 1  # One-hot encoding for the class label

            yield batch_data, batch_labels  # Yield the batch of data and labels

        # Handle remaining data points (if not a perfect multiple of batch_size)
        if len(folder_list) % batch_size != 0:
            remaining_samples = len(folder_list) % batch_size
            batch_data_remaining = np.zeros((remaining_samples, x, y, z, 3))  # Remaining batch data
            batch_labels_remaining = np.zeros((remaining_samples, 5))  # Remaining batch labels

            for folder in range(remaining_samples):  # Process the remaining samples
                folder_path = source_path + '/' + t[folder + (num_batches * batch_size)].split(';')[0]
                imgs = os.listdir(folder_path)  # Read all image files in the folder

                for idx, item in enumerate(img_idx):  # Iterate over frames/images of a folder
                    img_path = os.path.join(folder_path, imgs[item])  # Get the image path
                    image = imageio.imread(img_path).astype(np.float32)  # Read the image

                    # Crop and resize the images to ensure uniform shape
                    image = crop_img(image, 0.10)  # Crop 10% from both sides
                    
                    # Resize the image to 160x160 using PIL
                    image = Image.fromarray(image.astype(np.uint8))  # Convert numpy array to PIL image
                    image = image.resize((y, z), Image.Resampling.LANCZOS)  # Resize using Pillow
                    image = np.array(image).astype(np.float32)  # Convert back to numpy array
                    
                    # Normalize the image
                    image = (image - np.mean(image)) / np.std(image)  # Mean normalization

                    # Load the image into the 3 RGB channels
                    # Ensure we only store each colour channel separately
                    batch_data[folder, idx, :, :, 0] = image[:, :, 0]  # Red channel
                    batch_data[folder, idx, :, :, 1] = image[:, :, 1]  # Green channel
                    batch_data[folder, idx, :, :, 2] = image[:, :, 2]  # Blue channel

                # Set the one-hot encoding for the class
                class_idx = int(t[folder + (num_batches * batch_size)].strip().split(';')[2])
                batch_labels_remaining[folder, class_idx] = 1  # One-hot encoding for the class label

            yield batch_data_remaining, batch_labels_remaining  # Yield the final batch with the remaining data


In [16]:
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
batch_size = 25
num_epochs = 15
print ('# epochs =', num_epochs)

# epochs = 15


In [17]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, BatchNormalization, Activation, Dropout, GlobalAveragePooling3D, Dense
from tensorflow.keras import regularizers

# Hyperparameters
dropout_rate = 0.5
l2_reg = 0.01
num_classes = 5

# Initialize the model
model = Sequential()

# First Conv3D layer
model.add(Conv3D(
    16, 
    (3, 3, 3), 
    padding='same', 
    input_shape=(18, 120, 120, 3), 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))


# Second Conv3D layer
model.add(Conv3D(
    32, 
    (3, 3, 3), 
    padding='same', 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))


# Third Conv3D layer
model.add(Conv3D(
    64, 
    (3, 3, 3), 
    padding='same', 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))


# Global Average Pooling and Fully Connected Layers
model.add(GlobalAveragePooling3D())
model.add(Dropout(dropout_rate))
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)))


# Output Layer
model.add(Dense(num_classes, activation='softmax'))


In [19]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(model.summary())

None


In [20]:
train_generator = generator_2(train_path, train_doc, batch_size)
val_generator = generator_2(val_path, val_doc, batch_size)

In [21]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 27
Validation steps: 4


In [21]:
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data/train ; batch size = 25
num_batches =  26


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/15
[1m25/27[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m10s[0m 5s/step - categorical_accuracy: 0.2002 - loss: 3.4224

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m26/27[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m5s[0m 5s/step - categorical_accuracy: 0.2001 - loss: 3.4187 num_batches =  26
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - categorical_accuracy: 0.1999 - loss: 3.4151Source path =  Project_data/val ; batch size = 25
num_batches =  4
num_batches =  4

Epoch 1: saving model to model_init_2024-12-2619_34_37.119648/model-00001-3.32135-0.19457-3.08584-0.22000.keras
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 6s/step - categorical_accuracy: 0.1997 - loss: 3.4117 - val_categorical_accuracy: 0.2200 - val_loss: 3.0858 - learning_rate: 1.0000e-04
Epoch 2/15
[1m26/27[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m5s[0m 5s/step - categorical_accuracy: 0.2602 - loss: 3.1374 num_batches =  26
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - categorical_accuracy: 0.2600 - loss: 3.1374num_batches =  4

Epoch 2: saving model to model_init_2024-12-2619_34_37.119648/mode

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 5s/step - categorical_accuracy: 0.4595 - loss: 2.5573 - val_categorical_accuracy: 0.4900 - val_loss: 2.5157 - learning_rate: 1.0000e-04
Epoch 15/15
[1m26/27[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m5s[0m 5s/step - categorical_accuracy: 0.4580 - loss: 2.5370 num_batches =  26
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - categorical_accuracy: 0.4578 - loss: 2.5377num_batches =  4

Epoch 15: saving model to model_init_2024-12-2619_34_37.119648/model-00015-2.55539-0.45249-2.43803-0.51000.keras
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 5s/step - categorical_accuracy: 0.4576 - loss: 2.5383 - val_categorical_accuracy: 0.5100 - val_loss: 2.4380 - learning_rate: 1.0000e-04


<keras.src.callbacks.history.History at 0x309bffc50>

So we have got the training accuarcy of 45 % and validation accuracy of 51 % in the base model

#### Experiment - 3
Conv 3D Model with 20 epochs, 30 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 120x120 , adam optimiser with learning rate 0.0001, 18 images as input out of 30

In [98]:
#train_doc = np.random.permutation(open('Project_data_Final/train_100.csv').readlines())
#val_doc = np.random.permutation(open('Project_data_Final/val_50.csv').readlines())

train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())

batch_size = 30
num_epochs = 20
print ('# epochs =', num_epochs)

# epochs = 20


In [97]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, BatchNormalization, Activation, Dropout, GlobalAveragePooling3D, Dense
from tensorflow.keras import regularizers

# Hyperparameters
l2_reg = 0.01
num_classes = 5

# Initialize the model
model = Sequential()

# First Conv3D layer
model.add(Conv3D(
    16, 
    (3, 3, 3), 
    padding='same', 
    input_shape=(18, 120, 120, 3), 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Second Conv3D layer
model.add(Conv3D(
    32, 
    (3, 3, 3), 
    padding='same', 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Third Conv3D layer
model.add(Conv3D(
    64, 
    (3, 3, 3), 
    padding='same', 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Global Average Pooling and Fully Connected Layers
model.add(GlobalAveragePooling3D())
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)))

# Output Layer
model.add(Dense(num_classes, activation='softmax'))


In [98]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(model.summary())

None


In [99]:
train_generator = generator_2(train_path, train_doc, batch_size)
val_generator = generator_2(val_path, val_doc, batch_size)

In [100]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 23
Validation steps: 4


In [101]:
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data_Final/train ; batch size = 30


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/20
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m7s[0m 4s/step - categorical_accuracy: 0.1950 - loss: 3.3500 

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


num_batches =  22
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - categorical_accuracy: 0.1969 - loss: 3.3429Source path =  Project_data_Final/val ; batch size = 30
num_batches =  3

Epoch 1: saving model to model_init_2024-12-2619_21_51.635635/model-00001-3.26843-0.21719-3.13768-0.18000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 4s/step - categorical_accuracy: 0.1977 - loss: 3.3398 - val_categorical_accuracy: 0.1800 - val_loss: 3.1377 - learning_rate: 1.0000e-04
Epoch 2/20
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m7s[0m 4s/step - categorical_accuracy: 0.3225 - loss: 3.0582 num_batches =  22
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - categorical_accuracy: 0.3239 - loss: 3.0559num_batches =  3

Epoch 2: saving model to model_init_2024-12-2619_21_51.635635/model-00002-3.03176-0.33786-3.08669-0.28000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 4s/step - categoric

<keras.src.callbacks.history.History at 0x1f104aa8910>

So we have got the training accuarcy of 89 % and validation accuracy of 76 % in the base model

#### Experiment - 4
Conv 3D Model with 50 epochs, 30 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 160x160 , adam optimiser with learning rate 0.0001, 18 images as input out of 30

In [95]:
#train_doc = np.random.permutation(open('Project_data_Final/train_100.csv').readlines())
#val_doc = np.random.permutation(open('Project_data_Final/val_50.csv').readlines())

train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())

batch_size = 30
num_epochs = 50
print ('# epochs =', num_epochs)

# epochs = 50


In [16]:
def generator_3(source_path, folder_list, batch_size):
    print('Source path = ', source_path, '; batch size =', batch_size)
    img_idx = [0,1,2,4,5,6,8,9,10,12,14,16,18,20,22,24,26,27,28,29]
    #list(range(30))  # We will use 30 frames per video (adjustable)
    x, y, z = len(img_idx), 160, 160  # x is the number of images, y and z are the target dimensions

    while True:
        t = np.random.permutation(folder_list)  # Shuffle the list of folders (videos)
        num_batches = len(folder_list) // batch_size  # Calculate number of full batches
        

        for batch in range(num_batches):  # Iterate over the number of batches
            batch_data = np.zeros((batch_size, x, y, z, 3))  # Initialize batch data (40 samples, 30 frames, 160x160, 3 channels)
            batch_labels = np.zeros((batch_size, 5))  # Initialize batch labels (one-hot encoding for 5 classes)
            
            for folder in range(batch_size):  # Iterate over the batch size (40 samples)
                folder_path = source_path + '/' + t[folder + (batch * batch_size)].split(';')[0]
                imgs = os.listdir(folder_path)  # Read all image files in the folder

                for idx, item in enumerate(img_idx):  # Iterate over the frames/images (using img_idx)
                    img_path = os.path.join(folder_path, imgs[item])  # Get the image path
                    image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio
                    
                    # Crop and resize images to ensure uniformity in shape
                    image = crop_img(image, 0.10)  # Crop 10% from both sides
                    
                    # Resize the image to 160x160 using PIL 
                    image = Image.fromarray(image.astype(np.uint8))  # Convert numpy array to PIL image
                    image = image.resize((y, z), Image.Resampling.LANCZOS)  # Resize using Pillow
                    image = np.array(image).astype(np.float32)  # Convert back to numpy array
                    
                    # Normalize the image by mean normalization (subtract mean and divide by standard deviation)
                    image = (image - np.mean(image)) / np.std(image)  # Mean normalization

                    # Load the image into the 3 RGB channels
                    # Ensure we only store each colour channel separately
                    batch_data[folder, idx, :, :, 0] = image[:, :, 0]  # Red channel
                    batch_data[folder, idx, :, :, 1] = image[:, :, 1]  # Green channel
                    batch_data[folder, idx, :, :, 2] = image[:, :, 2]  # Blue channel

                # Set the one-hot encoding for the label (class)
                class_idx = int(t[folder + (batch * batch_size)].strip().split(';')[2])  # Get the class index from the CSV file
                batch_labels[folder, class_idx] = 1  # One-hot encoding for the class label

            yield batch_data, batch_labels  # Yield the batch of data and labels

        # Handle remaining data points (if not a perfect multiple of batch_size)
        if len(folder_list) % batch_size != 0:
            remaining_samples = len(folder_list) % batch_size
            batch_data_remaining = np.zeros((remaining_samples, x, y, z, 3))  # Remaining batch data
            batch_labels_remaining = np.zeros((remaining_samples, 5))  # Remaining batch labels

            for folder in range(remaining_samples):  # Process the remaining samples
                folder_path = source_path + '/' + t[folder + (num_batches * batch_size)].split(';')[0]
                imgs = os.listdir(folder_path)  # Read all image files in the folder

                for idx, item in enumerate(img_idx):  # Iterate over frames/images of a folder
                    img_path = os.path.join(folder_path, imgs[item])  # Get the image path
                    image = imageio.imread(img_path).astype(np.float32)  # Read the image

                    # Crop and resize the images to ensure uniform shape
                    image = crop_img(image, 0.10)  # Crop 10% from both sides
                    
                    # Resize the image to 160x160 using PIL
                    image = Image.fromarray(image.astype(np.uint8))  # Convert numpy array to PIL image
                    image = image.resize((y, z), Image.Resampling.LANCZOS)  # Resize using Pillow
                    image = np.array(image).astype(np.float32)  # Convert back to numpy array
                    
                    # Normalize the image
                    image = (image - np.mean(image)) / np.std(image)  # Mean normalization

                    # Load the image into the 3 RGB channels
                    # Ensure we only store each colour channel separately
                    batch_data[folder, idx, :, :, 0] = image[:, :, 0]  # Red channel
                    batch_data[folder, idx, :, :, 1] = image[:, :, 1]  # Green channel
                    batch_data[folder, idx, :, :, 2] = image[:, :, 2]  # Blue channel

                # Set the one-hot encoding for the class
                class_idx = int(t[folder + (num_batches * batch_size)].strip().split(';')[2])
                batch_labels_remaining[folder, class_idx] = 1  # One-hot encoding for the class label
                
            yield batch_data_remaining, batch_labels_remaining  # Yield the final batch with the remaining data


In [105]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, BatchNormalization, Activation, Dropout, GlobalAveragePooling3D, Dense
from tensorflow.keras import regularizers

# Hyperparameters
l2_reg = 0.01
num_classes = 5

# Initialize the model
model = Sequential()

# First Conv3D layer
model.add(Conv3D(
    16, 
    (3, 3, 3), 
    padding='same', 
    input_shape=(18, 160, 160, 3), 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Second Conv3D layer
model.add(Conv3D(
    32, 
    (3, 3, 3), 
    padding='same', 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Third Conv3D layer
model.add(Conv3D(
    64, 
    (3, 3, 3), 
    padding='same', 
    kernel_regularizer=regularizers.l2(l2_reg)
))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

# Global Average Pooling and Fully Connected Layers
model.add(GlobalAveragePooling3D())
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)))

# Output Layer
model.add(Dense(num_classes, activation='softmax'))


In [106]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(model.summary())

None


In [107]:
train_generator = generator_3(train_path, train_doc, batch_size)
val_generator = generator_3(val_path, val_doc, batch_size)

In [108]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 23
Validation steps: 4


In [109]:
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data_Final/train ; batch size = 30


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/50
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m12s[0m 6s/step - categorical_accuracy: 0.1887 - loss: 3.4841

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - categorical_accuracy: 0.1910 - loss: 3.4673Source path =  Project_data_Final/val ; batch size = 30

Epoch 1: saving model to model_init_2024-12-2619_21_51.635635/model-00001-3.29081-0.21418-3.13427-0.21000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 7s/step - categorical_accuracy: 0.1919 - loss: 3.4600 - val_categorical_accuracy: 0.2100 - val_loss: 3.1343 - learning_rate: 1.0000e-04
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6s/step - categorical_accuracy: 0.2971 - loss: 3.0345
Epoch 2: saving model to model_init_2024-12-2619_21_51.635635/model-00002-3.02796-0.31523-3.08710-0.20000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 6s/step - categorical_accuracy: 0.2978 - loss: 3.0343 - val_categorical_accuracy: 0.2000 - val_loss: 3.0871 - learning_rate: 1.0000e-04
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

<keras.src.callbacks.history.History at 0x1efdc920910>

So we have got the training accuarcy of 93 % and validation accuracy of 77 % in the base model

#### Experiment 5
CNN + LSTM Model with 30 epochs, 30 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 120x120 , adam optimiser with learning rate 0.0001, 20 images as input out of 30

In [14]:
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
batch_size = 30
num_epochs = 30
print ('# epochs =', num_epochs)

# epochs = 30


In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Activation, Flatten, Dense, Dropout, LSTM, TimeDistributed
from tensorflow.keras import Input, regularizers

# Hyperparameters
# dropout_rate = 0.5
l2_reg = 0.01
num_classes = 5

# Input shape: (number of frames, height, width, channels)
input_shape = (20, 120, 120, 3)

# Initialize the model
model = Sequential()

# TimeDistributed CNN Layers
model.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(l2_reg)), input_shape=input_shape))
model.add(TimeDistributed(BatchNormalization()))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(l2_reg))))
model.add(TimeDistributed(BatchNormalization()))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

model.add(TimeDistributed(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(l2_reg))))
model.add(TimeDistributed(BatchNormalization()))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

# Flatten spatial features
model.add(TimeDistributed(Flatten()))

# LSTM Layers
model.add(LSTM(128, return_sequences=False, kernel_regularizer=regularizers.l2(l2_reg)))
# model.add(Dropout(dropout_rate))

# Fully Connected Layer
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)))
# model.add(Dropout(dropout_rate))

# Output Layer
model.add(Dense(num_classes, activation='softmax'))


  super().__init__(**kwargs)


In [16]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(model.summary())

None


In [18]:
train_generator = generator_2(train_path, train_doc, batch_size)
val_generator = generator_2(val_path, val_doc, batch_size)

In [19]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 23
Validation steps: 4


In [20]:
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data/train ; batch size = 30
num_batches =  22


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/30
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m54s[0m 27s/step - categorical_accuracy: 0.2398 - loss: 13.9791 

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m27s[0m 27s/step - categorical_accuracy: 0.2424 - loss: 13.9596num_batches =  22
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26s/step - categorical_accuracy: 0.2447 - loss: 13.9416 Source path =  Project_data/val ; batch size = 30
num_batches =  3
num_batches =  3

Epoch 1: saving model to model_init_2024-12-2707_52_10.735881/model-00001-13.54548-0.29563-12.55466-0.35000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m617s[0m 27s/step - categorical_accuracy: 0.2468 - loss: 13.9251 - val_categorical_accuracy: 0.3500 - val_loss: 12.5547 - learning_rate: 1.0000e-04
Epoch 2/30
[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m27s[0m 28s/step - categorical_accuracy: 0.5322 - loss: 11.9925num_batches =  22
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26s/step - categorical_accuracy: 0.5327 - loss: 11.9732 num_batches =  3

Epoch 2: saving model to model_init_2024-12-2707_52

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m630s[0m 27s/step - categorical_accuracy: 0.9997 - loss: 3.3397 - val_categorical_accuracy: 0.5300 - val_loss: 4.3528 - learning_rate: 1.0000e-04
Epoch 15/30
[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m28s[0m 28s/step - categorical_accuracy: 1.0000 - loss: 3.1696num_batches =  22
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27s/step - categorical_accuracy: 0.9999 - loss: 3.1682 num_batches =  3

Epoch 15: saving model to model_init_2024-12-2707_52_10.735881/model-00015-3.13556-0.99698-4.00208-0.55000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m631s[0m 27s/step - categorical_accuracy: 0.9997 - loss: 3.1668 - val_categorical_accuracy: 0.5500 - val_loss: 4.0021 - learning_rate: 1.0000e-04
Epoch 16/30
[1m22/23[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m28s[0m 28s/step - categorical_accuracy: 1.0000 - loss: 3.0030num_batches =  22
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x307c71bd0>

So we have got the training accuarcy of 99 % and validation accuracy of 60 % in the base model

#### Experiment 6
CNN + LSTM Model with 50 epochs, 30 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 160x160 , adam optimiser with learning rate 0.0001, 20 images as input out of 30

In [23]:
train_doc_100 = np.random.permutation(open('Project_data/train_100.csv').readlines())
val_doc_50 = np.random.permutation(open('Project_data/val_50.csv').readlines())
batch_size = 30
num_epochs = 50
print ('# epochs =', num_epochs)

# epochs = 50


In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Activation, Flatten, Dense, Dropout, LSTM, TimeDistributed
from tensorflow.keras import Input, regularizers

# Hyperparameters
# dropout_rate = 0.5
l2_reg = 0.01
num_classes = 5

# Input shape: (number of frames, height, width, channels)
input_shape = (20, 160, 160, 3)

# Initialize the model
model = Sequential()

# TimeDistributed CNN Layers
model.add(TimeDistributed(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(l2_reg)), input_shape=input_shape))
model.add(TimeDistributed(BatchNormalization()))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

model.add(TimeDistributed(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(l2_reg))))
model.add(TimeDistributed(BatchNormalization()))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

model.add(TimeDistributed(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(l2_reg))))
model.add(TimeDistributed(BatchNormalization()))
model.add(TimeDistributed(Activation('relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))

# Flatten spatial features
model.add(TimeDistributed(Flatten()))

# LSTM Layers
model.add(LSTM(128, return_sequences=False, kernel_regularizer=regularizers.l2(l2_reg)))
# model.add(Dropout(dropout_rate))

# Fully Connected Layer
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_reg)))
# model.add(Dropout(dropout_rate))

# Output Layer
model.add(Dense(num_classes, activation='softmax'))


In [25]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(model.summary())

None


In [26]:
train_generator = generator_3(train_path, train_doc_100, batch_size)
val_generator = generator_3(val_path, val_doc_50, batch_size)

In [27]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 23
Validation steps: 4


In [28]:
model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data/train ; batch size = 30


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/50
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m2:22[0m 71s/step - categorical_accuracy: 0.2824 - loss: 14.0048

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68s/step - categorical_accuracy: 0.2858 - loss: 13.9575  Source path =  Project_data/val ; batch size = 30

Epoch 1: saving model to model_init_2024-12-2714_36_52.054119/model-00001-13.45822-0.32127-12.18563-0.37000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1596s[0m 69s/step - categorical_accuracy: 0.2873 - loss: 13.9367 - val_categorical_accuracy: 0.3700 - val_loss: 12.1856 - learning_rate: 1.0000e-04
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69s/step - categorical_accuracy: 0.5515 - loss: 11.5095  
Epoch 2: saving model to model_init_2024-12-2714_36_52.054119/model-00002-11.01347-0.52640-9.95407-0.50000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1606s[0m 70s/step - categorical_accuracy: 0.5505 - loss: 11.4888 - val_categorical_accuracy: 0.5000 - val_loss: 9.9541 - learning_rate: 1.0000e-04
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69s/step - categorical_accuracy: 0.9812 - loss: 1.2706  
Epoch 37: saving model to model_init_2024-12-2714_36_52.054119/model-00037-1.28479-0.97285-2.56614-0.50000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1610s[0m 70s/step - categorical_accuracy: 0.9808 - loss: 1.2712 - val_categorical_accuracy: 0.5000 - val_loss: 2.5661 - learning_rate: 1.0000e-04
Epoch 38/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69s/step - categorical_accuracy: 0.9914 - loss: 1.2326  
Epoch 38: saving model to model_init_2024-12-2714_36_52.054119/model-00038-1.22061-0.99095-2.11267-0.58000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1612s[0m 70s/step - categorical_accuracy: 0.9914 - loss: 1.2321 - val_categorical_accuracy: 0.5800 - val_loss: 2.1127 - learning_rate: 1.0000e-04
Epoch 39/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69s/step - categorical_accuracy: 0.9

<keras.src.callbacks.history.History at 0x3107962d0>

So we have got the training accuarcy of 99 % and validation accuracy of 67 % in the base model

#### Experiment 7
CNN + LSTM Model with 20 epochs, 20 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 160x160 , adam optimiser with learning rate 0.0001, 20 images as input out of 30

In [96]:
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
batch_size = 20
num_epochs = 20
print ('# epochs =', num_epochs)

# epochs = 20


In [125]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Activation, TimeDistributed
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

l2_reg = 0.01
num_classes = 5

# Input shape: (number of frames, height, width, channels)
input_shape = (20, 160, 160, 3)

GRU_model = Sequential()

GRU_model.add(TimeDistributed(Conv2D(16, (3, 3) , padding='same', activation='relu'),
                          input_shape=(input_shape)))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(32, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(64, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(128, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))


GRU_model.add(TimeDistributed(Flatten()))


GRU_model.add(GRU(128))

GRU_model.add(Dense(128,activation='relu'))

GRU_model.add(Dense(5, activation='softmax'))

  super().__init__(**kwargs)


In [126]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
GRU_model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(GRU_model.summary())

None


In [127]:
train_generator = generator_3(train_path, train_doc, batch_size)
val_generator = generator_3(val_path, val_doc, batch_size)

In [128]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 34
Validation steps: 5


In [129]:
GRU_model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data_Final/train ; batch size = 20


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/20
[1m32/34[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m23s[0m 12s/step - categorical_accuracy: 0.3449 - loss: 1.5167

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - categorical_accuracy: 0.3513 - loss: 1.5067 Source path =  Project_data_Final/val ; batch size = 20

Epoch 1: saving model to model_init_2024-12-2816_47_18.992975/model-00001-1.34730-0.45400-1.19947-0.52000.keras
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m467s[0m 12s/step - categorical_accuracy: 0.3542 - loss: 1.5022 - val_categorical_accuracy: 0.5200 - val_loss: 1.1995 - learning_rate: 1.0000e-04
Epoch 2/20
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11s/step - categorical_accuracy: 0.8271 - loss: 0.6712 
Epoch 2: saving model to model_init_2024-12-2816_47_18.992975/model-00002-0.61812-0.84465-1.16294-0.56000.keras
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 11s/step - categorical_accuracy: 0.8276 - loss: 0.6697 - val_categorical_accuracy: 0.5600 - val_loss: 1.1629 - learning_rate: 1.0000e-04
Epoch 3/20
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x1f107e82f90>

So we have got the training accuarcy of 99 % and validation accuracy of 65 % in the base model

#### Experiment 8 - Adding dropout layer, increasing number of layers
CNN + GRU Model with 20 epochs, 30 batch size

Without dropouts in Conv layer and with batch normalization

Input image size 160x160 , adam optimiser with learning rate 0.0001, 20 images as input out of 30

In [97]:
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
batch_size = 30
num_epochs = 20
print ('# epochs =', num_epochs)

# epochs = 20


In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Activation, TimeDistributed
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

l2_reg = 0.01
num_classes = 5

# Input shape: (number of frames, height, width, channels)
input_shape = (20, 160, 160, 3)

GRU_model = Sequential()

GRU_model.add(TimeDistributed(Conv2D(16, (3, 3) , padding='same', activation='relu'),
                          input_shape=(input_shape)))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(32, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(32, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(64, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(64, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(128, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))

GRU_model.add(TimeDistributed(Conv2D(128, (3, 3) , padding='same', activation='relu')))
GRU_model.add(TimeDistributed(BatchNormalization()))
GRU_model.add(TimeDistributed(MaxPooling2D((2, 2))))


GRU_model.add(TimeDistributed(Flatten()))


GRU_model.add(GRU(128))

GRU_model.add(Dense(128,activation='relu'))
GRU_model.add(Dropout(0.25))

GRU_model.add(Dense(5, activation='softmax'))

  super().__init__(**kwargs)


NameError: name 'Dropout' is not defined

In [147]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
GRU_model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(GRU_model.summary())

None


In [148]:
train_generator = generator_3(train_path, train_doc, batch_size)
val_generator = generator_3(val_path, val_doc, batch_size)

In [149]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 23
Validation steps: 4


In [150]:
GRU_model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data_Final/train ; batch size = 30


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/20
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m1:15[0m 38s/step - categorical_accuracy: 0.2577 - loss: 1.6297

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37s/step - categorical_accuracy: 0.2626 - loss: 1.6241 Source path =  Project_data_Final/val ; batch size = 30

Epoch 1: saving model to model_init_2024-12-2816_47_18.992975/model-00001-1.56560-0.31373-1.41121-0.40000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1005s[0m 38s/step - categorical_accuracy: 0.2647 - loss: 1.6217 - val_categorical_accuracy: 0.4000 - val_loss: 1.4112 - learning_rate: 1.0000e-04
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41s/step - categorical_accuracy: 0.5078 - loss: 1.2675 
Epoch 2: saving model to model_init_2024-12-2816_47_18.992975/model-00002-1.23144-0.52338-1.41407-0.35833.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m966s[0m 41s/step - categorical_accuracy: 0.5084 - loss: 1.2660 - val_categorical_accuracy: 0.3583 - val_loss: 1.4141 - learning_rate: 1.0000e-04
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x1f59b09c710>

So we have got the training accuarcy of 99 % and validation accuracy of 48 % in the base model

#### Experiment 9 - Pretrained Mobile net, without learning weights, using LSTM

In [89]:
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
batch_size = 30
num_epochs = 20
print ('# epochs =', num_epochs)

# epochs = 20


In [90]:
train_generator = generator_3(train_path, train_doc, batch_size)
val_generator = generator_3(val_path, val_doc, batch_size)

In [91]:
# Importing the transfer learning model:
from keras.applications import mobilenet

mobilenet_transfer = mobilenet.MobileNet(weights='imagenet', include_top=False)

  mobilenet_transfer = mobilenet.MobileNet(weights='imagenet', include_top=False)


In [92]:
# Building the model :

input_shape = (20,160,160,3)
        
mobileNetModel= Sequential()
mobileNetModel.add(TimeDistributed(mobilenet_transfer,input_shape=(input_shape)))


for layer in mobileNetModel.layers:
    layer.trainable = False


mobileNetModel.add(TimeDistributed(BatchNormalization()))
mobileNetModel.add(TimeDistributed(MaxPooling2D((2, 2))))
mobileNetModel.add(TimeDistributed(Flatten()))

mobileNetModel.add(LSTM(128))
mobileNetModel.add(Dropout(0.25))

mobileNetModel.add(Dense(128,activation='relu'))
mobileNetModel.add(Dropout(0.25))

mobileNetModel.add(Dense(5, activation='softmax'))


optimiser = optimizers.Adam()
mobileNetModel.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
print (mobileNetModel.summary())

None


In [93]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

In [94]:
mobileNetModel.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data/train ; batch size = 30


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/20
[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m5s[0m 3s/step - categorical_accuracy: 0.3419 - loss: 1.4811

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - categorical_accuracy: 0.3509 - loss: 1.4643Source path =  Project_data/val ; batch size = 30

Epoch 1: saving model to model_init_2024-12-2914_16_28.560001/model-00001-1.28836-0.44495-0.92080-0.69000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 3s/step - categorical_accuracy: 0.3548 - loss: 1.4570 - val_categorical_accuracy: 0.6900 - val_loss: 0.9208 - learning_rate: 0.0010
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - categorical_accuracy: 0.7857 - loss: 0.7050
Epoch 2: saving model to model_init_2024-12-2914_16_28.560001/model-00002-0.67111-0.77526-0.65101-0.73000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 3s/step - categorical_accuracy: 0.7853 - loss: 0.7036 - val_categorical_accuracy: 0.7300 - val_loss: 0.6510 - learning_rate: 0.0010
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - 

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4s/step - categorical_accuracy: 0.9997 - loss: 0.0101 - val_categorical_accuracy: 0.7900 - val_loss: 0.6599 - learning_rate: 8.0000e-06
Epoch 20/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - categorical_accuracy: 0.9999 - loss: 0.0108
Epoch 20: saving model to model_init_2024-12-2914_16_28.560001/model-00020-0.01393-0.99849-0.71122-0.76000.keras
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 4s/step - categorical_accuracy: 0.9999 - loss: 0.0109 - val_categorical_accuracy: 0.7600 - val_loss: 0.7112 - learning_rate: 8.0000e-06


<keras.src.callbacks.history.History at 0x38879c210>

So we have got the training accuarcy of 99 % and validation accuracy of 76 % in the base model

#### Experiment 10 - Pretrained Mobile net, with learning weights, using GRU

In [44]:
train_doc = np.random.permutation(open('Project_data_Final/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data_Final/val.csv').readlines())
batch_size = 20
num_epochs = 20
print ('# epochs =', num_epochs)

# epochs = 20


In [45]:
from keras.applications import mobilenet

mobilenet_transfer = mobilenet.MobileNet(weights='imagenet', include_top=False)

num_classes = 5

# Input shape: (number of frames, height, width, channels)
input_shape = (18, 120, 120, 3)
        
preTrainedGRU = Sequential()
preTrainedGRU.trainable = True

preTrainedGRU.add(TimeDistributed(mobilenet_transfer,input_shape=(input_shape)))


preTrainedGRU.add(TimeDistributed(BatchNormalization()))
preTrainedGRU.add(TimeDistributed(MaxPooling2D((2, 2))))
preTrainedGRU.add(TimeDistributed(Flatten()))

preTrainedGRU.add(GRU(128))
preTrainedGRU.add(Dropout(0.25))

preTrainedGRU.add(Dense(128,activation='relu'))
preTrainedGRU.add(Dropout(0.25))

preTrainedGRU.add(Dense(num_classes, activation='softmax'))


  mobilenet_transfer = mobilenet.MobileNet(weights='imagenet', include_top=False)


In [46]:
# Compile the model with an optimizer
optimiser = optimizers.Adam(learning_rate=0.0001)  # You can experiment with the learning rate
preTrainedGRU.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# Print the model summary to check the total number of parameters
print(preTrainedGRU.summary())

None


In [47]:
train_generator = generator_2(train_path, train_doc, batch_size)
val_generator = generator_2(val_path, val_doc, batch_size)

In [48]:
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

Steps per epoch: 34
Validation steps: 5


In [49]:
preTrainedGRU.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, initial_epoch=0)

Source path =  Project_data_Final/train ; batch size = 20
num_batches =  33


  image = imageio.imread(img_path).astype(np.float32)  # Read the image using imageio


Epoch 1/20
[1m32/34[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m16s[0m 8s/step - categorical_accuracy: 0.2378 - loss: 1.7694

  image = imageio.imread(img_path).astype(np.float32)  # Read the image


num_batches =  33
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8s/step - categorical_accuracy: 0.2396 - loss: 1.7637Source path =  Project_data_Final/val ; batch size = 20
num_batches =  5
num_batches =  5

Epoch 1: saving model to model_init_2024-12-3014_08_30.722141/model-00001-1.67187-0.26697-1.24460-0.56000.keras
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m469s[0m 9s/step - categorical_accuracy: 0.2403 - loss: 1.7610 - val_categorical_accuracy: 0.5600 - val_loss: 1.2446 - learning_rate: 1.0000e-04
Epoch 2/20
[1m32/34[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m13s[0m 7s/step - categorical_accuracy: 0.6114 - loss: 1.1226num_batches =  33
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7s/step - categorical_accuracy: 0.6111 - loss: 1.1202num_batches =  5

Epoch 2: saving model to model_init_2024-12-3014_08_30.722141/model-00002-1.08280-0.60633-0.99587-0.66000.keras
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 

<keras.src.callbacks.history.History at 0x2accff3ef10>