In [6]:
### Summary #####
# 1. In this code , a video classification model has been created on UCF101 dataset. It has total 101 classes.
# 2. Due to resource Constraint, For initial stage model has been trained on 10 classes using ResNet50 model.
# 3. We get an "pretrained_weights_c10.h5" file in the first stage of training
# 4. Now we want to add 5 more classes to these pretrained weights. A new dataset has been added named "Custom_dataset_New_5_classes"
# 5. We load the pre-trained model trained on the existing 10 classes.
# 6. We remove the original output layer of the pre-trained model since it only corresponds to the 10 classes.
# 7. We freeze the layers of the pre-trained model to retain their weights.
# 8. We add a new output layer with 15 units to accommodate the additional 5 classes.
# 9. We train the modified model only on the new dataset containing the 5 new classes.
# 10.Finally, we save the model, which now recognizes all classes.



In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

In [2]:
os.chdir(r'/content/drive/MyDrive/Proglient_Assessment')

In [83]:
# Function to extract frames from videos
def extract_frames(video_path, num_frames=16, resize=(224, 224)):
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=np.int16)

    for i in range(total_frames):
        ret, frame = cap.read()
        if ret is False:
            break
        if i in frame_indices:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, resize)
            frames.append(frame)

    cap.release()
    return frames

In [84]:
# Path to the dataset directory
# dataset_dir = '/content/drive/MyDrive/Proglient_Assessment/New_dataset_5_classes'

dataset_dir='/content/drive/MyDrive/Proglient_Assessment/New_dataset_10_classes'

# List to store frames and labels
frames = []
labels = []

In [5]:
# Loop through each class directory
for class_name in os.listdir(dataset_dir):
    class_dir = os.path.join(dataset_dir, class_name)
    # Loop through each video in the class directory
    for video_name in os.listdir(class_dir):
        video_path = os.path.join(class_dir, video_name)
        extracted_frames = extract_frames(video_path)
        frames.extend(extracted_frames)
        labels.extend([class_name] * len(extracted_frames))

# Convert frames and labels to numpy arrays
frames = np.array(frames)
labels = np.array(labels)

In [6]:
# Perform one-hot encoding on the labels
label_binarizer = LabelBinarizer()
labels_encoded = label_binarizer.fit_transform(labels)


In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(frames, labels_encoded, test_size=0.2, random_state=42)

In [8]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(5757, 224, 224, 3)
(5757, 10)
(1440, 224, 224, 3)
(1440, 10)


In [9]:
# Load pre-trained ResNet50 model
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Build the model
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax') ##no. of classes
])

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
##### Training code###############

In [10]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)

# Train the model with callbacks
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32, callbacks=[early_stopping, reduce_lr])

# history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=60, batch_size=32)


# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')
# Save the weights
model.save_weights('pretrained_weights_c10.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Test Loss: 0.0007086097612045705
Test Accuracy: 0.9993055462837219


In [11]:
# Load the saved weights
model.load_weights('pretrained_weights_c10.h5')

# Get the classes from the label binarizer
classes = label_binarizer.classes_

# Print the classes
print("Classes in the dataset:")
for i, class_name in enumerate(classes):
    print(f"{i}: {class_name}")

Classes in the dataset:
0: ApplyEyeMakeup
1: ApplyLipstick
2: Archery
3: BabyCrawling
4: BalanceBeam
5: BandMarching
6: BaseballPitch
7: Basketball
8: BasketballDunk
9: BenchPress


In [25]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Predict classes for the test data
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true_classes, y_pred_classes)

# Print confusion matrix
print("Confusion Matrix:")
print(conf_matrix)

# Generate classification report
class_names = ["class_0", "class_1", "class_2", "class_3", "class_4", "class_5", "class_6", "class_7", "class_8", "class_9"]
report = classification_report(y_true_classes, y_pred_classes, target_names=class_names)

print("\nClassification Report:")
print(report)


Confusion Matrix:
[[139   0   0   0   0   0   0   0   0   0]
 [  0 133   0   0   0   0   0   0   0   0]
 [  0   0 153   0   0   0   0   0   0   0]
 [  0   1   0 145   0   0   0   0   0   0]
 [  0   0   0   0 145   0   0   0   0   0]
 [  0   0   0   0   0 144   0   0   0   0]
 [  0   0   0   0   0   0 147   0   0   0]
 [  0   0   0   0   0   0   0 135   0   0]
 [  0   0   0   0   0   0   0   0 141   0]
 [  0   0   0   0   0   0   0   0   0 157]]

Classification Report:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00       139
     class_1       0.99      1.00      1.00       133
     class_2       1.00      1.00      1.00       153
     class_3       1.00      0.99      1.00       146
     class_4       1.00      1.00      1.00       145
     class_5       1.00      1.00      1.00       144
     class_6       1.00      1.00      1.00       147
     class_7       1.00      1.00      1.00       135
     class_8       1.00      1.00      1

In [None]:
# Inferencing to the unseen video#####

In [12]:
unseen_video_path = '/content/drive/MyDrive/Proglient_Assessment/Testing_videos/v_ApplyEyeMakeup_g07_c06.avi'


In [33]:

# Extract frames from the unseen video
unseen_frames = extract_frames(unseen_video_path)

# Convert frames to numpy array and preprocess
unseen_frames = np.array(unseen_frames) / 255.0  # Normalize pixel values

# Make predictions
predictions = model.predict(unseen_frames)

# Aggregate predictions across frames
final_prediction = np.argmax(np.sum(predictions, axis=0))

# # Map prediction index to class label
label_mapping = {0: 'class_0', 1: 'class_1', 2: 'class_2', 3: 'class_3', 4: 'class_4', 5: 'class_5', 6: 'class_6',7: 'class_7', 8: 'class_8', 9: 'class_9',}
predicted_class = label_mapping[final_prediction]

print(f"The predicted class for the unseen video is: {predicted_class}")

The predicted class for the unseen video is: class_0


In [None]:
       ######## ADD THE CUSTOM MODEL WITH NEW CLASSES ############

In [None]:
# we will now freeze the layers of the pre-trained model that were trained on the initial 10 classes,
# add new layers to handle the new classes, and then train the entire model on the combined dataset (initial 10 classes + new 5 classes).
# This way, the model retains the knowledge learned from the initial classes while also adapting to the new classes.

In [4]:
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import glorot_uniform

In [None]:
# Load the saved weights
model.load_weights('pretrained_weights_c10.h5')

In [None]:
print(model.summary())

In [73]:

# Remove the original output layer
model.layers.pop()

# Freeze the layers of the pretrained model
for layer in model.layers:
    layer.trainable = False

In [49]:
# Function to extract frames from videos
def extract_frames(video_path, num_frames=16, resize=(224, 224)):
    frames = []
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=np.int16)

    for i in range(total_frames):
        ret, frame = cap.read()
        if ret is False:
            break
        if i in frame_indices:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, resize)
            frames.append(frame)

    cap.release()
    return frames

In [51]:
# Path to the New dataset directory

new_dataset_dir='/content/drive/MyDrive/Proglient_Assessment/Custom_dataset_New_5_classes'

# List to store frames and labels
frames = []
labels = []
# Loop through each class directory
for class_name in os.listdir(new_dataset_dir):
    class_dir = os.path.join(new_dataset_dir, class_name)
    # Loop through each video in the class directory
    for video_name in os.listdir(class_dir):
        video_path = os.path.join(class_dir, video_name)
        extracted_frames = extract_frames(video_path)
        frames.extend(extracted_frames)
        labels.extend([class_name] * len(extracted_frames))

# Convert frames and labels to numpy arrays
frames = np.array(frames)
labels = np.array(labels)
# Perform one-hot encoding on the labels
label_binarizer = LabelBinarizer()
labels_encoded = label_binarizer.fit_transform(labels)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(frames, labels_encoded, test_size=0.2, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(958, 224, 224, 3)
(958, 5)
(240, 224, 224, 3)
(240, 5)


In [61]:
from tensorflow.keras.layers import Dense, Concatenate

In [77]:
# Add new Dense layer for the new classes
new_output = Dense(5, activation='softmax')(model.layers[-1].output)  # Assuming the last layer is Dense

# Concatenate original output with new output
new_output_concatenated = Dense(15, activation='softmax')(model.layers[-2].output)

# Create the new model
model = Model(inputs=model.input, outputs=new_output_concatenated)

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)


history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32, callbacks=[early_stopping, reduce_lr])

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')
# Save the weights
model.save_weights('pretrained_weights_new15classes.h5')