# ***Human Activity detection using 3DCNN***

## 1. Download the UFC50 Dataset and visualize the data

## 2. Install Dependencies:

In [1]:
import tensorrt

In [2]:
!/usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/conv3D_model_best.onnx --saveEngine=/nvdli-nano/data/Inference/conv3D_model_best.trt

&&&& RUNNING TensorRT.trtexec [TensorRT v8201] # /usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/conv3D_model_best.onnx --saveEngine=/nvdli-nano/data/Inference/conv3D_model_best.trt
[12/17/2023-07:57:07] [I] === Model Options ===
[12/17/2023-07:57:07] [I] Format: ONNX
[12/17/2023-07:57:07] [I] Model: /nvdli-nano/data/Inference/conv3D_model_best.onnx
[12/17/2023-07:57:07] [I] Output:
[12/17/2023-07:57:07] [I] === Build Options ===
[12/17/2023-07:57:07] [I] Max batch: explicit batch
[12/17/2023-07:57:07] [I] Workspace: 16 MiB
[12/17/2023-07:57:07] [I] minTiming: 1
[12/17/2023-07:57:07] [I] avgTiming: 8
[12/17/2023-07:57:07] [I] Precision: FP32
[12/17/2023-07:57:07] [I] Calibration: 
[12/17/2023-07:57:07] [I] Refit: Disabled
[12/17/2023-07:57:07] [I] Sparsity: Disabled
[12/17/2023-07:57:07] [I] Safe mode: Disabled
[12/17/2023-07:57:07] [I] DirectIO mode: Disabled
[12/17/2023-07:57:07] [I] Restricted mode: Disabled
[12/17/2023-07:57:07] [I] Save engine: /nvdli-nano/data/Inf

In [18]:
!pip3 install onnx

Collecting onnx
  Downloading https://files.pythonhosted.org/packages/8f/71/1543d8dad6a26df1da8953653ebdbedacea9f1a5bcd023fe10f8c5f66d63/onnx-1.14.1.tar.gz (11.3MB)
[K    100% |################################| 11.3MB 45kB/s  eta 0:00:01
Collecting protobuf>=3.20.2 (from onnx)
  Downloading https://files.pythonhosted.org/packages/6c/be/4e32d02bf08b8f76bf6e59f2a531690c1e4264530404501f3489ca975d9a/protobuf-4.21.0-py2.py3-none-any.whl (164kB)
[K    100% |################################| 174kB 2.7MB/s eta 0:00:01
[31mprotobuf requires Python '>=3.7' but the running Python is 3.6.9[0m
[?25h

In [13]:
# Import the required libraries.
import os
import cv2

import math
import random
import numpy as np
import datetime as dt
from collections import deque

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Specify the device (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
# Discard the output of this cell.
%%capture

# Downlaod the UCF50 Dataset
!wget --no-check-certificate https://www.crcv.ucf.edu/data/UCF50.rar

#Extract the Dataset
!unrar x UCF50.rar

##   Data Visualization

In [14]:
def frames_extraction(video_path):
    '''
    This function will extract the required frames from a video after resizing and normalizing them.
    Args:
        video_path: The path of the video in the disk, whose frames are to be extracted.
    Returns:
        frames_list: A list containing the resized and normalized frames of the video.
    '''

    # Declare a list to store video frames.
    frames_list = []

    # Read the Video File using the VideoCapture object.
    video_reader = cv2.VideoCapture(video_path)

    # Get the total number of frames in the video.
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the the interval after which frames will be added to the list.
    skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH), 1)

    # Iterate through the Video Frames.
    for frame_counter in range(SEQUENCE_LENGTH):

        # Set the current frame position of the video.
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

        # Reading the frame from the video.
        success, frame = video_reader.read()

        # Check if Video frame is not successfully read then break the loop
        if not success:
            break

        # Resize the Frame to fixed height and width.
        resized_frame = cv2.resize(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))

        # Normalize the resized frame by dividing it with 255 so that each pixel value then lies between 0 and 1
        normalized_frame = resized_frame / 255

        # Append the normalized frame into the frames list
        frames_list.append(normalized_frame)

    # Release the VideoCapture object.
    video_reader.release()

    # Return the frames list.
    return frames_list

In [15]:
def create_dataset():
    '''
    This function will extract the data of the selected classes and create the required dataset.
    Returns:
        features:          A list containing the extracted frames of the videos.
        labels:            A list containing the indexes of the classes associated with the videos.
        video_files_paths: A list containing the paths of the videos in the disk.
    '''

    # Declared Empty Lists to store the features, labels and video file path values.
    features = []
    labels = []
    video_files_paths = []

    # Iterating through all the classes mentioned in the classes list
    for class_index, class_name in enumerate(CLASSES_LIST):

        # Display the name of the class whose data is being extracted.
        print(f'Extracting Data of Class: {class_name}')

        # Get the list of video files present in the specific class name directory.
        files_list = os.listdir(os.path.join(DATASET_DIR, class_name))

        # Iterate through all the files present in the files list.
        for file_name in files_list:

            # Get the complete video path.
            video_file_path = os.path.join(DATASET_DIR, class_name, file_name)

            # Extract the frames of the video file.
            frames = frames_extraction(video_file_path)

            # Check if the extracted frames are equal to the SEQUENCE_LENGTH specified above.
            # So ignore the vides having frames less than the SEQUENCE_LENGTH.
            if len(frames) == SEQUENCE_LENGTH:

                # Append the data to their repective lists.
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)

    # Converting the list to numpy arrays
    features = np.asarray(features)
    labels = np.array(labels)

    # Return the frames, class index, and video file path.
    return features, labels, video_files_paths

In [16]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.utils import make_grid


# Set a seed for reproducibility
seed_constant = 27
torch.manual_seed(seed_constant)
np.random.seed(seed_constant)
random.seed(seed_constant)

# Specify the height and width to which each video frame will be resized in our dataset.
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64

# Specify the number of frames of a video that will be fed to the model as one sequence.
SEQUENCE_LENGTH = 25

# Specify the directory containing the UCF50 dataset.
DATASET_DIR = "UCF50"

# Specify the list containing the names of the classes used for training. Choose any set of classes.
CLASSES_LIST = ["WalkingWithDog", "TaiChi", "Swing", "HorseRace"]

# Define a PyTorch dataset class
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
        self.transform = transforms.Compose([
            transforms.ToTensor(),
        ])

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        frames = self.features[idx]
        frames = [self.transform(frame.astype(np.float32)) for frame in frames]
        frames = torch.stack(frames)
        return frames, torch.tensor(self.labels[idx])


# Create the dataset.
features, labels, video_files_paths = create_dataset()



Extracting Data of Class: WalkingWithDog


FileNotFoundError: [Errno 2] No such file or directory: 'UCF50/WalkingWithDog'

In [8]:

# Display a random subset of videos
plt.figure(figsize=(20, 20))
for i in range(1, 21):
    random_index = random.randint(0, len(features) - 1)
    selected_class_name = CLASSES_LIST[labels[random_index]]
    selected_video_path = video_files_paths[random_index]

    # Read the first frame of the video file.
    video_reader = cv2.VideoCapture(selected_video_path)
    _, bgr_frame = video_reader.read()
    video_reader.release()

    # Convert the frame from BGR into RGB format.
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)

    # Write the class name on the video frame.
    cv2.putText(rgb_frame, selected_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    plt.subplot(5, 4, i)
    plt.imshow(rgb_frame)
    plt.axis('off')

plt.show()



Output hidden; open in https://colab.research.google.com to view.

In [9]:
# Using PyTorch's DataLoader to handle batching and shuffling
dataset = CustomDataset(features, labels)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

In [17]:
# Define the 3D CNN model in PyTorch
class Conv3DModel(nn.Module):
    def __init__(self, num_classes=len(CLASSES_LIST), num_frames=SEQUENCE_LENGTH):
        super(Conv3DModel, self).__init__()
        self.conv1 = nn.Conv3d(num_frames, 32, kernel_size=(3, 3, 3), padding=1)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.dropout1 = nn.Dropout3d(0.4)

        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=1)
        self.pool2 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.dropout2 = nn.Dropout3d(0.4)

        self.conv3 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=1)
        self.pool3 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.dropout3 = nn.Dropout3d(0.4)

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128 * (SEQUENCE_LENGTH // 8) * (IMAGE_HEIGHT // 8) * (IMAGE_WIDTH // 8), 128)
        self.dropout4 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.dropout1(x)

        x = self.conv2(x)
        x = self.pool2(x)
        x = self.dropout2(x)

        x = self.conv3(x)
        x = self.pool3(x)
        x = self.dropout3(x)

        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout4(x)
        x = self.fc2(x)

        return x

# Create an instance of the model
conv3D_model = Conv3DModel()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(conv3D_model.parameters(), lr=0.001)



In [None]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    conv3D_model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = conv3D_model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validate the model
    conv3D_model.eval()
    with torch.no_grad():
        total, correct = 0, 0
        for inputs, labels in val_loader:
            outputs = conv3D_model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.squeeze()).sum().item()

    accuracy = correct / total
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Accuracy: {accuracy:.4f}')

# Save the model
torch.save(conv3D_model.state_dict(), 'conv3D_model.pth')
print("Conv3D model saved successfully!")

# Evaluate the trained model.
conv3D_model.eval()
total, correct = 0, 0
with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = conv3D_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.squeeze()).sum().item()

model_evaluation_accuracy = correct / total
print("Validation Accuracy:", model_evaluation_accuracy)

Epoch 1/10, Loss: 29.5064, Validation Accuracy: 0.1633
Epoch 2/10, Loss: 12.5289, Validation Accuracy: 0.2755
Epoch 3/10, Loss: 2.6092, Validation Accuracy: 0.2755
Epoch 4/10, Loss: 2.8312, Validation Accuracy: 0.3980
Epoch 5/10, Loss: 0.5683, Validation Accuracy: 0.4898
Epoch 6/10, Loss: 1.2543, Validation Accuracy: 0.5714
Epoch 7/10, Loss: 2.0865, Validation Accuracy: 0.6224
Epoch 8/10, Loss: 2.8411, Validation Accuracy: 0.4184
Epoch 9/10, Loss: 0.4220, Validation Accuracy: 0.6837
Epoch 10/10, Loss: 0.6089, Validation Accuracy: 0.4592
Conv3D model saved successfully!
Validation Accuracy: 0.45918367346938777


In [12]:
import torch.onnx
import torch.nn as nn
!pip install onnx

# Create an instance of the model
conv3D_model = Conv3DModel()

# Load the trained weights
conv3D_model.load_state_dict(torch.load('/content/drive/MyDrive/Project Phase 1/3DCNN/conv3D_model.pth'))

# Set the model to evaluation mode before exporting
conv3D_model.eval()

# Create a dummy input tensor to trace the model
dummy_input = torch.randn(1, SEQUENCE_LENGTH, 3, IMAGE_HEIGHT, IMAGE_WIDTH)

# Provide the path where you want to save the ONNX model
onnx_path = '/content/drive/MyDrive/Project Phase 1/3DCNN/conv3D_model.onnx'

# Export the model to ONNX format
torch.onnx.export(conv3D_model, dummy_input, onnx_path, verbose=True)

print("Model successfully exported to ONNX format.")

Collecting onnx
  Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx
Successfully installed onnx-1.15.0
Model successfully exported to ONNX format.
