In [3]:
import torch
import cv2
import numpy as np
import torch.nn as nn
from torchvision.transforms import ToTensor
import time
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

In [5]:
import cv2
import numpy as np
import torch
import tensorrt as trt
from torchvision.transforms import ToTensor

# Load the TensorRT model: deserialize the serialized engine (plan file) from
# disk and create the execution context that the inference code below drives.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
ENGINE_PATH = 'conv3D_model_best.trt'
with open(ENGINE_PATH, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    # deserialize_cuda_engine signals failure by returning None instead of
    # raising; without this check the next line fails with an unhelpful
    # "'NoneType' object has no attribute 'create_execution_context'".
    if engine is None:
        raise RuntimeError(
            f"Failed to deserialize TensorRT engine from {ENGINE_PATH!r}; "
            "see the TensorRT logger output above for the reason."
        )
    context = engine.create_execution_context()
    if context is None:
        raise RuntimeError(
            f"Failed to create a TensorRT execution context for {ENGINE_PATH!r}."
        )

# Number of frames read from the start of each clip by preprocess_video.
SEQUENCE_LENGTH = 25
# NOTE(review): DATASET_DIR is not referenced in the visible cells; the name
# looks like it may be a typo for "UCF50" -- confirm before relying on it.
DATASET_DIR = "UCT50"
# Class names indexed by the model's predicted class index.
# Presumably this order must match the label order used at training time -- TODO confirm.
CLASSES_LIST = ["WalkingWithDog","TaiChi","Swing","Horserace"]
# Spatial size every frame is resized to before being fed to the model.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# Define the transform for input frames.
# preprocess_video passes float32 arrays, for which ToTensor only reorders
# H x W x C to C x H x W (its /255 scaling applies to uint8 input only).
transform = ToTensor()

def preprocess_video(video_path):
    """Read the first SEQUENCE_LENGTH frames of a video and build the model input.

    Each frame is resized to IMAGE_WIDTH x IMAGE_HEIGHT, scaled from [0, 255]
    to [0, 1], converted to float32 and passed through ``transform``.

    Args:
        video_path: Path of the video file, as accepted by ``cv2.VideoCapture``.

    Returns:
        A float32 ``torch.Tensor`` holding the stacked per-frame tensors with a
        leading batch dimension of 1, i.e. shape
        ``(1, SEQUENCE_LENGTH, *transform(frame).shape)``.

    Raises:
        RuntimeError: If the video cannot be opened or no frame can be read.
    """
    frames_list = []
    video_reader = cv2.VideoCapture(video_path)

    try:
        if not video_reader.isOpened():
            raise RuntimeError(f"Could not open video: {video_path!r}")

        for _ in range(SEQUENCE_LENGTH):
            success, frame = video_reader.read()
            if not success:
                # End of stream (or decode error): keep what was read so far.
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
            # Divide first (float64), then cast, to keep the exact values the
            # original pipeline produced.
            normalized_frame = (resized_frame / 255).astype(np.float32)
            frames_list.append(transform(normalized_frame))
    finally:
        # Always release the capture handle, even if decoding/resizing fails.
        video_reader.release()

    if not frames_list:
        raise RuntimeError(f"No frames could be read from video: {video_path!r}")

    # The engine consumes a fixed number of frames; if the clip is shorter,
    # pad by repeating the last frame so the tensor always has
    # SEQUENCE_LENGTH entries (torch.stack copies, so sharing the object is safe).
    while len(frames_list) < SEQUENCE_LENGTH:
        frames_list.append(frames_list[-1])

    frames_tensor = torch.stack(frames_list)
    return frames_tensor.unsqueeze(0)

# Replace 'input_video_path' with the path to your input video
input_video_path = 'v_Swing_g01_c01.avi'
input_data = preprocess_video(input_video_path)

# The device buffer is sized from input_data, so a clip with fewer frames than
# expected would make the engine read past the end of the allocation.
if input_data.shape[1] != SEQUENCE_LENGTH:
    raise ValueError(
        f"Expected {SEQUENCE_LENGTH} frames but got {input_data.shape[1]} "
        f"from {input_video_path!r}"
    )

# One score per class for a batch of one; derived from CLASSES_LIST so the
# buffer stays in sync with the label list.
# Adjust this if your model's output shape differs.
output_shape = (1, len(CLASSES_LIST))

# The *_async copies below require page-locked (pinned) host memory, so stage
# the input in a pinned float32 buffer and receive the output in another.
host_input = cuda.pagelocked_empty(input_data.nelement(), dtype=np.float32)
host_input[:] = input_data.numpy().ravel()
output = cuda.pagelocked_empty(output_shape, dtype=np.float32)

# Allocate device memory for inputs and outputs
d_input = cuda.mem_alloc(host_input.nbytes)
d_output = cuda.mem_alloc(output.nbytes)

# Create a stream
stream = cuda.Stream()

# Transfer input data to device and wait for the copy to finish, so that the
# timed region below measures inference only and not the host-to-device copy.
cuda.memcpy_htod_async(d_input, host_input, stream)
stream.synchronize()

# Measure inference time
start_time = time.time()

# Execute inference (bindings are passed as raw device addresses: input, output)
context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
stream.synchronize()

end_time = time.time()
inference_time = end_time - start_time

# Transfer predictions back to host
cuda.memcpy_dtoh_async(output, d_output, stream)
stream.synchronize()

# Convert the NumPy array to a PyTorch tensor
output_tensor = torch.from_numpy(output)

# Get the predicted class index (arg-max over the class dimension)
_, predicted_class = torch.max(output_tensor, 1)
predicted_class_index = predicted_class.item()

# Map the index to the class name
predicted_class_name = CLASSES_LIST[predicted_class_index]

print(f"The model predicts the input video belongs to class: {predicted_class_name}")
print(f"Inference time: {inference_time:.5f} seconds")

The model predicts the input video belongs to class: Swing
Inference time: 0.06108 seconds


In [11]:
import os

# Look up the on-disk size (in bytes) of the serialized TensorRT engine,
# then report it in megabytes (1 MB = 1024 * 1024 bytes here).
engine_file_stat = os.stat('/nvdli-nano/data/Inference/conv3D_model_best.trt')
model_size = engine_file_stat.st_size

print(f"The size of the TensorRT model file is: {model_size / (1024 * 1024):.2f} MB")

The size of the TensorRT model file is: 13.17 MB
