In [1]:
import torch
import cv2
import numpy as np
import torch.nn as nn
from torchvision.transforms import ToTensor
import time
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

In [2]:
!ls -ltrh /dev/video*

crw-rw---- 1 root video 81, 0 Dec 21 22:40 /dev/video0


In [3]:
from jetcam.usb_camera import USBCamera

# TODO: change capture_device if it is incorrect for your system.
# NOTE(review): a later cell builds a USBCamera with these same arguments;
# presumably only one of the two cells should be run per kernel — confirm.
camera = USBCamera(
    width=224,
    height=224,
    capture_width=640,
    capture_height=480,
    capture_device=0,
)

In [1]:
import torch
import cv2
import numpy as np
import torch.nn as nn
from torchvision.transforms import ToTensor
import time
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
from jetcam.usb_camera import USBCamera
import ipywidgets
from IPython.display import display
from jetcam.utils import bgr8_to_jpeg

# Load the TensorRT model: read the serialized engine from disk, deserialize
# it, and create the execution context used for inference further down.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open('conv3D_model_best.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    # Deserialization signals failure by returning None rather than raising;
    # fail here with a clear message instead of an AttributeError on the next line.
    if engine is None:
        raise RuntimeError(
            "Failed to deserialize the TensorRT engine from 'conv3D_model_best.trt'"
        )
    context = engine.create_execution_context()
    if context is None:
        raise RuntimeError(
            "Failed to create a TensorRT execution context for 'conv3D_model_best.trt'"
        )

# Define the transform for input frames.
# preprocess_frame applies it as its last step, after the frame has been
# resized, divided by 255 and cast to float32.
transform = ToTensor()

# Constants
# Number of frames collected before a prediction is made.
SEQUENCE_LENGTH = 25
# Target size passed to cv2.resize for every frame.
IMAGE_HEIGHT = IMAGE_WIDTH = 64
# Class names, indexed by the predicted class index.
CLASSES_LIST = [
    "WalkingWithDog",
    "TaiChi",
    "Swing",
    "HorseRace",
]


# Function to preprocess a single frame
def preprocess_frame(frame):
    """Resize one frame to the model resolution and convert it to a tensor.

    Args:
        frame: Image data accepted by ``np.array``. Assumed to be an
            ``(H, W, C)`` 8-bit image as delivered by the camera — TODO confirm.

    Returns:
        The result of ``transform`` applied to the resized frame after it has
        been divided by 255 and cast to ``float32``.
    """
    # Copy the image into a NumPy array so OpenCV can operate on it.
    frame_np = np.array(frame)

    # cv2.resize expects dsize as (width, height), not (height, width).
    # The two only coincide while IMAGE_WIDTH == IMAGE_HEIGHT.
    resized_frame = cv2.resize(frame_np, (IMAGE_WIDTH, IMAGE_HEIGHT))

    # Scale by 1/255 (maps 8-bit pixel values into [0, 1]) before conversion.
    normalized_frame = resized_frame / 255.0

    return transform(normalized_frame.astype(np.float32))

# Function to preprocess a sequence of frames
def preprocess_video(frames):
    """Turn a sequence of raw frames into a single batched tensor.

    Each frame is passed through ``preprocess_frame``; the results are stacked
    along a new first dimension and a leading dimension of size 1 is added.

    Args:
        frames: Iterable of frames accepted by ``preprocess_frame``.

    Returns:
        A tensor whose first dimension has size 1 and whose second dimension
        indexes the frames.
    """
    processed = []
    for raw_frame in frames:
        processed.append(preprocess_frame(raw_frame))

    clip = torch.stack(processed)
    return torch.unsqueeze(clip, 0)

# Open the USB camera and show an (initially empty) JPEG widget for the preview.
camera = USBCamera(
    width=224,
    height=224,
    capture_width=640,
    capture_height=480,
    capture_device=0,
)
image_widget = ipywidgets.Image(format='jpeg')
display(image_widget)

# Read-only text area that the capture loop fills with the latest prediction.
live_prediction_widget = ipywidgets.Textarea(
    value="",
    disabled=True,
    layout={'height': '100px', 'width': '100%'},
)
display(live_prediction_widget)

# Capture and preprocess frames in real-time.
#
# Every frame is shown in the preview widget, preprocessed once and buffered.
# Once SEQUENCE_LENGTH frames are buffered they are stacked into one clip, run
# through the TensorRT execution context, and the buffer is cleared for the
# next clip. The loop runs until the kernel is interrupted.
captured_frames = []

# Host buffer that receives the class scores: one score per CLASSES_LIST entry.
output = np.empty((1, len(CLASSES_LIST)), dtype=np.float32)

# CUDA resources are created on the first prediction and reused afterwards
# instead of being re-allocated for every clip.
stream = None
d_input = None
d_output = None

try:
    while True:
        # Capture a frame and show it in the preview widget.
        image = camera.read()
        image_widget.value = bgr8_to_jpeg(image)

        # Preprocess exactly once, at capture time, and buffer the tensor.
        captured_frames.append(preprocess_frame(image))
        if len(captured_frames) < SEQUENCE_LENGTH:
            continue

        # The buffered frames are already preprocessed tensors, so stack them
        # directly (same stack + leading batch dimension that
        # preprocess_video builds). Sending them through preprocess_video()
        # would run preprocess_frame() on every frame a second time, resizing
        # and rescaling the tensors again.
        input_data = torch.stack(captured_frames).unsqueeze(0)
        captured_frames = []

        # Keep a named reference so the host array stays alive until the
        # asynchronous copy below has been synchronized.
        host_input = np.ascontiguousarray(input_data.numpy())

        if stream is None:
            stream = cuda.Stream()
            d_input = cuda.mem_alloc(host_input.nbytes)
            d_output = cuda.mem_alloc(output.nbytes)

        # Transfer input data to device.
        cuda.memcpy_htod_async(d_input, host_input, stream)

        # Execute inference and time it up to stream completion.
        # NOTE(review): newer TensorRT releases prefer execute_async_v2 for
        # explicit-batch engines — confirm against the installed version.
        start_time = time.perf_counter()
        context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
        stream.synchronize()
        inference_time = time.perf_counter() - start_time

        # Transfer predictions back to host.
        cuda.memcpy_dtoh_async(output, d_output, stream)
        stream.synchronize()

        # Clone so the tensor does not alias the reused host buffer.
        output_tensor = torch.from_numpy(output).clone()

        # Index of the highest score, mapped to its class name.
        predicted_class_index = int(torch.argmax(output_tensor, dim=1).item())
        predicted_class_name = CLASSES_LIST[predicted_class_index]

        output_text = f"Raw Model Output:\n{output_tensor}\n" \
                      f"Predicted Class Index: {predicted_class_index}\n" \
                      f"Predicted Class Name: {predicted_class_name}\n"\
                      f"Inference Time: {inference_time}"

        live_prediction_widget.value = output_text

except KeyboardInterrupt:
    # Interrupting the kernel is the expected way to stop the live loop.
    pass
finally:
    # Stop the camera on every exit path, not only on KeyboardInterrupt.
    camera.running = False

Image(value=b'', format='jpeg')

Textarea(value='', disabled=True, layout=Layout(height='100px', width='100%'))

In [None]:
import os

# Terminate the kernel process immediately with exit status 0.
# NOTE(review): presumably used to force the camera/GPU to be released — confirm.
os._exit(0)

In [None]:
import os

# Report the on-disk size of the serialized engine file in MB.
# NOTE(review): this path differs from the 'conv3D_model_best.trt' file that is
# deserialized for inference — confirm which file is meant to be measured.
model_size = os.stat('/nvdli-nano/data/Inference/conv3D_model.trt').st_size

print(f"The size of the TensorRT model file is: {model_size / (1024 * 1024):.2f} MB")