In [1]:
import threading
import time
import cv2
import numpy as np
import torch
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
from torchvision.transforms import ToTensor
from jetcam.usb_camera import USBCamera
from jetcam.utils import bgr8_to_jpeg
import ipywidgets
from IPython.display import display

In [2]:
!ls -ltrh /dev/video*

crw-rw---- 1 root video 81, 0 Dec 21 22:40 /dev/video0


In [3]:
# For Jupyter Notebook
!jupyter nbextension enable --py widgetsnbextension --sys-prefix

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [4]:
from jetcam.usb_camera import USBCamera

camera = USBCamera(width=224, height=224, capture_width=640, capture_height=480, capture_device=0)
image_widget = ipywidgets.Image(format='jpeg',width = 400, height = 300)
display(image_widget)

Image(value=b'', format='jpeg', height='300', width='400')

In [5]:
# Import the necessary widget module
from IPython.display import display, clear_output
import ipywidgets as widgets

input_data_lock = threading.Lock()

CLASSES_LIST = ["WalkingWithDog", "TaiChi", "Swing", "HorseRace"]

# Load the TensorRT model
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open('conv3D_model_best.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()
   
# Define the transform for input frames
transform = ToTensor()

# Initialize frame_buffer with an empty frame
frame_buffer = []

# Define the shape of the output tensor based on your model
output_shape = (1, 4)

# Create an empty output tensor
output = np.empty(output_shape, dtype=np.float32)

# Initialize input_data before threads start
input_data = torch.zeros((1, 25, 3, 64, 64), dtype=torch.float32)

# Allocate device memory for inputs and outputs
d_input = cuda.mem_alloc(input_data.element_size() * input_data.nelement())
d_output = cuda.mem_alloc(output.nbytes)

# Create a stream
stream = cuda.Stream()

# Flag to indicate when to stop the threads
stop_threads = False
  
# Separate widget for displaying output text
output_text = widgets.Textarea(value="", disabled=True, layout={'height': '100px', 'width': '100%'})
display(output_text)

# Add this line to create a threading lock for output text
output_text_lock = threading.Lock()



def capture_frames():
    global frame_buffer, input_data, stop_threads
    
    def preprocess_video(frames):
        frames = [transform(frame.astype(np.float32)) for frame in frames]
        frames_tensor = torch.stack(frames)
        return frames_tensor.unsqueeze(0)
    
    while not stop_threads:
        image = camera.read()
        resized_frame = cv2.resize(image, (64, 64))
        normalized_frame = resized_frame / 255.0

        # Update the frame buffer
        frame_buffer.append(normalized_frame)
        frame_buffer = frame_buffer[-25:]

        time.sleep(1/30)  # Adjust the sleep time based on your camera's frame rate
              
        with input_data_lock:
            input_data = preprocess_video(frame_buffer)  
            

def inference():
    global frame_buffer, input_data
    current_prediction = None 
    while not stop_threads:
        if len(frame_buffer) >= 25:
            
            
            print("D_INPUT : ")
            print(d_input)
            
            print("input_data : ")
            print(input_data)
            
            print(input_data.shape)
            
            
            with input_data_lock:
                cuda.memcpy_htod_async(d_input, input_data.numpy().ravel(), stream)

            # Measure inference time
            start_time = time.time()

            # Execute inference
            with input_data_lock:
                context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
                stream.synchronize()

            end_time = time.time()
            inference_time = end_time - start_time
            
            # Transfer predictions back to host
            cuda.memcpy_dtoh_async(output, d_output, stream)
            stream.synchronize()

            # Convert PyCuda GPU array to NumPy array
            output_np = np.empty(output_shape, dtype=np.float32)
            cuda.memcpy_dtoh_async(output_np, d_output, stream)
            stream.synchronize()

            # Convert NumPy array to PyTorch tensor
            output_tensor = torch.from_numpy(output_np)
            
           
            print("D_OUTPUT : ")
            print(d_output)
            
            print("OUTPUT : ")
            print(output)
            
            print(output.shape)
            
            '''output_tensor = torch.from_numpy(output)
            
            #_, predicted_class = torch.max(output_tensor, 1)
            predicted_class_index = predicted_class.item()
            
            predicted_class_name = CLASSES_LIST[predicted_class_index]
            
            #if current_prediction != predicted_class_name:
            current_prediction = predicted_class_name
            clear_output(wait=True)  # Clear the previous output
            display_text.value = f"Live Prediction: {predicted_class_name}"'''

                 
            # Inside the 'inference' function
            output_tensor = torch.from_numpy(output)
            
            # Predicted class
            _, predicted_class = torch.max(output_tensor, 1)
            predicted_class_index = predicted_class.item()

            predicted_class_name = CLASSES_LIST[predicted_class_index]

            # Update the display in real-time
            with output_text_lock:
                output_text.value = f"Raw Model Output: {output_tensor}\n" \
                                    f"Predicted Class Index: {predicted_class_index}\n" \
                                    f"Predicted Class Name: {predicted_class_name}"

            '''# Inside the 'inference' function
            print(f"Raw Model Output: {output_tensor}\n"
                  f"Predicted Class Index: {predicted_class_index}\n"
                  f"Predicted Class Name: {predicted_class_name}")'''
    
            # Display the last 25 frames
            frames_to_display = frame_buffer[-25:]
            for frame in frames_to_display:
                # Convert the frame to bytes and update the image widget
                image_bytes = cv2.imencode('.jpg', (frame * 255).astype(np.uint8))[1].tobytes()
                image_widget.value = image_bytes

                
            # Introduce a delay for approximately 0.8333 seconds
            time.sleep(1)



# Create and start threads
capture_thread = threading.Thread(target=capture_frames)
inference_thread = threading.Thread(target=inference)

capture_thread.start()
inference_thread.start()

# Run threads until 'q' is pressed
try:
    while True:
        if cv2.waitKey(1) & 0xFF == ord('q'):
            stop_threads = True
            break
except KeyboardInterrupt:
    stop_threads = True

# Wait for threads to finish
capture_thread.join()
inference_thread.join()

# Release the camera
camera.running = False

Textarea(value='', disabled=True, layout=Layout(height='100px', width='100%'))

D_INPUT : 
<pycuda._driver.DeviceAllocation object at 0x7f083709e0>
input_data : 
tensor([[[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

          [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

          [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           

In [None]:
import os
os._exit(00)