In [7]:
from utils.camera import CameraDisplay
import time
import cv2
import onnxruntime as ort
import torchvision.transforms.functional as tf
import torch

from PIL import Image
from utils.dataloader import image_transform
from utils.viz import plot_predictions

from models.tinyyolov2 import TinyYoloV2Original, TinyYoloV2PersonOnly
from torch.quantization import fuse_modules
# Timestamp of the most recently processed frame. Every `callback` below reads it to
# derive the frame rate and then overwrites it (declared `global now` inside them).
now = time.time()

# Full YOLO (PyTorch)

In [None]:
# Build the full TinyYoloV2 network and load the pretrained weights, keeping the last
# layer of the checkpoint (discard_last_layer=False).
model = TinyYoloV2Original()
model.load_pt_from_disk("data/voc_pretrained.pt", discard_last_layer=False)
# torch.nn.Module instances start in training mode. Switch to inference mode so that
# layers whose behaviour differs between the two modes (e.g. BatchNorm, Dropout) use
# their inference behaviour when predict() runs the model on single frames.
# NOTE(review): assumes TinyYoloV2Original is an nn.Module — confirm in models.tinyyolov2.
model.eval()

def predict(image):
    """Run the global `model` on one frame and return the frame with predictions drawn.

    Args:
        image: Frame as an array accepted by `PIL.Image.fromarray`.

    Returns:
        Whatever `plot_predictions(..., return_array=True)` returns for the
        model output and the preprocessed input batch.
    """
    pil_frame = Image.fromarray(image)
    # image_transform returns an indexable result; only its first element is used.
    # NOTE(review): presumably an (image, target) pair — confirm in utils.dataloader.
    transformed = image_transform(pil_frame)[0]
    # Convert to a tensor and prepend a batch dimension of size 1.
    batch = tf.to_tensor(transformed).unsqueeze(0)
    with torch.no_grad():
        predictions = model(batch)
    return plot_predictions(predictions, batch, return_array=True)

def callback(image):
    """Annotate one camera frame with predictions and the current frame rate.

    Runs `predict` on the frame, drops the first and last 70 rows of the returned
    array, and draws the text "fps=<n>" near the top-left corner.

    Args:
        image: Frame passed in by the caller (this function is handed to
            `CameraDisplay`); forwarded to `predict` unchanged.

    Returns:
        The cropped, annotated frame with the FPS overlay drawn on it.
    """
    global now

    # Read the clock once so the interval that is measured and the timestamp stored
    # for the next frame refer to the same instant.
    current = time.time()
    elapsed = current - now
    now = current
    # time.time() may return the same value twice (coarse timer) or step backwards
    # (wall-clock adjustment); avoid a ZeroDivisionError or a negative rate then.
    fps = f"{int(1 / elapsed)}" if elapsed > 0 else "n/a"
    image = predict(image)[70:-70]
    cv2.putText(image, "fps=" + fps, (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (100, 255, 0), 2, cv2.LINE_AA)
    return image

# Initialize the camera with the callback and start it.
# NOTE(review): presumably CameraDisplay invokes `callback` for every captured frame and
# shows the returned array — confirm in utils.camera.
cam = CameraDisplay(callback)
cam.start()

In [4]:
# Stop the running camera display, then release it (the recorded output of this cell is
# "Camera released"). Run this before a later cell rebinds `cam` to a new CameraDisplay.
cam.stop()
cam.release()

Camera released


# Person-only YOLO (PyTorch)

In [None]:
# Build the person-only network and load its weights. strict=False means keys that are
# missing from, or unexpected in, the checkpoint are ignored instead of raising.
model = TinyYoloV2PersonOnly()
model.load_state_dict(torch.load("data/person_only.pt"), strict=False)
# torch.nn.Module instances start in training mode. Switch to inference mode so that
# layers whose behaviour differs between the two modes (e.g. BatchNorm, Dropout) use
# their inference behaviour when predict() runs the model on single frames.
model.eval()

def predict(image):
    """Run the global person-only `model` on one frame and return the annotated frame.

    Args:
        image: Frame as an array accepted by `PIL.Image.fromarray`.

    Returns:
        Whatever `plot_predictions(..., return_array=True, person_only=True)`
        returns for the model output and the preprocessed input batch.
    """
    pil_frame = Image.fromarray(image)
    # image_transform returns an indexable result; only its first element is used.
    # NOTE(review): presumably an (image, target) pair — confirm in utils.dataloader.
    transformed = image_transform(pil_frame)[0]
    # Convert to a tensor and prepend a batch dimension of size 1.
    batch = tf.to_tensor(transformed).unsqueeze(0)
    with torch.no_grad():
        predictions = model(batch)
    return plot_predictions(predictions, batch, return_array=True, person_only=True)

def callback(image):
    """Annotate one camera frame with predictions and the current frame rate.

    Runs `predict` on the frame, drops the first and last 70 rows of the returned
    array, and draws the text "fps=<n>" near the top-left corner.

    Args:
        image: Frame passed in by the caller (this function is handed to
            `CameraDisplay`); forwarded to `predict` unchanged.

    Returns:
        The cropped, annotated frame with the FPS overlay drawn on it.
    """
    global now

    # Read the clock once so the interval that is measured and the timestamp stored
    # for the next frame refer to the same instant.
    current = time.time()
    elapsed = current - now
    now = current
    # time.time() may return the same value twice (coarse timer) or step backwards
    # (wall-clock adjustment); avoid a ZeroDivisionError or a negative rate then.
    fps = f"{int(1 / elapsed)}" if elapsed > 0 else "n/a"
    image = predict(image)[70:-70]
    cv2.putText(image, "fps=" + fps, (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (100, 255, 0), 2, cv2.LINE_AA)
    return image

# Initialize the camera with the callback and start it.
# NOTE(review): presumably CameraDisplay invokes `callback` for every captured frame and
# shows the returned array — confirm in utils.camera.
cam = CameraDisplay(callback)
cam.start()

In [6]:
# Stop the running camera display, then release it (the recorded output of this cell is
# "Camera released"). Run this before a later cell rebinds `cam` to a new CameraDisplay.
cam.stop()
cam.release()

Camera released


# Person-only YOLO (PyTorch, fused modules)

In [None]:
# Build the person-only network and load its weights. strict=False means keys that are
# missing from, or unexpected in, the checkpoint are ignored instead of raising.
model = TinyYoloV2PersonOnly()
model.load_state_dict(torch.load("data/person_only.pt"), strict=False)
# Module fusion is an inference-time transformation: put the model into eval mode
# *before* fusing so the fused layers are built from the stored statistics, and so
# predict() afterwards runs every layer in inference mode.
model.eval()
# Take the names of all registered sub-modules, skipping the first five entries and the
# last one, and group the remaining names into consecutive pairs to be fused.
# NOTE(review): the [5:-1] slice and the pairing depend on the registration order inside
# TinyYoloV2PersonOnly (presumably conv/bn pairs) — confirm against the model definition.
modules = [name for name, _ in model.named_modules()][5:-1]
modules_to_fuse = [modules[i:i + 2] for i in range(0, len(modules), 2)]
fuse_modules(model, modules_to_fuse, inplace=True)

def predict(image):
    """Run the global fused person-only `model` on one frame and return the annotated frame.

    Args:
        image: Frame as an array accepted by `PIL.Image.fromarray`.

    Returns:
        Whatever `plot_predictions(..., return_array=True, person_only=True)`
        returns for the model output and the preprocessed input batch.
    """
    pil_frame = Image.fromarray(image)
    # image_transform returns an indexable result; only its first element is used.
    # NOTE(review): presumably an (image, target) pair — confirm in utils.dataloader.
    transformed = image_transform(pil_frame)[0]
    # Convert to a tensor and prepend a batch dimension of size 1.
    batch = tf.to_tensor(transformed).unsqueeze(0)
    with torch.no_grad():
        predictions = model(batch)
    return plot_predictions(predictions, batch, return_array=True, person_only=True)

def callback(image):
    """Annotate one camera frame with predictions and the current frame rate.

    Runs `predict` on the frame, drops the first and last 70 rows of the returned
    array, and draws the text "fps=<n>" near the top-left corner.

    Args:
        image: Frame passed in by the caller (this function is handed to
            `CameraDisplay`); forwarded to `predict` unchanged.

    Returns:
        The cropped, annotated frame with the FPS overlay drawn on it.
    """
    global now

    # Read the clock once so the interval that is measured and the timestamp stored
    # for the next frame refer to the same instant.
    current = time.time()
    elapsed = current - now
    now = current
    # time.time() may return the same value twice (coarse timer) or step backwards
    # (wall-clock adjustment); avoid a ZeroDivisionError or a negative rate then.
    fps = f"{int(1 / elapsed)}" if elapsed > 0 else "n/a"
    image = predict(image)[70:-70]
    cv2.putText(image, "fps=" + fps, (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (100, 255, 0), 2, cv2.LINE_AA)
    return image

# Initialize the camera with the callback and start it.
# NOTE(review): presumably CameraDisplay invokes `callback` for every captured frame and
# shows the returned array — confirm in utils.camera.
cam = CameraDisplay(callback)
cam.start()

In [9]:
# Stop the running camera display, then release it (the recorded output of this cell is
# "Camera released"). Run this before a later cell rebinds `cam` to a new CameraDisplay.
cam.stop()
cam.release()

Camera released


# Full YOLO (ONNX Runtime)

In [6]:
# Create the ONNX Runtime session for the full model stored under data/.
model_file = "pretrained.onnx"
sess_options = ort.SessionOptions()
# Enable every graph optimization level ONNX Runtime offers.
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# Only the CUDA execution provider is requested; no CPU provider is listed explicitly.
ort_sess = ort.InferenceSession('data/'+model_file, sess_options=sess_options, providers=['CUDAExecutionProvider'])

def predict(image):
    """Run the global ONNX Runtime session on one frame and return the annotated frame.

    Args:
        image: Frame as an array accepted by `PIL.Image.fromarray`.

    Returns:
        Whatever `plot_predictions(..., return_array=True)` returns for the
        first session output and the preprocessed input batch.
    """
    image = Image.fromarray(image)
    # image_transform returns an indexable result; only its first element is used.
    # NOTE(review): presumably an (image, target) pair — confirm in utils.dataloader.
    image = image_transform(image)[0]
    image = tf.to_tensor(image)
    # Prepend a batch dimension of size 1.
    image = torch.unsqueeze(image, 0)
    # Ask the session for the name of its first input instead of hard-coding "input.1":
    # that name is generated by the exporter and is not guaranteed to be the same for
    # every exported file.
    input_name = ort_sess.get_inputs()[0].name
    # Passing None as the output list requests all outputs; only the first one is used.
    onnx_predictions = ort_sess.run(None, {input_name: image.numpy()})[0]
    return plot_predictions(onnx_predictions, image, return_array=True)


def callback(image):
    """Annotate one camera frame with predictions and the current frame rate.

    Runs `predict` on the frame, drops the first and last 70 rows of the returned
    array, and draws the text "fps=<n>" near the top-left corner.

    Args:
        image: Frame passed in by the caller (this function is handed to
            `CameraDisplay`); forwarded to `predict` unchanged.

    Returns:
        The cropped, annotated frame with the FPS overlay drawn on it.
    """
    global now

    # Read the clock once so the interval that is measured and the timestamp stored
    # for the next frame refer to the same instant.
    current = time.time()
    elapsed = current - now
    now = current
    # time.time() may return the same value twice (coarse timer) or step backwards
    # (wall-clock adjustment); avoid a ZeroDivisionError or a negative rate then.
    fps = f"{int(1 / elapsed)}" if elapsed > 0 else "n/a"
    image = predict(image)[70:-70]
    cv2.putText(image, "fps=" + fps, (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (100, 255, 0), 2, cv2.LINE_AA)
    return image

# Initialize the camera with the callback and start it.
# NOTE(review): presumably CameraDisplay invokes `callback` for every captured frame and
# shows the returned array — confirm in utils.camera.
cam = CameraDisplay(callback)
cam.start()

In [11]:
# Stop the running camera display, then release it (the recorded output of this cell is
# "Camera released"). Run this before a later cell rebinds `cam` to a new CameraDisplay.
cam.stop()
cam.release()

Camera released


# Person-only YOLO (ONNX Runtime)

In [None]:
# Create the ONNX Runtime session for the person-only model stored under data/.
model_file = "person_only.onnx"
sess_options = ort.SessionOptions()
# Enable every graph optimization level ONNX Runtime offers.
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# Only the CUDA execution provider is requested; no CPU provider is listed explicitly.
ort_sess = ort.InferenceSession('data/'+model_file, sess_options=sess_options, providers=['CUDAExecutionProvider'])

def predict(image):
    """Run the global person-only ONNX Runtime session on one frame and return the annotated frame.

    Args:
        image: Frame as an array accepted by `PIL.Image.fromarray`.

    Returns:
        Whatever `plot_predictions(..., return_array=True, person_only=True)`
        returns for the first session output and the preprocessed input batch.
    """
    image = Image.fromarray(image)
    # image_transform returns an indexable result; only its first element is used.
    # NOTE(review): presumably an (image, target) pair — confirm in utils.dataloader.
    image = image_transform(image)[0]
    image = tf.to_tensor(image)
    # Prepend a batch dimension of size 1.
    image = torch.unsqueeze(image, 0)
    # Ask the session for the name of its first input instead of hard-coding "input.1":
    # that name is generated by the exporter and is not guaranteed to be the same for
    # every exported file.
    input_name = ort_sess.get_inputs()[0].name
    # Passing None as the output list requests all outputs; only the first one is used.
    onnx_predictions = ort_sess.run(None, {input_name: image.numpy()})[0]
    return plot_predictions(onnx_predictions, image, return_array=True, person_only=True)


def callback(image):
    """Annotate one camera frame with predictions and the current frame rate.

    Runs `predict` on the frame, drops the first and last 70 rows of the returned
    array, and draws the text "fps=<n>" near the top-left corner.

    Args:
        image: Frame passed in by the caller (this function is handed to
            `CameraDisplay`); forwarded to `predict` unchanged.

    Returns:
        The cropped, annotated frame with the FPS overlay drawn on it.
    """
    global now

    # Read the clock once so the interval that is measured and the timestamp stored
    # for the next frame refer to the same instant.
    current = time.time()
    elapsed = current - now
    now = current
    # time.time() may return the same value twice (coarse timer) or step backwards
    # (wall-clock adjustment); avoid a ZeroDivisionError or a negative rate then.
    fps = f"{int(1 / elapsed)}" if elapsed > 0 else "n/a"
    image = predict(image)[70:-70]
    cv2.putText(image, "fps=" + fps, (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (100, 255, 0), 2, cv2.LINE_AA)
    return image

# Initialize the camera with the callback and start it.
# NOTE(review): presumably CameraDisplay invokes `callback` for every captured frame and
# shows the returned array — confirm in utils.camera.
cam = CameraDisplay(callback)
cam.start()

In [None]:
# Stop the running camera display, then release it.
cam.stop()
cam.release()