In [8]:
# Environment setup cell: every line below is an IPython shell escape (`!`),
# so it runs in a subshell rather than in the Python kernel.
# NOTE(review): none of the packages are version-pinned, so a re-run may pull
# different releases than the ones that produced the outputs in this notebook.

# Model tooling: Ultralytics (YOLO), ONNX, SMP (DeepLabV3+), ONNX Runtime.
!pip install ultralytics
!pip install onnx
!pip install segmentation_models_pytorch
!pip install onnxruntime
# Install TensorRT
# NOTE(review): `tensorflow` is not imported by any cell shown in this
# notebook -- presumably only needed by the UFF converter below; confirm.
!pip install tensorrt
!pip install tensorflow

# Install additional dependencies
# NOTE(review): both `tensorrt` (above) and `nvidia-tensorrt` are installed;
# presumably only one of the two is required -- confirm before pinning.
!pip install nvidia-pyindex
!pip install nvidia-tensorrt
!pip install pycuda

# Install system packages including TensorRT tools
# (apt packages: TensorRT Python development bindings and the UFF converter)
!apt-get update
!apt-get install -y python3-libnvinfer-dev
!apt-get install -y uff-converter-tf

Collecting pycuda
  Downloading pycuda-2024.1.2.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.15-py3-none-any.whl.metadata (2.9 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading pytools-2024.1.15-py3-none-any.whl (88 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.6/88.6 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml)

In [9]:
import torch
import os
from datetime import datetime
import subprocess
from ultralytics import YOLO
import segmentation_models_pytorch as smp
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

In [6]:
# Project root; every model path used in this notebook is built from it.
HOME_DIR = "/content/drive/MyDrive/Pallets_detection"

# Trained YOLO detector checkpoint (loaded by export_yolo).
YOLO_MODEL_PATH = os.path.join(
    HOME_DIR,
    "YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model.pt",
)

# Trained DeepLabV3+ state dict (loaded by export_deeplabv3).
DEEPLABV3_MODEL_PATH = os.path.join(
    HOME_DIR,
    "Deeplabv3_ObjectSegmentation_Dataset/best_deeplabv3plus_model.pth",
)

In [19]:
# Function to export a PyTorch model to ONNX
def export_to_onnx(model, dummy_input, onnx_path, input_names=['input'], output_names=['output']):
    """
    Exports a PyTorch model to the ONNX format with a dynamic batch dimension.

    Axis 0 of every tensor named in ``input_names`` and ``output_names`` is
    declared dynamic (labelled ``batch_size``). With the default names this
    yields exactly ``{'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}``.

    Args:
        model (torch.nn.Module): The PyTorch model to export.
        dummy_input (torch.Tensor): A tensor with the same shape as the model's expected input.
        onnx_path (str): The file path to save the ONNX model. Missing parent
            directories are created.
        input_names (list, optional): Names for the input nodes. Defaults to ['input'].
        output_names (list, optional): Names for the output nodes. Defaults to ['output'].
    """
    # Derive the dynamic-axes map from the names actually passed in. Keying it
    # on the literals 'input'/'output' would silently drop the dynamic batch
    # axis whenever a caller supplies custom node names.
    named_tensors = list(input_names or []) + list(output_names or [])
    dynamic_axes = {name: {0: 'batch_size'} for name in named_tensors}

    # Create the destination folder up front so the export does not fail on a
    # missing directory. Only applies to path-like targets; anything else
    # (e.g. a file-like object) is passed through untouched.
    if isinstance(onnx_path, (str, os.PathLike)):
        target_dir = os.path.dirname(os.fspath(onnx_path))
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        export_params=True,
        opset_version=11,
        do_constant_folding=True,
        input_names=input_names,
        output_names=output_names,
        dynamic_axes=dynamic_axes
    )
    print(f"Model exported to {onnx_path}")

# Export YOLO model
def export_yolo():
    """
    Export the trained YOLO detector to ONNX.

    The checkpoint at YOLO_MODEL_PATH is opened through the Ultralytics YOLO
    wrapper, the wrapped torch module is put in eval mode, and that module is
    handed to export_to_onnx together with a random dummy batch.
    """
    # Where the exported graph is written, under HOME_DIR
    onnx_target = os.path.join(HOME_DIR, "YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model.onnx")

    # The Ultralytics wrapper exposes the underlying torch module as `.model`
    detector = YOLO(YOLO_MODEL_PATH)
    torch_module = detector.model
    torch_module.eval()

    # One random sample: batch_size=1, channels=3, height=640, width=640
    sample_batch = torch.randn(1, 3, 640, 640)

    export_to_onnx(torch_module, sample_batch, onnx_target)

# Export DeepLabV3+ model using SMP
def export_deeplabv3():
    """
    Export the trained DeepLabV3+ segmentation model to ONNX.

    Rebuilds the SMP DeepLabV3Plus architecture, loads the state dict stored
    at DEEPLABV3_MODEL_PATH onto the CPU, and passes the model in eval mode to
    export_to_onnx with a random 1x3x512x512 dummy batch.
    """
    # Where the exported graph is written, under HOME_DIR
    onnx_target = os.path.join(HOME_DIR, "Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model.onnx")

    # Architecture settings; these have to match what was used for training,
    # otherwise the state dict below will not load.
    architecture = dict(
        encoder_name='resnet101',
        encoder_weights=None,   # no pretrained encoder; weights come from the state dict
        in_channels=3,
        classes=3,              # Background, Ground, Pallet
        activation=None,        # assumes no activation was used during training
    )
    segmenter = smp.DeepLabV3Plus(**architecture)

    # Restore the trained parameters, mapping every tensor to the CPU
    trained_weights = torch.load(DEEPLABV3_MODEL_PATH, map_location=torch.device('cpu'))
    segmenter.load_state_dict(trained_weights)
    segmenter.eval()

    # Exporting only; keep the model on the CPU
    segmenter.to('cpu')

    # One random sample: batch_size=1, channels=3, height=512, width=512
    sample_batch = torch.randn(1, 3, 512, 512)

    export_to_onnx(segmenter, sample_batch, onnx_target)


# Run both exports; each call prints the path of the ONNX file it wrote.
export_yolo()
export_deeplabv3()


Model exported to /content/drive/MyDrive/Pallets_detection/YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model.onnx
Model exported to /content/drive/MyDrive/Pallets_detection/Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model.onnx


In [21]:
# Function to quantize ONNX model
def quantize_model(onnx_input_path, onnx_output_path):
    """
    Apply ONNX Runtime dynamic quantization to an ONNX file.

    Args:
        onnx_input_path (str): Path of the ONNX model to quantize.
        onnx_output_path (str): Path where the quantized model is written.
    """
    # Weights are stored as signed 8-bit integers (QuantType.QInt8)
    quantization_options = {
        "model_input": onnx_input_path,
        "model_output": onnx_output_path,
        "weight_type": QuantType.QInt8,
    }
    quantize_dynamic(**quantization_options)
    print(f"Quantized model saved to {onnx_output_path}")

if __name__ == "__main__":
    # Resolve every source/destination pair first ...
    yolo_onnx_path = os.path.join(
        HOME_DIR, "YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model.onnx"
    )
    yolo_quantized_path = os.path.join(
        HOME_DIR, "YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model_quant.onnx"
    )
    deeplabv3_onnx_path = os.path.join(
        HOME_DIR, "Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model.onnx"
    )
    deeplabv3_quantized_path = os.path.join(
        HOME_DIR, "Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model_quant.onnx"
    )

    # ... then quantize YOLO first, DeepLabV3+ second.
    quantize_model(yolo_onnx_path, yolo_quantized_path)
    quantize_model(deeplabv3_onnx_path, deeplabv3_quantized_path)




Quantized model saved to /content/drive/MyDrive/Pallets_detection/YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model_quant.onnx




Quantized model saved to /content/drive/MyDrive/Pallets_detection/Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model_quant.onnx


In [21]:
import os
import tensorrt as trt
import numpy as np
import onnx

def build_tensorrt_engine(onnx_path, engine_path, fp16=True,
                          min_batch=1, opt_batch=4, max_batch=8,
                          workspace_bytes=1 << 30):
    """
    Build a TensorRT engine from an ONNX model, save it, and return it.

    An optimization profile is registered for every network input whose
    batch dimension (axis 0) is dynamic. Inputs with fully static shapes are
    left alone, because a profile whose dims differ from a static network
    dimension is rejected by TensorRT.

    Args:
        onnx_path (str): Path of the ONNX model to parse.
        engine_path (str): Path where the serialized engine is written.
        fp16 (bool): Enable FP16 kernels when the platform reports fast FP16.
        min_batch (int): Smallest batch size in the optimization profile.
        opt_batch (int): Batch size TensorRT should optimize for.
        max_batch (int): Largest batch size in the optimization profile.
        workspace_bytes (int): Workspace memory pool limit (default 1 GB).

    Returns:
        The deserialized TensorRT engine.

    Raises:
        ValueError: If the batch sizes are not ordered 1 <= min <= opt <= max,
            if the ONNX model cannot be parsed, or if an input has a dynamic
            dimension other than the batch axis.
        RuntimeError: If the engine cannot be built or deserialized.
    """
    if not (1 <= min_batch <= opt_batch <= max_batch):
        raise ValueError(
            f"Batch sizes must satisfy 1 <= min <= opt <= max, "
            f"got min={min_batch}, opt={opt_batch}, max={max_batch}"
        )

    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)

    # Parse ONNX model
    with open(onnx_path, 'rb') as model:
        if not parser.parse(model.read()):
            for error in range(parser.num_errors):
                print(f"Parser Error {error}: {parser.get_error(error)}")
            raise ValueError("Failed to parse ONNX model")

    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)

    # Enable FP16 precision (silently skipped when the platform lacks fast FP16)
    if fp16 and builder.platform_has_fast_fp16:
        config.set_flag(trt.BuilderFlag.FP16)

    # Create one optimization profile covering every dynamic-batch input
    profile = builder.create_optimization_profile()
    profile_needed = False
    for index in range(network.num_inputs):
        input_tensor = network.get_input(index)
        dims = tuple(input_tensor.shape)

        # Fully static input: nothing to declare in the profile.
        if all(dim >= 0 for dim in dims):
            continue

        # Only the batch axis can be resolved from the batch arguments; any
        # other dynamic axis would leave a -1 in the profile shapes.
        trailing_dims = dims[1:]
        if any(dim < 0 for dim in trailing_dims):
            raise ValueError(
                f"Input '{input_tensor.name}' has dynamic non-batch dimensions "
                f"{dims}; only a dynamic batch axis is supported"
            )

        profile.set_shape(
            input_tensor.name,
            (min_batch,) + trailing_dims,
            (opt_batch,) + trailing_dims,
            (max_batch,) + trailing_dims,
        )
        profile_needed = True

    if profile_needed:
        config.add_optimization_profile(profile)

    # Build and save engine using build_serialized_network
    engine_bytes = builder.build_serialized_network(network, config)
    if engine_bytes is None:
        raise RuntimeError("Failed to build TensorRT engine")

    # Save the serialized engine
    with open(engine_path, 'wb') as f:
        f.write(engine_bytes)

    print(f"TensorRT engine saved to: {engine_path}")

    # Create runtime and deserialize engine
    runtime = trt.Runtime(logger)
    engine = runtime.deserialize_cuda_engine(engine_bytes)
    if engine is None:
        raise RuntimeError("Failed to deserialize TensorRT engine")
    return engine

def verify_model(model_path):
    """
    Check that an ONNX file loads and passes the ONNX checker.

    Args:
        model_path (str): Path of the ONNX model to validate.

    Returns:
        bool: True when the model loads and checks cleanly; False when any
        exception is raised along the way (the error is printed, not raised).
    """
    is_valid = False
    try:
        onnx.checker.check_model(onnx.load(model_path))
        print(f"Model {model_path} verified successfully")
        is_valid = True
    except Exception as exc:
        # Best-effort validation: report the problem and signal failure
        print(f"Model verification failed: {str(exc)}")
    return is_valid

def main():
    """
    Convert the exported YOLO and DeepLabV3+ ONNX models to TensorRT engines.

    For each model, in order (YOLO, then DeepLabV3+): if the ONNX file exists
    and passes verify_model, an FP16 engine is built next to it; otherwise a
    message is printed. Any exception is printed with its traceback and stops
    the remaining conversions. Finally the size of every ONNX/engine file that
    exists is reported in MB.
    """
    # Folder layout
    HOME_DIR = "/content/drive/MyDrive/Pallets_detection"
    YOLO_DIR = os.path.join(HOME_DIR, "YOLO_ObjectDetection_Dataset/models")
    DEEPLABV3_DIR = os.path.join(HOME_DIR, "Deeplabv3_ObjectSegmentation_Dataset/model")

    # Make sure both output folders exist
    for folder in (YOLO_DIR, DEEPLABV3_DIR):
        os.makedirs(folder, exist_ok=True)

    # Source ONNX file and target engine file for each model
    yolo_paths = {
        'original': os.path.join(YOLO_DIR, "best_YOLO11x_model.onnx"),
        'engine': os.path.join(YOLO_DIR, "best_YOLO11x_model.trt")
    }
    deeplabv3_paths = {
        'original': os.path.join(DEEPLABV3_DIR, "best_deeplabv3plus_model.onnx"),
        'engine': os.path.join(DEEPLABV3_DIR, "best_deeplabv3plus_model.trt")
    }

    # (label, banner printed before processing, paths) in processing order
    jobs = (
        ("YOLO", "Processing YOLO model...", yolo_paths),
        ("DeepLabV3+", "\nProcessing DeepLabV3+ model...", deeplabv3_paths),
    )

    try:
        for label, banner, paths in jobs:
            print(banner)
            onnx_file = paths['original']
            if os.path.exists(onnx_file) and verify_model(onnx_file):
                build_tensorrt_engine(onnx_file, paths['engine'], fp16=True)
            else:
                print(f"{label} model not found or invalid at {onnx_file}")
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        import traceback
        traceback.print_exc()

    # Report on-disk sizes of whatever files are present
    print("\nFile size verification:")
    for label, _banner, paths in jobs:
        for role, file_path in paths.items():
            if not os.path.exists(file_path):
                continue
            size_mb = os.path.getsize(file_path) / (1024 * 1024)
            print(f"{label} {role} model size: {size_mb:.2f} MB")

# Entry point: run the ONNX -> TensorRT conversion when this cell is executed
# as the top-level module.
if __name__ == "__main__":
    main()

Processing YOLO model...
Model /content/drive/MyDrive/Pallets_detection/YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model.onnx verified successfully
TensorRT engine saved to: /content/drive/MyDrive/Pallets_detection/YOLO_ObjectDetection_Dataset/models/best_YOLO11x_model.trt

Processing DeepLabV3+ model...
Model /content/drive/MyDrive/Pallets_detection/Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model.onnx verified successfully
TensorRT engine saved to: /content/drive/MyDrive/Pallets_detection/Deeplabv3_ObjectSegmentation_Dataset/model/best_deeplabv3plus_model.trt

File size verification:
YOLO original model size: 216.95 MB
YOLO engine model size: 111.41 MB
DeepLabV3+ original model size: 174.07 MB
DeepLabV3+ engine model size: 88.50 MB
