In [1]:
import torchvision.models as models

# Load the pre-trained DenseNet model
model = models.densenet201(pretrained=True)

Downloading: "https://download.pytorch.org/models/densenet201-c1103571.pth" to /root/.cache/torch/hub/checkpoints/densenet201-c1103571.pth
100.0%


In [2]:
print(model)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [6]:
import torch
from torchvision import transforms
from torchvision.models import densenet121
from PIL import Image
import time

# Load the pre-trained DenseNet-121 model
model = densenet121(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Load and preprocess an example image
image_path = '1.jpg'
input_image = Image.open(image_path).convert('RGB')

# Define the image transformation pipeline
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Apply transformations to the input image
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0)  # Add a batch dimension

# Make the prediction
with torch.no_grad():
    start_time = time.time()
    output = model(input_batch)
    end_time = time.time()

# Get the predicted class index
_, predicted_idx = torch.max(output, 1)
predicted_class = "Cats" if predicted_idx.item() == 0 else "Dogs"

# Print the predicted class label and inference time
inference_time = end_time - start_time
print(f'Predicted class: {predicted_class}')
print(f'Inference time: {inference_time:.4f} seconds')

Predicted class: Dogs
Inference time: 1.0132 seconds


In [7]:
import torch
import torchvision.models as models
import torch.onnx

# Load the pre-trained DenseNet model
model = models.densenet121(pretrained=True)
model.eval()

# Specify the input size
dummy_input = torch.randn(1, 3, 224, 224)

# Convert the PyTorch model to ONNX
onnx_path = 'densenet121.onnx'
torch.onnx.export(model, dummy_input, onnx_path, verbose=True)

graph(%input.1 : Float(1, 3, 224, 224, strides=[150528, 50176, 224, 1], requires_grad=0, device=cpu),
      %features.denseblock1.denselayer1.norm1.weight : Float(64, strides=[1], requires_grad=1, device=cpu),
      %features.denseblock1.denselayer1.norm1.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
      %features.denseblock1.denselayer1.norm1.running_mean : Float(64, strides=[1], requires_grad=0, device=cpu),
      %features.denseblock1.denselayer1.norm1.running_var : Float(64, strides=[1], requires_grad=0, device=cpu),
      %features.denseblock1.denselayer1.conv2.weight : Float(32, 128, 3, 3, strides=[1152, 9, 3, 1], requires_grad=1, device=cpu),
      %features.denseblock1.denselayer2.norm1.weight : Float(96, strides=[1], requires_grad=1, device=cpu),
      %features.denseblock1.denselayer2.norm1.bias : Float(96, strides=[1], requires_grad=1, device=cpu),
      %features.denseblock1.denselayer2.norm1.running_mean : Float(96, strides=[1], requires_grad=0, device=cpu)

In [8]:
import tensorrt

In [9]:
!/usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/MODELS/Model_5/densenet121.onnx --saveEngine=/nvdli-nano/data/Inference/MODELS/Model_5/densenet121.trt

&&&& RUNNING TensorRT.trtexec [TensorRT v8201] # /usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/MODELS/Model_5/densenet121.onnx --saveEngine=/nvdli-nano/data/Inference/MODELS/Model_5/densenet121.trt
[02/08/2024-05:30:30] [I] === Model Options ===
[02/08/2024-05:30:30] [I] Format: ONNX
[02/08/2024-05:30:30] [I] Model: /nvdli-nano/data/Inference/MODELS/Model_5/densenet121.onnx
[02/08/2024-05:30:30] [I] Output:
[02/08/2024-05:30:30] [I] === Build Options ===
[02/08/2024-05:30:30] [I] Max batch: explicit batch
[02/08/2024-05:30:30] [I] Workspace: 16 MiB
[02/08/2024-05:30:30] [I] minTiming: 1
[02/08/2024-05:30:30] [I] avgTiming: 8
[02/08/2024-05:30:30] [I] Precision: FP32
[02/08/2024-05:30:30] [I] Calibration: 
[02/08/2024-05:30:30] [I] Refit: Disabled
[02/08/2024-05:30:30] [I] Sparsity: Disabled
[02/08/2024-05:30:30] [I] Safe mode: Disabled
[02/08/2024-05:30:30] [I] DirectIO mode: Disabled
[02/08/2024-05:30:30] [I] Restricted mode: Disabled
[02/08/2024-05:30:30] [I] Save e

In [12]:
import cv2
import numpy as np
import torch
import tensorrt as trt
import time

# Load the TensorRT model
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open('densenet121.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

# Load and preprocess the input image
image_path = '1.jpg'
image = cv2.imread(image_path)
image = cv2.resize(image, (224, 224))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
image = image.astype(np.float32) / 255.0  # Normalize to [0, 1]
image = np.transpose(image, (2, 0, 1))  # Change to channel-first format

# Convert to torch tensor
image_tensor = torch.tensor(image, dtype=torch.float32)

# Run inference and measure time
with torch.no_grad():
    # Convert the tensor to a batched format
    input_data = image_tensor.unsqueeze(0)

    # Transfer the input tensor to GPU (if available)
    if torch.cuda.is_available():
        input_data = input_data.to('cuda')

    # Allocate device memory for the input tensor
    d_input = torch.cuda.FloatTensor(input_data)

    # Allocate device memory for the output tensor
    d_output = torch.empty((1, 2), device='cuda')  # Assuming 2 classes (dogs and cats)

    # Measure inference time
    start_time = time.time()

    # Run inference
    context.execute(1, bindings=[int(d_input.data_ptr()), int(d_output.data_ptr())])

    # Measure elapsed time
    elapsed_time = time.time() - start_time

    # Transfer the output tensor back to the host
    h_output = d_output.cpu().numpy()

# Post-process the output
predictions = torch.from_numpy(h_output).cpu().numpy()

# Assuming the first class corresponds to 'cats' and the second to 'dogs'
class_names = ['cats', 'dogs']
predicted_class = np.argmax(predictions)

# Print the predicted class and inference time
print("Predicted class:", class_names[predicted_class])
print("Inference time:", elapsed_time, "seconds")

Predicted class: dogs
Inference time: 0.4697723388671875 seconds


In [13]:
import torch
import torchvision.models as models

# Load the pretrained DenseNet model
model = models.densenet121(pretrained=True)

# Define input size (224x224x3) for DenseNet
input_size = (224, 224, 3)

# Function to calculate FLOPs for the model
def count_flops(model, input_size):
    # Define a tensor of appropriate size
    input_tensor = torch.randn(1, *input_size)
    
    # Switch to evaluation mode
    model.eval()
    
    # Move model to appropriate device
    device = next(model.parameters()).device
    input_tensor = input_tensor.to(device)
    model.to(device)
    
    # Iterate through model's layers
    flops = 0
    for module in model.modules():
        if isinstance(module, torch.nn.Conv2d):
            # For convolutional layers
            output_size = (input_size[0] - module.kernel_size[0] + 2 * module.padding[0]) // module.stride[0] + 1
            output_size = (output_size - module.kernel_size[1] + 2 * module.padding[1]) // module.stride[1] + 1
            flops += module.in_channels * module.out_channels * module.kernel_size[0] * module.kernel_size[1] * output_size * output_size
            input_size = (output_size, output_size, module.out_channels)
        elif isinstance(module, torch.nn.Linear):
            # For fully connected layers
            flops += module.in_features * module.out_features
            input_size = (module.out_features,)
    
    return flops

# Calculate FLOPs
total_flops = count_flops(model, input_size)
print("Total FLOPs:", total_flops)

Total FLOPs: 21545996288


In [14]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total trainable parameters: {total_params}')

Total trainable parameters: 7978856
