In [1]:
import torchvision.models as models

# Load the pre-trained SqueezeNet model
model = models.squeezenet1_1(pretrained=True)

In [2]:
print(model)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): MaxPool2d

In [4]:
import torch
from torchvision import transforms
from torchvision.models import squeezenet1_1
from PIL import Image
import time

# Load the pre-trained SqueezeNet model
model = squeezenet1_1(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Load and preprocess an example image
image_path = '1.jpg'
input_image = Image.open(image_path).convert('RGB')

# Define the image transformation pipeline
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Apply transformations to the input image
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0)  # Add a batch dimension

# Make the prediction
with torch.no_grad():
    start_time = time.time()
    output = model(input_batch)
    end_time = time.time()

# Get the predicted class index
_, predicted_idx = torch.max(output, 1)
predicted_class = "Cats" if predicted_idx.item() == 0 else "Dogs"

# Print the predicted class label and inference time
inference_time = end_time - start_time
print(f'Predicted class: {predicted_class}')
print(f'Inference time: {inference_time:.4f} seconds')


Predicted class: Dogs
Inference time: 0.7966 seconds


In [5]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total trainable parameters: {total_params}')

Total trainable parameters: 1235496


In [6]:
import torch
import torchvision.models as models

# Load the pre-trained SqueezeNet model
model = models.squeezenet1_1(pretrained=True)

# Define input size (224x224x3) for SqueezeNet
input_size = (224, 224, 3)

# Function to calculate FLOPs for the model
def count_flops(model, input_size):
    # Define a tensor of appropriate size
    input_tensor = torch.randn(1, *input_size)
    
    # Switch to evaluation mode
    model.eval()
    
    # Move model to appropriate device
    device = next(model.parameters()).device
    input_tensor = input_tensor.to(device)
    model.to(device)
    
    # Iterate through model's layers
    flops = 0
    for module in model.modules():
        if isinstance(module, torch.nn.Conv2d):
            # For convolutional layers
            output_size = (input_size[0] - module.kernel_size[0] + 2 * module.padding[0]) // module.stride[0] + 1
            output_size = (output_size - module.kernel_size[1] + 2 * module.padding[1]) // module.stride[1] + 1
            flops += module.in_channels * module.out_channels * module.kernel_size[0] * module.kernel_size[1] * output_size * output_size
            input_size = (output_size, output_size, module.out_channels)
        elif isinstance(module, torch.nn.MaxPool2d):
            # For max pooling layers
            output_size = (input_size[0] - module.kernel_size) // module.stride + 1
            flops += input_size[2] * output_size * output_size
            input_size = (output_size, output_size, input_size[2])
        elif isinstance(module, torch.nn.Linear):
            # For fully connected layers
            flops += module.in_features * module.out_features
            input_size = (module.out_features,)
    
    return flops

# Calculate FLOPs
total_flops = count_flops(model, input_size)
print("Total FLOPs:", total_flops)

Total FLOPs: 78414144


In [7]:
import torch
import torchvision.models as models
import torch.onnx

# Load the pre-trained SqueezeNet model
model = models.squeezenet1_1(pretrained=True)
model.eval()

# Specify the input size
dummy_input = torch.randn(1, 3, 224, 224)

# Convert the PyTorch model to ONNX
onnx_path = 'squeezenet.onnx'
torch.onnx.export(model, dummy_input, onnx_path, verbose=True)

graph(%input.1 : Float(1, 3, 224, 224, strides=[150528, 50176, 224, 1], requires_grad=0, device=cpu),
      %features.0.weight : Float(64, 3, 3, 3, strides=[27, 9, 3, 1], requires_grad=1, device=cpu),
      %features.0.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
      %features.3.squeeze.weight : Float(16, 64, 1, 1, strides=[64, 1, 1, 1], requires_grad=1, device=cpu),
      %features.3.squeeze.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
      %features.3.expand1x1.weight : Float(64, 16, 1, 1, strides=[16, 1, 1, 1], requires_grad=1, device=cpu),
      %features.3.expand1x1.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
      %features.3.expand3x3.weight : Float(64, 16, 3, 3, strides=[144, 9, 3, 1], requires_grad=1, device=cpu),
      %features.3.expand3x3.bias : Float(64, strides=[1], requires_grad=1, device=cpu),
      %features.4.squeeze.weight : Float(16, 128, 1, 1, strides=[128, 1, 1, 1], requires_grad=1, device=cpu),
      %features.4.sq

In [8]:
import tensorrt

In [9]:
!/usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/MODELS/Model_4/squeezenet.onnx --saveEngine=/nvdli-nano/data/Inference/MODELS/Model_4/squeezenet.trt

&&&& RUNNING TensorRT.trtexec [TensorRT v8201] # /usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/MODELS/Model_4/squeezenet.onnx --saveEngine=/nvdli-nano/data/Inference/MODELS/Model_4/squeezenet.trt
[02/08/2024-05:02:52] [I] === Model Options ===
[02/08/2024-05:02:52] [I] Format: ONNX
[02/08/2024-05:02:52] [I] Model: /nvdli-nano/data/Inference/MODELS/Model_4/squeezenet.onnx
[02/08/2024-05:02:52] [I] Output:
[02/08/2024-05:02:52] [I] === Build Options ===
[02/08/2024-05:02:52] [I] Max batch: explicit batch
[02/08/2024-05:02:52] [I] Workspace: 16 MiB
[02/08/2024-05:02:52] [I] minTiming: 1
[02/08/2024-05:02:52] [I] avgTiming: 8
[02/08/2024-05:02:52] [I] Precision: FP32
[02/08/2024-05:02:52] [I] Calibration: 
[02/08/2024-05:02:52] [I] Refit: Disabled
[02/08/2024-05:02:52] [I] Sparsity: Disabled
[02/08/2024-05:02:52] [I] Safe mode: Disabled
[02/08/2024-05:02:52] [I] DirectIO mode: Disabled
[02/08/2024-05:02:52] [I] Restricted mode: Disabled
[02/08/2024-05:02:52] [I] Save engi

In [2]:
import cv2
import numpy as np
import torch
import tensorrt as trt
import time

# Load the TensorRT model
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open('squeezenet.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

# Load and preprocess the input image
image_path = '1.jpg'
image = cv2.imread(image_path)
image = cv2.resize(image, (227, 227))  # Adjust size for SqueezeNet
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
image = image.astype(np.float32) / 255.0  # Normalize to [0, 1]
image = np.transpose(image, (2, 0, 1))  # Change to channel-first format

# Convert to torch tensor
image_tensor = torch.tensor(image, dtype=torch.float32)

# Run inference and measure time
with torch.no_grad():
    # Convert the tensor to a batched format
    input_data = image_tensor.unsqueeze(0)

    # Transfer the input tensor to GPU (if available)
    if torch.cuda.is_available():
        input_data = input_data.to('cuda')

    # Allocate device memory for the input tensor
    d_input = torch.cuda.FloatTensor(input_data)

    # Allocate device memory for the output tensor
    d_output = torch.empty((1, 1), device='cuda')  # Single output neuron for binary classification

    # Measure inference time
    start_time = time.time()

    # Run inference
    context.execute(1, bindings=[int(d_input.data_ptr()), int(d_output.data_ptr())])

    # Measure elapsed time
    elapsed_time = time.time() - start_time

    # Transfer the output tensor back to the host
    h_output = d_output.cpu().numpy()

# Post-process the output
predicted_probability = torch.sigmoid(torch.from_numpy(h_output)).item()

# Determine the predicted class based on probability threshold (e.g., 0.5)
predicted_class = "dog" if predicted_probability >= 0.5 else "cat"

# Print the predicted class and inference time
print("Predicted class:", predicted_class)
print("Predicted probability:", predicted_probability)
print("Inference time:", elapsed_time, "seconds")

Predicted class: dog
Predicted probability: 0.8116484880447388
Inference time: 0.16630077362060547 seconds
