In [4]:
import torchvision.models as models

# Load the pre-trained ShuffleNet V2 model with scale factor x2.0
model = models.shufflenet_v2_x1_0(pretrained=True)

Downloading: "https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth" to /root/.cache/torch/hub/checkpoints/shufflenetv2_x1-5666bf0f80.pth
100.0%


In [5]:
print(model)

ShuffleNetV2(
  (conv1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (stage2): Sequential(
    (0): InvertedResidual(
      (branch1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): ReLU(inplace=True)
      )
      (branch2): Sequential(
        (0): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running_

In [7]:
import torch
from torchvision import transforms
from torchvision.models import shufflenet_v2_x1_0
from PIL import Image
import time

# Load the pre-trained ShuffleNet V2 1.0 model
model = shufflenet_v2_x1_0(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Load and preprocess an example image
image_path = '1.jpg'
input_image = Image.open(image_path).convert('RGB')

# Define the image transformation pipeline
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Apply transformations to the input image
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0)  # Add a batch dimension

# Make the prediction
with torch.no_grad():
    start_time = time.time()
    output = model(input_batch)
    end_time = time.time()

# Get the predicted class index
_, predicted_idx = torch.max(output, 1)
predicted_class = "Cats" if predicted_idx.item() == 0 else "Dogs"

# Print the predicted class label and inference time
inference_time = end_time - start_time
print(f'Predicted class: {predicted_class}')
print(f'Inference time: {inference_time:.4f} seconds')


Predicted class: Dogs
Inference time: 4.7121 seconds


In [8]:
import torch
import torchvision.models as models
import torch.onnx

# Load the pre-trained ShuffleNet model
model = models.shufflenet_v2_x1_0(pretrained=True)
model.eval()

# Specify the input size
dummy_input = torch.randn(1, 3, 224, 224)

# Convert the PyTorch model to ONNX
onnx_path = 'shufflenet.onnx'
torch.onnx.export(model, dummy_input, onnx_path, verbose=True)

  channels_per_group = num_channels // groups


graph(%input.1 : Float(1, 3, 224, 224, strides=[150528, 50176, 224, 1], requires_grad=0, device=cpu),
      %fc.weight : Float(1000, 1024, strides=[1024, 1], requires_grad=1, device=cpu),
      %fc.bias : Float(1000, strides=[1], requires_grad=1, device=cpu),
      %854 : Float(24, 3, 3, 3, strides=[27, 9, 3, 1], requires_grad=0, device=cpu),
      %855 : Float(24, strides=[1], requires_grad=0, device=cpu),
      %857 : Float(24, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cpu),
      %858 : Float(24, strides=[1], requires_grad=0, device=cpu),
      %860 : Float(58, 24, 1, 1, strides=[24, 1, 1, 1], requires_grad=0, device=cpu),
      %861 : Float(58, strides=[1], requires_grad=0, device=cpu),
      %863 : Float(58, 24, 1, 1, strides=[24, 1, 1, 1], requires_grad=0, device=cpu),
      %864 : Float(58, strides=[1], requires_grad=0, device=cpu),
      %866 : Float(58, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=0, device=cpu),
      %867 : Float(58, strides=[1], requires_grad=0

In [9]:
import tensorrt

In [10]:
!/usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/MODELS/Model_6/shufflenet.onnx --saveEngine=/nvdli-nano/data/Inference/MODELS/Model_6/shufflenet.trt

&&&& RUNNING TensorRT.trtexec [TensorRT v8201] # /usr/src/tensorrt/bin/trtexec --onnx=/nvdli-nano/data/Inference/MODELS/Model_6/shufflenet.onnx --saveEngine=/nvdli-nano/data/Inference/MODELS/Model_6/shufflenet.trt
[02/08/2024-05:54:45] [I] === Model Options ===
[02/08/2024-05:54:45] [I] Format: ONNX
[02/08/2024-05:54:45] [I] Model: /nvdli-nano/data/Inference/MODELS/Model_6/shufflenet.onnx
[02/08/2024-05:54:45] [I] Output:
[02/08/2024-05:54:45] [I] === Build Options ===
[02/08/2024-05:54:45] [I] Max batch: explicit batch
[02/08/2024-05:54:45] [I] Workspace: 16 MiB
[02/08/2024-05:54:45] [I] minTiming: 1
[02/08/2024-05:54:45] [I] avgTiming: 8
[02/08/2024-05:54:45] [I] Precision: FP32
[02/08/2024-05:54:45] [I] Calibration: 
[02/08/2024-05:54:45] [I] Refit: Disabled
[02/08/2024-05:54:45] [I] Sparsity: Disabled
[02/08/2024-05:54:45] [I] Safe mode: Disabled
[02/08/2024-05:54:45] [I] DirectIO mode: Disabled
[02/08/2024-05:54:45] [I] Restricted mode: Disabled
[02/08/2024-05:54:45] [I] Save engi

In [3]:
import cv2
import numpy as np
import torch
import tensorrt as trt
import time

# Load the TensorRT model
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open('shufflenet.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

# Load and preprocess the input image
image_path = '1.jpg'
image = cv2.imread(image_path)
image = cv2.resize(image, (224, 224))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
image = image.astype(np.float32) / 255.0  # Normalize to [0, 1]
image = np.transpose(image, (2, 0, 1))  # Change to channel-first format

# Convert to torch tensor
image_tensor = torch.tensor(image, dtype=torch.float32)

# Run inference and measure time
with torch.no_grad():
    # Convert the tensor to a batched format
    input_data = image_tensor.unsqueeze(0)

    # Transfer the input tensor to GPU (if available)
    if torch.cuda.is_available():
        input_data = input_data.to('cuda')

    # Allocate device memory for the input tensor
    d_input = torch.cuda.FloatTensor(input_data)

    # Allocate device memory for the output tensor
    d_output = torch.empty((1, 2), device='cuda')  # Assuming 2 classes (dogs and cats)

    # Measure inference time
    start_time = time.time()

    # Run inference
    context.execute(1, bindings=[int(d_input.data_ptr()), int(d_output.data_ptr())])

    # Measure elapsed time
    elapsed_time = time.time() - start_time

    # Transfer the output tensor back to the host
    h_output = d_output.cpu().numpy()

# Post-process the output
predictions = torch.from_numpy(h_output).cpu().numpy()

# Assuming the first class corresponds to 'cats' and the second to 'dogs'
class_names = ['cats', 'dogs']
predicted_class = np.argmax(predictions)

# Print the predicted class and inference time
print("Predicted class:", class_names[predicted_class])
print("Inference time:", elapsed_time, "seconds")

Predicted class: cats
Inference time: 0.3463137149810791 seconds


In [4]:
import torch
import torchvision.models as models

# Load the pretrained ShuffleNet model
model = models.shufflenet_v2_x1_0(pretrained=True)

# Define input size (224x224x3) for ShuffleNet
input_size = (224, 224, 3)

# Function to calculate FLOPs for the model
def count_flops(model, input_size):
    # Define a tensor of appropriate size
    input_tensor = torch.randn(1, *input_size)
    
    # Switch to evaluation mode
    model.eval()
    
    # Move model to appropriate device
    device = next(model.parameters()).device
    input_tensor = input_tensor.to(device)
    model.to(device)
    
    # Iterate through model's layers
    flops = 0
    for module in model.modules():
        if isinstance(module, torch.nn.Conv2d):
            # For convolutional layers
            output_size = (input_size[0] - module.kernel_size[0] + 2 * module.padding[0]) // module.stride[0] + 1
            output_size = (output_size - module.kernel_size[1] + 2 * module.padding[1]) // module.stride[1] + 1
            flops += module.in_channels * module.out_channels * module.kernel_size[0] * module.kernel_size[1] * output_size * output_size
            input_size = (output_size, output_size, module.out_channels)
        elif isinstance(module, torch.nn.Linear):
            # For fully connected layers
            flops += module.in_features * module.out_features
            input_size = (module.out_features,)
    
    return flops

# Calculate FLOPs
total_flops = count_flops(model, input_size)
print("Total FLOPs:", total_flops)

Total FLOPs: 11632608


In [5]:
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total trainable parameters: {total_params}')

Total trainable parameters: 2278604
