In [1]:
import albumentations as A
import cv2
import timm
import torch



  from .autonotebook import tqdm as notebook_tqdm


# 1. Prepare a single input image for the `resnet50` model

In [2]:
# 1. Define the preprocessing pipeline
# Both steps are deterministic preprocessing, so they run with p=1.0.
# (`always_apply` is deprecated in recent Albumentations releases and only
# produces a warning there.)
transform = A.Compose([
    A.Resize(width=224, height=224, p=1.0),
    # Divide by max_pixel_value, then standardize each channel with mean/std.
    # These are the ImageNet statistics that A.Normalize also uses by default;
    # they are spelled out so the preprocessing is explicit.
    A.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        max_pixel_value=255.0,
        p=1.0,
    ),
])

# 2. Load an image using OpenCV (OpenCV decodes to BGR; Albumentations expects RGB)
IMAGE_FILE = '../../03-datasets/03-flickr-8k/data/Images/667626_18933d713e.jpg'

image_bgr = cv2.imread(IMAGE_FILE)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded, so fail here with a clear message.
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_FILE}")
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# 3. Apply the transformations
# The result is a dictionary; we grab the 'image' key
transformed = transform(image=image)
transformed_image = transformed["image"]

# 4. Convert the numpy array to a PyTorch tensor (shares memory with the array)
transformed_tensor = torch.from_numpy(transformed_image)

# 5. Prepare the tensor for ResNet50: expected shape [1, 3, 224, 224], dtype float32.
# The values are already mean/std-standardized by A.Normalize, so they are
# NOT confined to [0, 1].
# Permute dimensions from [H, W, C] to [C, H, W], then add the batch dimension.
resnet_input = transformed_tensor.permute(2, 0, 1).unsqueeze(0).float()
resnet_input.shape



  A.Resize(width=224, height=224, always_apply=True),
  A.Normalize(max_pixel_value=255.0, always_apply=True),


torch.Size([1, 3, 224, 224])

# 2. Load `resnet50` with timm

In [6]:


# Build a pretrained ResNet-50 with timm.
# `timm` is imported in the notebook's first cell together with the other
# imports, so a fresh Restart & Run All surfaces every dependency up front.
# `num_classes=0` removes the classification head, so the forward pass
# returns pooled features instead of class logits.
model = timm.create_model(
    'resnet50',
    pretrained=True,
    num_classes=0,
)
# Switch to evaluation mode so BatchNorm layers use their running statistics.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     

# 3. Run inference

Run a forward pass on the prepared input and check the output shape.

In [4]:
# Forward pass only: disable autograd so no computation graph is built or
# kept in memory just to inspect the output shape.
with torch.no_grad():
    features = model(resnet_input)
features.shape

torch.Size([1, 2048])