## 1. Libs

In [1]:
# https://rwightman.github.io/pytorch-image-models/models/noisy-student/
# https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/efficientnet.py
# 'tf_efficientnet_b0_ns': _cfg(
# url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth',
# input_size=(3, 224, 224)),
# other model: swin_large_patch4_window12_384

import os
import albumentations # Fast image augmentation library
import pandas as pd
import cv2            # OpenCV 
import numpy as np
import timm           # (Unofficial) PyTorch Image Models
import torch
import torch.nn as nn

# Root folder of the PetFinder Pawpularity input data; 'train.csv' and the 'train/' images are read from here.
path_input = r'../input/petfinder-pawpularity-score/'

## 2. Load a model using the timm library, e.g. tf_efficientnet_b0_ns

### Load model

In [2]:
# Instantiate the EfficientNet-B0 "noisy student" checkpoint through timm,
# loading its pretrained weights and expecting 3 input channels.
backbone_name = 'tf_efficientnet_b0_ns'
model = timm.create_model(backbone_name, pretrained=True, in_chans=3)
# As created, the model ends in
# (classifier): Linear(in_features=1280, out_features=1000, bias=True).
print(type(model))

<class 'timm.models.efficientnet.EfficientNet'>


In [3]:
# Show the module tree to inspect the layers (the printed output is long).
model

EfficientNet(
  (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
    )
 

In [4]:
# Number of input features of the current classifier head (reused when the head is replaced).
model.classifier.in_features

1280

### Change the last part in the model

In [5]:
# Swap the stock head
#   (classifier): Linear(in_features=1280, out_features=1000, bias=True)
# for a narrower projection
#   (classifier): Linear(in_features=1280, out_features=128, bias=True)
# so the network emits a 128-dimensional vector per image.
head_in_features = model.classifier.in_features
model.classifier = nn.Linear(head_in_features, 128)
type(model)

timm.models.efficientnet.EfficientNet

### Get image in suitable shape for the model

In [6]:
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# Resolve the preprocessing settings (input size, interpolation, mean/std, crop fraction) that timm associates with this model.
config = resolve_data_config({}, model=model)
config

{'input_size': (3, 224, 224),
 'interpolation': 'bicubic',
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'crop_pct': 0.875}

In [7]:
# Turn the resolved config into a preprocessing pipeline: resize, center-crop, ToTensor, normalize (see the Compose printout).
transform = create_transform(**config)
transform



Compose(
    Resize(size=256, interpolation=bicubic)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
)

In [8]:
# Refer to "Test Images with Open CV"
import urllib
from PIL import Image

# Load the training table and build the JPEG path for every "Id" in it.
train_csv = os.path.join(path_input, 'train.csv')
df_train = pd.read_csv(train_csv).reset_index(drop=True)
image_paths = [
    os.path.join(path_input, f'train/{img}.jpg')
    for img in df_train["Id"].values
]

item = 9  # zero-based position, so this picks the 10th image
# OpenCV alternative for reading the same file:
# image = cv2.imread(image_paths[item])
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Open with PIL, force RGB, then run the timm transform; the result is a
# torch tensor (not a NumPy array).
image = transform(Image.open(image_paths[item]).convert('RGB'))
image.shape

torch.Size([3, 224, 224])

In [9]:
# Add the batch axis as the first dimension before the image is passed to the model.
# Guarded so that re-running this cell does not stack a second batch axis.
if image.dim() == 3:
    image = image.unsqueeze(0)
image.shape  # torch.Size([1, 3, 224, 224])

torch.Size([1, 3, 224, 224])

### Run the model

In [10]:
# Forward pass through the network and its replaced 128-unit head.
# NOTE(review): model.eval() is not called in the cells shown, so this presumably runs in training mode — confirm that is intended.
x = model(image) 
x.shape
# E.g. x = tensor([[-0.0141, ..., 0.0397]])

torch.Size([1, 128])

### Add dropout to the model

In [11]:
# PyTorch dropout layer with a 10% drop probability;
# its repr is Dropout(p=0.1, inplace=False).
dropout = nn.Dropout(p=0.1)
x = dropout(x)
x.shape
# E.g. x = tensor([[-0.0157, ..., 0.0442]])
# (compared with the pre-dropout example, kept values are scaled by 1 / (1 - 0.1))

torch.Size([1, 128])

### Add MLP output layer

In [12]:
# Names of the 12 metadata columns taken from df_train.
dense_feature_names = [
    'Subject Focus', 'Eyes', 'Face', 'Near',
    'Action', 'Accessory', 'Group', 'Collage',
    'Human', 'Occlusion', 'Info', 'Blur',
]

# All rows of those columns as a NumPy array, then the row of the chosen image
# (item = 9) converted to a float tensor.
dense_features = df_train[dense_feature_names].values
features = torch.tensor(dense_features[item], dtype=torch.float)
features

tensor([0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])

In [13]:
# Add the batch axis as the first dimension so the metadata row can be
# concatenated with the (1, 128) image embedding.
# Guarded so that re-running this cell does not stack a second batch axis.
if features.dim() == 1:
    features = features.unsqueeze(0)
features

tensor([[0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [14]:
# Concatenate the image embedding and the metadata row along the feature axis (dim=1).
x = torch.cat([x, features], dim=1)
x.shape  # 140 = 128 (x) + 12 (features)
# E.g. x = tensor([[-1.5678e-02, ..., 4.4151e-02, 0.0000e+00, 0.0000e+00]])

torch.Size([1, 140])

In [15]:
# Final linear layer: 128 image features + 12 metadata features -> 1 output.
# Its repr is Linear(in_features=140, out_features=1, bias=True).
out = nn.Linear(in_features=128 + 12, out_features=1)

In [16]:
# Map the 140-dimensional vector to a single value. The number changes between
# runs (the example in the trailing comment and the recorded output differ);
# presumably because the new layers are randomly initialised and no seed is set — confirm.
x = out(x)
x  # e.g. tensor([[0.0849]], grad_fn=<AddmmBackward>)

tensor([[-0.1916]], grad_fn=<AddmmBackward>)