In [1]:
import sys

# Add two local input directories to the module search path. Presumably these
# hold the `tez` and `timm` packages imported in the next cell — confirm
# against the attached datasets.
sys.path.append("../input/tez-lib/")
sys.path.append("../input/timmmaster/")

## Import libraries

In [2]:
import tez
import albumentations
import pandas as pd
import cv2
import numpy as np
import torch
import torch.nn as nn
import math
import random
import timm
import gc
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt


from sklearn import metrics
from tez.callbacks import EarlyStopping
from tqdm import tqdm

# When True, the per-fold inference loop stops after the first fold.
ONLY_FIRST_FOLD=False

In [3]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, PyTorch's default generator, NumPy's
    global generator and the CUDA generator, then puts cuDNN into
    deterministic mode with benchmarking (autotuning) switched off.

    Args:
        seed: integer seed handed unchanged to each library.
    """
    # cuDNN flags are independent of the seeding calls, so set them first.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Note: torch.cuda.manual_seed_all(seed) would be the multi-GPU variant
    # of the last seeder below.
    seeders = (
        random.seed,
        torch.manual_seed,
        np.random.seed,
        torch.cuda.manual_seed,
    )
    for seeder in seeders:
        seeder(seed)


set_seed(2021)
print("set seed")

In [4]:
# Training table; later cells read its `Id`, `kfold` and `Pawpularity`
# columns together with the dense feature columns listed below.
df = pd.read_csv(
    "../input/pet-remove-dup-ver2/train_remove_dup_10folds.csv"
)
# Alternative fold file, kept for reference:
#df = pd.read_csv("../input/same-old-creating-folds/train_10folds.csv")

# Column names of the tabular features passed to the model next to the image.
dense_features = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

In [5]:
class args:
    """Static configuration, read through class attributes (e.g. ``args.fold``)."""

    # --- data / loader ---
    image_size = 224  # images are resized to image_size x image_size
    batch_size = 4    # batch size handed to model.predict
    fold = 10         # number of folds iterated over

    # --- model head ---
    head_out = 192    # output width of the replaced backbone head
    features = 12     # number of dense features concatenated to the head output

    # Not read by any of the inference cells shown in this notebook.
    epochs = 20

In [6]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar.

    For negative inputs the algebraically equivalent form
    ``e**x / (1 + e**x)`` is used, so ``math.exp`` is only ever called with a
    non-positive argument and cannot raise ``OverflowError`` (the naive
    formula fails for strongly negative ``x``, e.g. ``x = -1000``).

    Args:
        x: real number.

    Returns:
        float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

In [7]:
class PawpularDataset:
    """Map-style dataset yielding an image tensor, its dense features and its target.

    Args:
        image_paths: sequence of image file paths read with ``cv2.imread``.
        dense_features: 2-D array indexed as ``dense_features[item, :]``.
        targets: sequence holding one target per image.
        augmentations: callable invoked as ``augmentations(image=...)`` that
            returns a mapping with an ``"image"`` entry, or ``None`` to skip
            augmentation.
    """

    def __init__(self, image_paths, dense_features, targets, augmentations):
        self.image_paths = image_paths
        self.dense_features = dense_features
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load sample ``item`` and return it as a dict of float tensors.

        Returns:
            dict with keys ``"image"`` (channels-first), ``"features"`` and
            ``"targets"``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be
                decoded.
        """
        path = self.image_paths[item]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than with an opaque
            # error further down.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # channels-last -> channels-first, e.g. (720, 405, 3) -> (3, 720, 405)
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        features = self.dense_features[item, :]
        targets = self.targets[item]

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "features": torch.tensor(features, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.float),
        }

In [8]:
class PawpularModel(tez.Model):
    """timm backbone whose head output is joined with dense features and regressed to one value.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        train: forwarded as ``pretrained=`` to ``timm.create_model``; pass
            ``False`` when the weights are loaded from a checkpoint afterwards.
        head_out: output width of the replacement backbone head.
        features: number of dense features concatenated to the head output.
    """

    def __init__(self, model_name, train=True, head_out=128, features=12):
        super().__init__()

        self.model = timm.create_model(model_name, pretrained=train, in_chans=3)
        # Swap the backbone's final layer for a projection to `head_out`.
        # Assumes the timm model exposes that layer as `.head` — TODO confirm
        # when using an architecture other than the one instantiated below.
        self.model.head = nn.Linear(self.model.head.in_features, head_out)
        # dropout / selu / relu are not used by forward(); they are kept so
        # the module keeps the same attributes as before.
        self.dropout = nn.Dropout(0.1)
        self.dense1 = nn.Linear(head_out + features, 64)
        self.selu = nn.SELU()
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(64, 1)
        # Start the output bias at exactly zero (in place, keeps the Parameter).
        nn.init.zeros_(self.dense2.bias)

        self.step_scheduler_after = "epoch"

    def forward(self, image, features, targets=None):
        """Predict one value per sample.

        Args:
            image: image batch fed to the backbone.
            features: dense feature batch, concatenated to the backbone
                output along dim 1.
            targets: accepted but not used.

        Returns:
            Tuple ``(predictions, 0, {})``; the second and third entries are
            constant placeholders (presumably tez's loss and metrics slots).
        """
        x = self.model(image)
        x = torch.cat([x, features], dim=1)
        # Two linear layers back to back: no activation is applied in between.
        x = self.dense1(x)
        x = self.dense2(x)

        return x, 0, {}

## Base augmentation (validation transforms)

In [9]:
# Deterministic preprocessing applied to every image at inference time:
#   1. resize to a square of side `args.image_size`;
#   2. scale by `max_pixel_value` and standardise each channel with the
#      mean/std below (these match the widely used ImageNet statistics).
# Every step, and the pipeline itself, is applied with probability 1.
valid_aug = albumentations.Compose(
    [
        albumentations.Resize(args.image_size, args.image_size, p=1.0),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0, p=1.0,
        ),
    ],
    p=1.0,
)

In [10]:
%%time

# Out-of-fold inference: for each fold, load that fold's checkpoint and predict
# on the rows assigned to the fold. super_final_predictions[i] holds the raw
# model outputs for fold i, in the row order of df[df.kfold == i].
super_final_predictions = []

for fold_ in range(args.fold):
    print('#'*25)
    print('### FOLD',fold_+1)
    print('#'*25)

    df_valid = df[df.kfold == fold_].reset_index(drop=True)

    valid_img_paths = [f"../input/petfinder-pawpularity-score/train/{x}.jpg" for x in df_valid["Id"].values]

    valid_dataset = PawpularDataset(
        image_paths=valid_img_paths,
        dense_features=df_valid[dense_features].values,
        targets=df_valid.Pawpularity.values,
        augmentations=valid_aug,
    )

    # Release the previous fold's model before building the next one so two
    # sets of weights are never resident at the same time.
    model = None
    gc.collect()
    torch.cuda.empty_cache()

    # train=False: skip the pretrained download, the checkpoint provides the weights.
    model = PawpularModel('swin_small_patch4_window7_224', train=False,
                           head_out=args.head_out, features=args.features)

    model.load(f"../input/paw-ver4-model/ver4/model_f{fold_}.bin", device="cuda", weights_only=True)

    final_test_predictions = []
    # Inference only: keep autograd off while the batches are produced and
    # consumed (if predict() yields lazily, the forward passes run inside
    # this context).
    with torch.no_grad():
        test_predictions = model.predict(valid_dataset, batch_size=args.batch_size, n_jobs=-1)
        for preds in test_predictions:
            final_test_predictions.extend(preds.ravel().tolist())

    # Later cells align predictions with rows by position, so the counts must match.
    assert len(final_test_predictions) == len(df_valid), (
        f"fold {fold_}: got {len(final_test_predictions)} predictions "
        f"for {len(df_valid)} rows"
    )

    # Predictions are stored as raw model outputs (no sigmoid rescaling).
    super_final_predictions.append(final_test_predictions)

    if ONLY_FIRST_FOLD:
        break

## Cross-validation (out-of-fold RMSE)

In [11]:
# Out-of-fold prediction column. NaN (float) marks rows without a prediction,
# which keeps the column float-typed and lets the overall score skip rows whose
# fold was not run (e.g. when only the first fold was predicted).
df['oof'] = np.nan
for i, fold_preds in enumerate(super_final_predictions):
    fold_mask = df['kfold'] == i
    df.loc[fold_mask, 'oof'] = fold_preds

    # Per-fold RMSE between the fold's targets and its predictions.
    fold_target = df.loc[fold_mask, 'Pawpularity']
    cv = np.sqrt(np.mean((fold_target.to_numpy() - np.asarray(fold_preds)) ** 2))
    print(str(i+1)+' fold cv is ', np.round(cv, 5))

# Overall RMSE over every row that received an out-of-fold prediction.
scored = df['oof'].notna()
cv = np.sqrt(np.mean((df.loc[scored, 'Pawpularity'] - df.loc[scored, 'oof']) ** 2))
print('whole fold cv is ', np.round(cv, 5))

In [12]:
# Sanity check: the `oof` column for the first fold should coincide with the
# raw predictions stored for that fold (the two curves should overlap).

# Applies the seaborn theme and sets the notebook-wide default figure size,
# which later figures created without an explicit size pick up.
sns.set(rc = {'figure.figsize':(15, 96)})

# Explicit size for this single-panel plot, so it no longer depends on whether
# the rc default above was already set by an earlier run of this cell.
fig, ax = plt.subplots(figsize=(15, 5))

ax.plot(df[df['kfold']==0]['oof'].reset_index(drop=True), label='oof column')
ax.plot(super_final_predictions[0], label='stored predictions')
ax.set_title('fold 1: oof column vs stored predictions')
ax.set_xlabel('row within fold')
ax.set_ylabel('prediction')
ax.legend()

plt.show()

## preds vs target distribution

In [13]:
# One histogram panel per predicted fold: target distribution (red) against
# the prediction distribution (blue).

# Theme and default size must be set before the figure is created.
sns.set(rc = {'figure.figsize':(15, 96)})

# Only the folds that were actually predicted get a panel.
n_folds = len(super_final_predictions)
# 9.6 inches per panel gives the 15 x 96 canvas for a full 10-fold run.
fig = plt.figure(figsize=(15, 9.6 * max(n_folds, 1)))

for i in range(n_folds):
    axes = fig.add_subplot(n_folds, 1, i+1)

    sns.histplot(df[df['kfold']==i]['Pawpularity'],
                 color="red", label="target", ax=axes)

    sns.histplot(super_final_predictions[i],
                 color="blue", label="preds", ax=axes)

    axes.set_title(str(i+1)+' fold target vs preds')
    axes.legend(title="predictions")

plt.show()

In [14]:
# Per-fold spread comparison: how wide the prediction distribution is relative
# to the target distribution (a ratio below 1 means narrower predictions).
std=[]
for i, fold_preds in enumerate(super_final_predictions):
    fold_target = df.loc[df['kfold']==i, 'Pawpularity']
    # ddof=1 so numpy's estimator matches the pandas Series.std() default;
    # the ratio then compares like with like.
    ratio = np.std(fold_preds, ddof=1) / fold_target.std()

    print('--------- fold '+str(i+1)+'------------')
    print('target min / max: ', fold_target.min(), fold_target.max())
    print('oof preds min / max: ', np.round(np.min(fold_preds), 0), np.round(np.max(fold_preds), 0))
    print('std oof / target: ',np.round(ratio, 2))
    print('----------------------------')
    std.append(ratio)

print('mean of std oof / target: ', np.round(np.mean(std), 2))