# Import libraries

In [9]:
import os
import warnings
from pprint import pprint
from glob import glob
from tqdm import tqdm

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision.transforms as T
from box import Box
from timm import create_model
from sklearn.model_selection import StratifiedKFold, train_test_split
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset, TensorDataset
import optuna, datetime

import pytorch_lightning as pl
from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.progress import ProgressBarBase
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule, LightningModule
# from pytorch_lightning.loggers import WandbLogger
import albumentations as A
from albumentations.pytorch import ToTensorV2
import wandb
import cv2
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
import shutil
import os
import pickle
from torch.utils.data.sampler import WeightedRandomSampler
from torch.utils.data import TensorDataset
import time
import gc
from tqdm import tqdm
import random

warnings.filterwarnings("ignore")

# Meta info

In [2]:
# Locations of the competition data, all relative to `drive_root`.
drive_root = '..'
TRAIN_DIR = f"{drive_root}/input/petfinder-pawpularity-score/train"
TEST_DIR = f"{drive_root}/input/petfinder-pawpularity-score/test"

# Names of the tabular metadata columns listed for this dataset.
DENSE_FEATURES = [
    'Subject Focus', 'Eyes', 'Face', 'Near',
    'Action', 'Accessory', 'Group', 'Collage',
    'Human', 'Occlusion', 'Info', 'Blur',
]

# Load the label/metadata tables.
df_train = pd.read_csv(f"{drive_root}/input/petfinder-pawpularity-score/train.csv")
df_test = pd.read_csv(f"{drive_root}/input/petfinder-pawpularity-score/test.csv")

# Attach the image path that corresponds to every Id.
df_train['filepath'] = df_train['Id'].apply(lambda image_id: f"{TRAIN_DIR}/{image_id}.jpg")
df_test['filepath'] = df_test['Id'].apply(lambda image_id: f"{TEST_DIR}/{image_id}.jpg")

# Losses

In [3]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: constant added to the MSE before the square root is taken
            (presumably to keep the gradient finite when the error is zero).
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        return (self.mse(yhat, y) + self.eps).sqrt()


# Dataset

In [4]:
class CustomDataset(Dataset):
    """Index-aligned wrapper around a feature container and optional labels.

    Args:
        x: indexable container of features; must support ``len()``.
        y: optional indexable container of labels. When omitted, items are
            returned as bare features instead of ``(feature, label)`` pairs.
    """

    def __init__(self, x, y=None):
        self._X, self._y = x, y

    def __len__(self):
        return len(self._X)

    def __getitem__(self, idx):
        sample = self._X[idx]
        if self._y is None:
            return sample
        return sample, self._y[idx]

# EarlyStopper

In [5]:
class EarlyStopper():
    """Track a monitored metric and raise a stop flag when it stops improving.

    Args:
        patience: number of consecutive non-improving checks tolerated
            before ``stop`` becomes True.
        mode: ``'max'`` if a larger metric is better; any other value is
            treated as "smaller is better".

    Attributes set on the instance:
        patience_counter: consecutive checks without improvement.
        stop: True once the patience budget is exhausted.
        best_loss: best value seen so far, stored sign-flipped in ``'max'``
            mode so that "lower is better" internally.
    """

    def __init__(self, patience: int, mode:str)-> None:
        self.patience = patience
        self.mode = mode

        # Initiate
        self.patience_counter = 0
        self.stop = False
        self.best_loss = np.inf

    def check_early_stopping(self, loss: float)-> None:
        """Record one new metric value and update ``stop`` accordingly.

        Args:
            loss: latest value of the monitored metric.
        """
        # Negate in 'max' mode so the comparison below is always "lower wins".
        loss = -loss if self.mode == 'max' else loss

        if loss < self.best_loss:
            # got better score: remember it and reset the patience budget
            self.patience_counter = 0
            self.best_loss = loss
            return

        # got an equal or worse score (a NaN also lands here, because every
        # comparison with NaN is False, so it counts as "no improvement")
        self.patience_counter += 1

        # `>=` rather than `==` so that patience <= 0 still terminates.
        if self.patience_counter >= self.patience:
            self.stop = True  # end
            


# Setting arguments (cait_2026)

In [31]:
# Restrict this process to the GPU with index 1.
# NOTE(review): this only takes effect if it runs before CUDA is initialised — confirm nothing above touches the GPU.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# Identifier of the backbone run; it is interpolated into the weight and prediction paths in the cells below.
model_code = 'cait_2026'
seed = 2026
# Seed every random number generator in use with the same value.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
random.seed(seed)
# Lightning helper; it prints "Global seed set to <seed>" (see the cell output).
seed_everything(seed)
# Enables autograd anomaly detection.
# NOTE(review): the visible cells only run inference under torch.no_grad(), so this is probably a leftover from training — confirm.
torch.autograd.set_detect_anomaly(True)

# 10-fold stratified splitter reused by the evaluation loops below; the fixed random_state keeps the folds reproducible.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)


Global seed set to 2026


# Parameters (sr)

In [32]:
# Directory holding the saved tuning study for the simple head.
study_save_dir = f'{drive_root}/output/weights/final_weights/{model_code}_2nd_head/simple_head/'

# Restore the pickled study object and pick its best trial.
with open(study_save_dir + "01-simple-head.pkl", 'rb') as study_file:
    study = pickle.load(study_file)

trial = study.best_trial
print(f"Best trial:{trial.number}")

# Echo the best trial's hyper-parameters, one per line.
print("  Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

Best trial:224
  Params: 
    learning_rate: 0.0036668781955411876
    hidden_Dim: 64
    patience: 50
    activation: nn.CELU
    first_drop: 0.7436634507558222
    second_drop: 0.020778489863173384
    third_drop: 0.775613910041795
    beta1: 0.33129254099892996
    beta2: 0.0815198907791905
    weight_decay: 0.7996296214002552
    amsgrad: False
    T_0: 60
    T_mult: 74
    eta_min: 4.86860523185378e-06
    external_feature: False


In [33]:
# Evaluate the simple head on every validation fold and store its
# out-of-fold predictions as ./prediction/<model_code>/sr/val_predicts<fold>.npy.
model_save_dir = f'{drive_root}/output/weights/final_weights/{model_code}/'
result_save_dir = f'./prediction/{model_code}/sr/'
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(result_save_dir, exist_ok=True)

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    head_dir = f'{drive_root}/output/weights/final_weights/{model_code}_2nd_head/simple_head/SimpleHead_fold{fold}.ckpt'

    train_df = df_train.loc[train_idx].reset_index(drop=True)
    val_df = df_train.loc[val_idx].reset_index(drop=True)

    # Load embed features
    val_embed_features = np.load(f'{model_save_dir}val_embed_org_fold{fold}.npy')
    val_embed_flip_features = np.load(f'{model_save_dir}val_embed_flip_fold{fold}.npy')
    val_embed_merge_features = np.load(f'{model_save_dir}val_embed_merge_org_fold{fold}.npy')
    val_embed_merge_flip_features = np.load(f'{model_save_dir}val_embed_merge_flip_fold{fold}.npy')

    # The best trial decides whether the "merge" variant of the embeddings is used.
    if trial.params['external_feature']:
        org_feature = torch.FloatTensor(val_embed_merge_features).to('cuda')
        flip_feature = torch.FloatTensor(val_embed_merge_flip_features).to('cuda')
    else:
        org_feature = torch.FloatTensor(val_embed_features).to('cuda')
        flip_feature = torch.FloatTensor(val_embed_flip_features).to('cuda')

    # The ".ckpt" file is a pickled model object. Open it in a context manager
    # so the handle is closed deterministically.
    # NOTE(review): pickle.load runs arbitrary code; only load trusted checkpoints.
    with open(head_dir, "rb") as head_file:
        q = pickle.load(head_file)
    q.eval()
    with torch.no_grad():
        org_pred = q(org_feature)
        flip_pred = q(flip_feature)
        # Sigmoid of the head output, rescaled by 100.
        org_pred = torch.sigmoid(org_pred).squeeze(dim=-1).cpu().numpy() * 100
        flip_pred = torch.sigmoid(flip_pred).squeeze(dim=-1).cpu().numpy() * 100
        # Average the predictions from the two feature sets (org / flip).
        pred = (org_pred + flip_pred)/2.

    # RMSE against the fold's Pawpularity targets.
    score = mean_squared_error(val_df.Pawpularity, pred)**0.5
    print(f'fold {fold} score : {score}')
    np.save(f'{result_save_dir}val_predicts{fold}.npy', pred)
    
    


fold 0 score : 17.615340480624496
fold 1 score : 17.634135847624798
fold 2 score : 17.12036336854731
fold 3 score : 16.90874505331735
fold 4 score : 17.379376203055436
fold 5 score : 17.52551618878885
fold 6 score : 17.206142892551203
fold 7 score : 17.37907318127309
fold 8 score : 17.292821315820973
fold 9 score : 17.801377573454154


# Parameters (qr)

In [34]:
# Directory holding the saved tuning study for the quantile head.
study_save_dir = f'{drive_root}/output/weights/final_weights/{model_code}_2nd_head/qr_head/'

# Restore the pickled study object and pick its best trial.
with open(study_save_dir + "02-quantile-head.pkl", 'rb') as study_file:
    study = pickle.load(study_file)

trial = study.best_trial
print(f"Best trial:{trial.number}")

# Echo the best trial's hyper-parameters, one per line.
print("  Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

Best trial:386
  Params: 
    learning_rate: 0.0009310636198014082
    hidden_Dim: 256
    patience: 50
    activation: nn.LogSigmoid
    first_drop: 0.2202952838292654
    second_drop: 0.6206132613464477
    third_drop: 0.8714714822686737
    qunatile_num: 583
    beta1: 0.7317231733336342
    beta2: 0.2514265901050952
    weight_decay: 0.029055884299404165
    amsgrad: True
    T_0: 32
    T_mult: 94
    eta_min: 7.2505756003026e-08
    external_feature: True


In [35]:
# Evaluate the quantile head on every validation fold and store its
# out-of-fold predictions as ./prediction/<model_code>/qr/val_predicts<fold>.npy.
model_save_dir = f'{drive_root}/output/weights/final_weights/{model_code}/'
result_save_dir = f'./prediction/{model_code}/qr/'
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(result_save_dir, exist_ok=True)

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    head_dir = f'{drive_root}/output/weights/final_weights/{model_code}_2nd_head/qr_head/QRHead_fold{fold}.ckpt'

    train_df = df_train.loc[train_idx].reset_index(drop=True)
    val_df = df_train.loc[val_idx].reset_index(drop=True)

    # Load embed features
    val_embed_features = np.load(f'{model_save_dir}val_embed_org_fold{fold}.npy')
    val_embed_flip_features = np.load(f'{model_save_dir}val_embed_flip_fold{fold}.npy')
    val_embed_merge_features = np.load(f'{model_save_dir}val_embed_merge_org_fold{fold}.npy')
    val_embed_merge_flip_features = np.load(f'{model_save_dir}val_embed_merge_flip_fold{fold}.npy')

    # The best trial decides whether the "merge" variant of the embeddings is used.
    if trial.params['external_feature']:
        org_feature = torch.FloatTensor(val_embed_merge_features).to('cuda')
        flip_feature = torch.FloatTensor(val_embed_merge_flip_features).to('cuda')
    else:
        org_feature = torch.FloatTensor(val_embed_features).to('cuda')
        flip_feature = torch.FloatTensor(val_embed_flip_features).to('cuda')

    # The ".ckpt" file is a pickled model object. Open it in a context manager
    # so the handle is closed deterministically.
    # NOTE(review): pickle.load runs arbitrary code; only load trusted checkpoints.
    with open(head_dir, "rb") as head_file:
        q = pickle.load(head_file)
    q.eval()
    with torch.no_grad():
        org_pred = q(org_feature)
        flip_pred = q(flip_feature)
        # Median over the last output dimension (presumably one value per
        # quantile — TODO confirm against the head definition), rescaled by 100.
        org_pred = org_pred.median(dim=-1)[0].cpu().numpy() * 100
        flip_pred = flip_pred.median(dim=-1)[0].cpu().numpy() * 100
        # Average the predictions from the two feature sets (org / flip).
        pred = (org_pred + flip_pred)/2.

    # RMSE against the fold's Pawpularity targets.
    score = mean_squared_error(val_df.Pawpularity, pred)**0.5
    print(f'fold {fold} score : {score}')
    np.save(f'{result_save_dir}val_predicts{fold}.npy', pred)
    
    


fold 0 score : 17.648856408305196
fold 1 score : 17.74933011730671
fold 2 score : 17.198119980035237
fold 3 score : 16.89868000838977
fold 4 score : 17.36855202433688
fold 5 score : 17.416613916918852
fold 6 score : 17.258448357758734
fold 7 score : 17.631473304601197
fold 8 score : 17.325556574129514
fold 9 score : 17.8702082220694


# Parameters (by)

In [36]:
# Directory holding the saved tuning study for the Bayes head.
study_save_dir = f'{drive_root}/output/weights/final_weights/{model_code}_2nd_head/by_head/'

# Restore the pickled study object and pick its best trial.
with open(study_save_dir + "03-bayes-head.pkl", 'rb') as study_file:
    study = pickle.load(study_file)

trial = study.best_trial
print(f"Best trial:{trial.number}")

# Echo the best trial's hyper-parameters, one per line.
print("  Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

Best trial:399
  Params: 
    learning_rate: 0.001271654544291899
    h1: 32
    h2: 2048
    patience: 50
    activation: nn.Hardswish
    first_drop: 0.698555927140175
    second_drop: 0.20381523191503506
    third_drop: 0.2006610172177032
    beta1: 0.31608764085674934
    beta2: 0.6580266789634163
    weight_decay: 0.2929888186928512
    amsgrad: False
    T_0: 31
    T_mult: 19
    eta_min: 9.723090090029141e-09
    external_feature: True


In [37]:
# Evaluate the Bayes head on every validation fold and store its
# out-of-fold predictions as ./prediction/<model_code>/by/val_predicts<fold>.npy.
model_save_dir = f'{drive_root}/output/weights/final_weights/{model_code}/'
result_save_dir = f'./prediction/{model_code}/by/'
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(result_save_dir, exist_ok=True)

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    head_dir = f'{drive_root}/output/weights/final_weights/{model_code}_2nd_head/by_head/BayesHead_fold{fold}.ckpt'

    train_df = df_train.loc[train_idx].reset_index(drop=True)
    val_df = df_train.loc[val_idx].reset_index(drop=True)

    # Load embed features
    val_embed_features = np.load(f'{model_save_dir}val_embed_org_fold{fold}.npy')
    val_embed_flip_features = np.load(f'{model_save_dir}val_embed_flip_fold{fold}.npy')
    val_embed_merge_features = np.load(f'{model_save_dir}val_embed_merge_org_fold{fold}.npy')
    val_embed_merge_flip_features = np.load(f'{model_save_dir}val_embed_merge_flip_fold{fold}.npy')

    # The best trial decides whether the "merge" variant of the embeddings is used.
    if trial.params['external_feature']:
        org_feature = torch.FloatTensor(val_embed_merge_features).to('cuda')
        flip_feature = torch.FloatTensor(val_embed_merge_flip_features).to('cuda')
    else:
        org_feature = torch.FloatTensor(val_embed_features).to('cuda')
        flip_feature = torch.FloatTensor(val_embed_flip_features).to('cuda')

    # The ".ckpt" file is a pickled model object. Open it in a context manager
    # so the handle is closed deterministically.
    # NOTE(review): pickle.load runs arbitrary code; only load trusted checkpoints.
    with open(head_dir, "rb") as head_file:
        q = pickle.load(head_file)
    q.eval()
    with torch.no_grad():
        org_pred = q(org_feature)
        flip_pred = q(flip_feature)
        # Only output column 0 is used (presumably the predicted mean — TODO
        # confirm against the head definition); sigmoid, then rescale by 100.
        org_pred = torch.sigmoid(org_pred[:, 0]).cpu().numpy() * 100
        flip_pred = torch.sigmoid(flip_pred[:, 0]).cpu().numpy() * 100
        # Average the predictions from the two feature sets (org / flip).
        pred = (org_pred + flip_pred)/2.

    # RMSE against the fold's Pawpularity targets.
    score = mean_squared_error(val_df.Pawpularity, pred)**0.5
    print(f'fold {fold} score : {score}')
    np.save(f'{result_save_dir}val_predicts{fold}.npy', pred)
    
    


fold 0 score : 17.581717963551093
fold 1 score : 17.611075253504143
fold 2 score : 17.052531405533767
fold 3 score : 16.85781319742837
fold 4 score : 17.425451147113776
fold 5 score : 17.363612422910254
fold 6 score : 17.222130543540125
fold 7 score : 17.336935670219017
fold 8 score : 17.29069533575795
fold 9 score : 17.708355860658706


# Make dataframe

In [48]:
# Gather the out-of-fold predictions of the cait_2026 run, in fold order.
# The splitter is rebuilt with the run's own seed; this presumably reproduces
# the folds used when the predictions were written — TODO confirm.
model_code = 'cait_2026'
seed = 2026
np.random.seed(seed)
random.seed(seed)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
id_lst = []

# Plain list variables (previously created through exec on formatted strings).
cait_2026_org_lst = []
cait_2026_sr_lst = []
cait_2026_qr_lst = []
cait_2026_by_lst = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    org_predict = np.load(f'./prediction/{model_code}/org/val_predicts_fold{fold}.npy')
    sr_predict = np.load(f'./prediction/{model_code}/sr/val_predicts{fold}.npy')
    qr_predict = np.load(f'./prediction/{model_code}/qr/val_predicts{fold}.npy')
    by_predict = np.load(f'./prediction/{model_code}/by/val_predicts{fold}.npy')
    val_df = df_train.loc[val_idx].reset_index(drop=True)
    # Ids and predictions are appended in the same order so that the rows line up.
    id_lst.extend(val_df['Id'].tolist())
    cait_2026_org_lst.extend(org_predict.tolist())
    cait_2026_sr_lst.extend(sr_predict.tolist())
    cait_2026_qr_lst.extend(qr_predict.tolist())
    cait_2026_by_lst.extend(by_predict.tolist())

In [50]:
# One row per validation sample: Id plus the four cait_2026 predictions.
cait_2026_df = pd.DataFrame(
    {
        'Id': id_lst,
        'cait_2026_org': cait_2026_org_lst,
        'cait_2026_sr': cait_2026_sr_lst,
        'cait_2026_qr': cait_2026_qr_lst,
        'cait_2026_by': cait_2026_by_lst,
    }
)

In [52]:
# Gather the out-of-fold predictions of the swin_2021 run, in fold order.
# The splitter is rebuilt with the run's own seed; this presumably reproduces
# the folds used when the predictions were written — TODO confirm.
model_code = 'swin_2021'
seed = 2021
np.random.seed(seed)
random.seed(seed)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
id_lst = []

# Plain list variables (previously created through exec on formatted strings).
swin_2021_org_lst = []
swin_2021_sr_lst = []
swin_2021_qr_lst = []
swin_2021_by_lst = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    org_predict = np.load(f'./prediction/{model_code}/org/val_predicts_fold{fold}.npy')
    sr_predict = np.load(f'./prediction/{model_code}/sr/val_predicts{fold}.npy')
    qr_predict = np.load(f'./prediction/{model_code}/qr/val_predicts{fold}.npy')
    by_predict = np.load(f'./prediction/{model_code}/by/val_predicts{fold}.npy')
    val_df = df_train.loc[val_idx].reset_index(drop=True)
    # Ids and predictions are appended in the same order so that the rows line up.
    id_lst.extend(val_df['Id'].tolist())
    swin_2021_org_lst.extend(org_predict.tolist())
    swin_2021_sr_lst.extend(sr_predict.tolist())
    swin_2021_qr_lst.extend(qr_predict.tolist())
    swin_2021_by_lst.extend(by_predict.tolist())

In [53]:
# One row per validation sample: Id plus the four swin_2021 predictions.
swin_2021_df = pd.DataFrame(
    {
        'Id': id_lst,
        'swin_2021_org': swin_2021_org_lst,
        'swin_2021_sr': swin_2021_sr_lst,
        'swin_2021_qr': swin_2021_qr_lst,
        'swin_2021_by': swin_2021_by_lst,
    }
)

In [54]:
# Gather the out-of-fold predictions of the swin_2025 run, in fold order.
# The splitter is rebuilt with the run's own seed; this presumably reproduces
# the folds used when the predictions were written — TODO confirm.
model_code = 'swin_2025'
seed = 2025
np.random.seed(seed)
random.seed(seed)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
id_lst = []

# Plain list variables (previously created through exec on formatted strings).
swin_2025_org_lst = []
swin_2025_sr_lst = []
swin_2025_qr_lst = []
swin_2025_by_lst = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    org_predict = np.load(f'./prediction/{model_code}/org/val_predicts_fold{fold}.npy')
    sr_predict = np.load(f'./prediction/{model_code}/sr/val_predicts{fold}.npy')
    qr_predict = np.load(f'./prediction/{model_code}/qr/val_predicts{fold}.npy')
    by_predict = np.load(f'./prediction/{model_code}/by/val_predicts{fold}.npy')
    val_df = df_train.loc[val_idx].reset_index(drop=True)
    # Ids and predictions are appended in the same order so that the rows line up.
    id_lst.extend(val_df['Id'].tolist())
    swin_2025_org_lst.extend(org_predict.tolist())
    swin_2025_sr_lst.extend(sr_predict.tolist())
    swin_2025_qr_lst.extend(qr_predict.tolist())
    swin_2025_by_lst.extend(by_predict.tolist())

In [55]:
# One row per validation sample: Id plus the four swin_2025 predictions.
swin_2025_df = pd.DataFrame(
    {
        'Id': id_lst,
        'swin_2025_org': swin_2025_org_lst,
        'swin_2025_sr': swin_2025_sr_lst,
        'swin_2025_qr': swin_2025_qr_lst,
        'swin_2025_by': swin_2025_by_lst,
    }
)

In [68]:
# Join the three per-model prediction tables on Id (inner join at each step).
pred_df = (
    swin_2021_df
    .merge(swin_2025_df, how="inner", on="Id")
    .merge(cait_2026_df, how="inner", on="Id")
)

In [69]:
# Ground-truth table: the Id column plus Pawpularity exposed as `answer`.
answer_df = pd.DataFrame(
    {
        'Id': df_train['Id'],
        'answer': df_train['Pawpularity'],
    }
)

In [70]:
# Attach the targets to the predictions; inner join keeps only Ids present in both tables.
final_df = answer_df.merge(pred_df, how="inner", on="Id")

In [71]:
# Write the combined table to the working directory, without the index column.
final_df.to_csv(path_or_buf='final_df.csv', index=False)

# Ensemble experiment

In [35]:
from itertools import combinations

# Exhaustive search over all non-empty subsets of the prediction sources:
# each subset is averaged with equal weights and scored by the mean per-fold RMSE.
debug = True
pred_list = ('val_predicts', 'val_svr_predicts', 'val_QR_predicts', 'val_Bayes_predicts', 'val_Simple_predicts')

# NOTE(review): `config` is not defined in any cell visible in this notebook
# (hidden kernel state); it must exist before this cell can run.
model_save_dir = f'{drive_root}/output/weights/{config.model.name}_{config.save_discript}/'

# Load every prediction file once per fold instead of once per combination.
# The targets and the individual model scores do not depend on the combination.
fold_targets = []
fold_preds = []
swin_scores = []
svr_scores = []
qr_scores = []
bayes_scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    val_df = df_train.loc[val_idx].reset_index(drop=True)
    # Files are named <source><fold>.npy inside model_save_dir.
    preds = {name: np.load(f'{model_save_dir}{name}{fold}.npy') for name in pred_list}

    fold_targets.append(val_df.Pawpularity)
    fold_preds.append(preds)

    swin_scores.append(mean_squared_error(val_df.Pawpularity, preds['val_predicts'])**0.5)
    svr_scores.append(mean_squared_error(val_df.Pawpularity, preds['val_svr_predicts'])**0.5)
    qr_scores.append(mean_squared_error(val_df.Pawpularity, preds['val_QR_predicts'])**0.5)
    bayes_scores.append(mean_squared_error(val_df.Pawpularity, preds['val_Bayes_predicts'])**0.5)

# Start from +inf / None so the final report is well defined even if no
# combination were ever accepted.
cv_min_score = float('inf')
min_comb = None

for i in range(1, len(pred_list)+1):
    for c in combinations(pred_list, i):
        ens_scores = []

        for target, preds in zip(fold_targets, fold_preds):
            # Equal-weight average of the selected sources, looked up by name
            # in the dict (no eval of variable names).
            ensemble_predicts = np.zeros(preds['val_predicts'].shape[0])
            for item in c:
                ensemble_predicts += preds[item]
            ensemble_predicts = ensemble_predicts/len(c)
            ens_scores.append(mean_squared_error(target, ensemble_predicts)**0.5)

        cv_score = sum(ens_scores)/len(ens_scores)
        if cv_score < cv_min_score:
            min_comb = c
            cv_min_score = cv_score
        if debug:
            print(c)
            print(f'cv score : {cv_score}')
            print('------------')

print(f'min combination : {min_comb}, score : {cv_min_score}')

('val_predicts',)
cv score : 17.498020690937622
------------
('val_svr_predicts',)
cv score : 17.579673884685686
------------
('val_QR_predicts',)
cv score : 17.422758685111415
------------
('val_Bayes_predicts',)
cv score : 17.394987698249714
------------
('val_Simple_predicts',)
cv score : 17.33018967573739
------------
('val_predicts', 'val_svr_predicts')
cv score : 17.453665233861358
------------
('val_predicts', 'val_QR_predicts')
cv score : 17.421277767367716
------------
('val_predicts', 'val_Bayes_predicts')
cv score : 17.387532910075855
------------
('val_predicts', 'val_Simple_predicts')
cv score : 17.363869075155137
------------
('val_svr_predicts', 'val_QR_predicts')
cv score : 17.444048232923297
------------
('val_svr_predicts', 'val_Bayes_predicts')
cv score : 17.37116993518172
------------
('val_svr_predicts', 'val_Simple_predicts')
cv score : 17.364519041502433
------------
('val_QR_predicts', 'val_Bayes_predicts')
cv score : 17.364583278701396
------------
('val_QR_pre