Forked from https://www.kaggle.com/nishantrajadhyaksha/pawpularity-pytorchlightning-w-b

In [1]:
# Report the runtime's memory size and whether it qualifies as "high-RAM".
from psutil import virtual_memory

# `.total` is the machine's total physical memory in bytes; 1e9 converts it to
# decimal gigabytes. (The message says "available", but the value is the total.)
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Anything below 20 GB is reported as a standard (non high-RAM) runtime.
print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

Your runtime has 539.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [2]:
import os
import warnings
from pprint import pprint
from glob import glob
from tqdm import tqdm
import timm

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision.transforms as T
from box import Box
from timm import create_model
from sklearn.model_selection import StratifiedKFold, train_test_split
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset
# from pytorch_grad_cam import GradCAMPlusPlus
# from pytorch_grad_cam.utils.image import show_cam_on_image


import pytorch_lightning as pl
from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning import callbacks
from pytorch_lightning.callbacks.progress import ProgressBarBase
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning import LightningDataModule, LightningModule
# from pytorch_lightning.loggers import WandbLogger
import albumentations as A
from albumentations.pytorch import ToTensorV2
# import wandb
import cv2
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
import shutil
import os
import pickle
from torch.utils.data.sampler import WeightedRandomSampler
from torch.utils.data import TensorDataset
import time
warnings.filterwarnings("ignore")

In [3]:
# Root directory under which the `input/` and `output/` folders used by the
# cells below are resolved.
# Alternative root (Google Drive mount), kept for reference:
# drive_root = '/content/drive/MyDrive/kaggle/pet_finder/'
drive_root = '..'

In [4]:
# Directories holding the competition's train / test JPEG images.
TRAIN_DIR = f"{drive_root}/input/petfinder-pawpularity-score/train"
TEST_DIR = f"{drive_root}/input/petfinder-pawpularity-score/test"
# Names of the 12 binary metadata columns that are fed to the model (and to the
# SVR) next to the image. The order matches the column order of train.csv.
DENSE_FEATURES = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

# Load the metadata tables; train.csv additionally carries the `Pawpularity` target.
df_train = pd.read_csv(f"{drive_root}/input/petfinder-pawpularity-score/train.csv")
df_test = pd.read_csv(f"{drive_root}/input/petfinder-pawpularity-score/test.csv")
# Every column except the first (`Id`) and the last (`Pawpularity`).
feature_name = df_train.columns[1:-1]
print(feature_name)
df_train.head()

Index(['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory', 'Group',
       'Collage', 'Human', 'Occlusion', 'Info', 'Blur'],
      dtype='object')


Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42
2,0013fd999caf9a3efe1352ca1b0d937e,0,1,1,1,0,0,0,0,1,1,0,0,28
3,0018df346ac9c1d8413cfcc888ca8246,0,1,1,1,0,0,0,0,0,0,0,0,15
4,001dc955e10590d3ca4673f034feeef2,0,0,0,1,0,0,1,0,0,0,0,0,72


In [5]:
# Attach the full path of each image ("<dir>/<Id>.jpg") to both metadata tables.
df_train['filepath'] = [f"{TRAIN_DIR}/{image_id}.jpg" for image_id in df_train['Id']]
df_test['filepath'] = [f"{TEST_DIR}/{image_id}.jpg" for image_id in df_test['Id']]

In [6]:
class RMSELoss(nn.Module):
    """Root-mean-squared error: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: constant added under the square root (presumably to keep the
            gradient finite when the error is exactly zero).
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        mean_squared = self.mse(yhat, y)
        return (mean_squared + self.eps).sqrt()
    
class BayesianLoss(nn.Module):
    """Binary cross-entropy (on logits) attenuated by a predicted uncertainty.

    Element-wise the loss is ``0.5 * (exp(-std) * BCE(yhat, y) + std)``; the
    mean over all elements is returned. ``std`` only enters through
    ``exp(-std)`` and as an additive penalty, so it is unconstrained in sign
    and acts like a log-scale uncertainty rather than a literal standard
    deviation.
    """

    def __init__(self):
        super().__init__()
        # Keep per-element losses so each one can be weighted individually.
        self.bce = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, yhat, y, std):
        """Return the scalar loss for logits ``yhat``, targets ``y`` and uncertainty ``std``."""
        elementwise_bce = self.bce(yhat, y)
        attenuated = elementwise_bce * torch.exp(-std)
        return 0.5 * torch.mean(attenuated + std)

    
class CutRelu(nn.Module):
    """Activation that clips its input to the closed interval [0, 1].

    Mathematically this equals ``relu(x) - relu(x - 1)``. That difference is
    numerically fragile, though: for large float32 inputs ``x - 1`` rounds
    back to ``x`` and the result collapses to 0 instead of 1, and ``+inf``
    produces NaN. ``torch.clamp`` gives the intended value for every input.
    The sub-gradient at the exact boundary points 0 and 1 may differ from the
    relu-difference form.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` clipped element-wise to [0, 1]."""
        return torch.clamp(x, min=0.0, max=1.0)

In [7]:
# Single experiment configuration. It is wrapped in a `Box` at the end so the
# rest of the notebook can use attribute access (`config.model.name`).
config = {'seed': 2021,  # passed to seed_everything and to StratifiedKFold
          'root': f"{drive_root}", 
          'n_splits': 10,  # number of StratifiedKFold folds
          'epoch': 30,  # forwarded to pl.Trainer as max_epochs
          # Everything in 'trainer' is splatted into pl.Trainer(**config.trainer).
          'trainer': {
              'gpus': -1,
              'auto_lr_find':False,
              'accumulate_grad_batches': 1,
              'progress_bar_refresh_rate': 1,
              'fast_dev_run': False,
              'num_sanity_val_steps': 0,
              'resume_from_checkpoint': None,
              # NOTE(review): with 'ddp' the training cell of this notebook fails with
              # "MisconfigurationException: Selected distributed backend ddp is not
              # compatible with an interactive environment" (see that cell's output).
              # Run as a script, or pick one of the backends listed in that message.
              'accelerator':'ddp',
              #'strategy':'ddp'
         },
          # Only 'image_size' is read by the visible code (by the albumentations
          # pipelines); 'name', 'squared' and 'aug' are not referenced in this notebook.
          'transform':{
              'name': 'albu',
              'image_size': 224,  # alternatives tried: 224, 384
              'squared':True,
              'aug':'rotate,shift,scale,perspective,more',
          },
          # Splatted into DataLoader(...) for the training set.
          'train_loader':{
              'batch_size': 100,
              'shuffle': True,
              'num_workers': 8,
              'pin_memory': False,
              'drop_last': True,
            
          },
          # Splatted into DataLoader(...) for validation and for embedding extraction.
          'val_loader': {
              'batch_size': 100,
              'shuffle': False,
              'num_workers': 8,
              'pin_memory': False,
              'drop_last': False
         },
          # Suffix of the weights directory: output/weights/<model name>_<save_discript>/
          'save_discript':'Sigmoid_Bayesian_Test',
          'model':{
              # timm model name; other candidates: 'swin_large_patch4_window12_384_in22k',
              # 'deit_base_distilled_patch16_384'
              'name': 'swin_tiny_patch4_window7_224',
              'img_feature_dim':128,  # width of the image-feature head
              'output_dim': 1,  # not read by Model (its last layer is hard-coded to 2 outputs)
              'last_drop':0.5  # dropout probability in front of the image-feature head
          },
          # 'name' is resolved with eval() in Model.configure_optimizers.
          'optimizer':{
              'name': 'optim.AdamW',
              'params':{
                  'lr': 1e-5
              },
          },
          # 'name' is resolved with eval() in Model.configure_optimizers.
          'scheduler':{
              'name': 'optim.lr_scheduler.CosineAnnealingWarmRestarts',
              'params':{
                  'T_0': 4,
                  'T_mult':2,
                  'eta_min': 1e-7,
              }
          },
          # Class names resolved with eval() in Model.__init__.
          'loss': 'BayesianLoss',
          'metric': 'RMSELoss',

          'svr':{
              'C':0.5  # regularisation parameter passed to sklearn's SVR
          },
          'svr_train':True,  # False: load a previously pickled SVR instead of fitting one
          'swin_train':True  # False: skip fine-tuning and reuse the saved fold checkpoints
        
}

config = Box(config)

In [8]:
# Channel statistics used by A.Normalize in both pipelines below.
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

# Training pipeline: resize the longest side to `image_size`, pad to a square
# with a constant border, apply random geometric / photometric augmentations,
# then normalise and convert to a tensor.
# NOTE(review): inside A.OneOf the children's `p` values presumably act as
# relative selection weights rather than independent probabilities - confirm
# against the albumentations documentation for the installed version.
train_transforms = A.Compose([
        A.LongestMaxSize(max_size= config.transform.image_size),
        A.PadIfNeeded(min_height=config.transform.image_size, min_width=config.transform.image_size, border_mode=cv2.BORDER_CONSTANT),
        A.OneOf([
                 # Randomly apply one shift / scale / rotate variant.
                 A.ShiftScaleRotate(shift_limit=(-0.2,0.2),scale_limit=(-0.2,0.1), rotate_limit=(-90,90)
                            , p=1.0, border_mode=cv2.BORDER_CONSTANT),
                 A.ShiftScaleRotate(shift_limit=(-0.2,0.2)
                            , p=1.0, border_mode=cv2.BORDER_CONSTANT),
                 A.ShiftScaleRotate(rotate_limit=(-90,90)
                            , p=1.0, border_mode=cv2.BORDER_CONSTANT),
                 A.ShiftScaleRotate(scale_limit=(-0.2,0.1)
                            , p=1.0, border_mode=cv2.BORDER_CONSTANT),
                 # Affine transform (disabled).
                #  A.IAAAffine(shear=15, p=0.5, mode='constant')
        ], p=0.8),
        A.IAAPerspective(scale=(0.01, 0.15),p=0.3),
        A.OneOf([
            A.RandomContrast(limit=(-0.2,0.2), p=0.2),
            A.RandomGamma(gamma_limit=(80,120), eps=1e-07,p=0.2 )
        ],p=1.0),
        A.Cutout(max_h_size=8, max_w_size=8, num_holes=10, p=0.1),
        A.Downscale(scale_min = 0.8, scale_max=0.95, p=0.1),
        A.ISONoise(intensity=(0.1,0.5), color_shift=(0.01,0.05),p=0.1),
        A.OneOf([
            A.MotionBlur(blur_limit=(3,7),p=0.1),
             A.Blur(blur_limit=(3,7),p=0.1)],  p=1.0),
        
        A.HorizontalFlip(p=0.5),
        # Plain blur (disabled).
        # A.Blur(p=0.1),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2()
        ],
        )

# Validation / inference pipeline: the same resize + pad + normalise steps as
# above, without any random augmentation.
valid_transforms = A.Compose([
        A.LongestMaxSize(max_size= config.transform.image_size),
        A.PadIfNeeded(min_height=config.transform.image_size, min_width=config.transform.image_size, border_mode=cv2.BORDER_CONSTANT),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2()
        ],
        )

def imbal_sampler(lables):
    """Build a ``WeightedRandomSampler`` that balances the label deciles.

    Labels are bucketed by ``int(label / 10)`` and every sample is weighted by
    the inverse of its bucket's population, so sparsely populated score ranges
    are drawn as often as crowded ones.

    Args:
        lables: non-negative numeric labels (list, NumPy array or pandas
            Series). Column-shaped input such as ``(N, 1)`` is flattened.

    Returns:
        A ``WeightedRandomSampler`` that draws ``len(lables)`` samples per epoch.
    """
    bins = (np.asarray(lables) / 10).astype(int).reshape(-1)
    class_count = np.bincount(bins)
    # Look up each sample's own bucket count *before* inverting: every such
    # count is >= 1, so empty buckets are never divided by (no inf weights and
    # no divide-by-zero RuntimeWarning).
    sample_weights = 1.0 / class_count[bins]
    return WeightedRandomSampler(sample_weights, len(bins))


class PetfinderDataset(Dataset):
    """Dataset of pet photos plus their binary metadata features.

    Args:
        df: DataFrame with a ``filepath`` column, the ``DENSE_FEATURES``
            columns and, optionally, a ``Pawpularity`` target column.
        cfg: configuration object; stored but not read by this class.
        transform: callable invoked as ``transform(image=<RGB array>)`` that
            returns a mapping with an ``"image"`` entry (albumentations style).

    Each item is ``({'image': ..., 'feature': ...}, label)`` when the frame
    has a ``Pawpularity`` column, otherwise only the input dict.
    """

    def __init__(self, df, cfg, transform):
        self._X = df["filepath"].values
        # Stored as float32 so the metadata can be concatenated with floating
        # point image features without depending on implicit type promotion.
        self._feature = df[DENSE_FEATURES].values.astype(np.float32)
        self._cfg = cfg
        self._y = df["Pawpularity"].values if "Pawpularity" in df.keys() else None
        self.transform = transform

    def __len__(self):
        return len(self._X)

    def __getitem__(self, idx):
        image_path = self._X[idx]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals a missing / unreadable file by returning None;
            # fail here with the path instead of with an opaque cvtColor error.
            raise FileNotFoundError(f"Could not read image file: {image_path}")
        # OpenCV decodes to BGR; the normalisation constants are given in RGB order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        sample = {'image': self.transform(image=image)["image"], 'feature': self._feature[idx]}

        if self._y is None:
            return sample
        return sample, self._y[idx]

In [9]:
class PetfinderDataModule(LightningDataModule):
    """Lightning data module that serves one cross-validation fold.

    Args:
        train_df: DataFrame with the fold's training rows.
        val_df: DataFrame with the fold's validation rows.
        cfg: configuration providing ``transform``, ``train_loader`` and
            ``val_loader`` sections.
    """

    def __init__(self, train_df, val_df, cfg):
        super().__init__()
        self._cfg = cfg
        self._train_df, self._val_df = train_df, val_df
        # The decile-balancing `imbal_sampler` is deliberately not wired in here.

    def __create_dataset(self, train=True):
        """Build the dataset for the requested split, paired with its transform pipeline."""
        if train == True:
            frame, pipeline = self._train_df, train_transforms
        else:
            frame, pipeline = self._val_df, valid_transforms
        return PetfinderDataset(frame, self._cfg.transform, pipeline)

    def train_dataloader(self):
        """DataLoader over the augmented training split (options: ``cfg.train_loader``)."""
        return DataLoader(self.__create_dataset(True), **self._cfg.train_loader)

    def val_dataloader(self):
        """DataLoader over the un-augmented validation split (options: ``cfg.val_loader``)."""
        return DataLoader(self.__create_dataset(False), **self._cfg.val_loader)

In [10]:
# NOTE(review): autograd anomaly detection is a debugging aid and, per the
# PyTorch documentation, slows training down - consider turning it off for
# full runs.
torch.autograd.set_detect_anomaly(True)
# Seed the random number generators with the configured seed for reproducibility.
seed_everything(config.seed)

Global seed set to 2021


2021

In [11]:
class Model(pl.LightningModule):
    """Image + metadata regressor with a (mean logit, uncertainty) output pair.

    Pipeline: timm backbone -> image-feature head -> concatenation with the
    dense metadata features -> ``dense1`` -> ``dense2`` (2 outputs). Output
    column 0 is the logit of the target scaled to [0, 1]; column 1 is the
    uncertainty term handed to the criterion as ``std``.

    Args:
        cfg: configuration with ``model``, ``optimizer``, ``scheduler``,
            ``loss`` and ``metric`` entries.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.__build_model()
        # NOTE(security): eval() turns config strings into classes; only ever
        # feed this trusted, locally written configuration.
        self._criterion = eval(self.cfg.loss)()
        self._metric = eval(self.cfg.metric)()
        self.save_hyperparameters(cfg)

    def __build_model(self):
        """Create the backbone and the small fully connected heads."""
        # num_classes=0 makes timm return pooled features instead of class logits.
        self.backbone = create_model(
            self.cfg.model.name, pretrained=True, num_classes=0, in_chans=3
        )

        num_features = self.backbone.num_features
        self.img_feature_head = nn.Sequential(
            nn.Dropout(self.cfg.model.last_drop),
            nn.Linear(num_features, self.cfg.model.img_feature_dim),
            nn.SELU()
        )
        # Input width = image features + one value per metadata column
        # (len(DENSE_FEATURES) == 12 for this dataset).
        self.dense1 = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(len(DENSE_FEATURES) + self.cfg.model.img_feature_dim, 64),
            nn.SELU()
        )
        self.dense2 = nn.Linear(64, 2)
        self.last_activate = nn.Sigmoid()

    def _embed_stages(self, image, features):
        """Run the full pipeline and return every intermediate activation.

        Returns:
            ``(backbone_embed, head_embed, fused, hidden, out)`` where ``out``
            is the raw 2-column output of ``dense2``.
        """
        backbone_embed = self.backbone(image)
        head_embed = self.img_feature_head(backbone_embed)
        # The metadata may arrive as an integer tensor; cast it explicitly to
        # the dtype of the image features before concatenating.
        fused = torch.cat([head_embed, features.to(head_embed.dtype)], dim=1)
        hidden = self.dense1(fused)
        out = self.dense2(hidden)
        return backbone_embed, head_embed, fused, hidden, out

    def forward(self, image, features, targets=None):
        """Return ``(mean_logit, std)``, each indexed by batch position.

        ``targets`` is accepted for interface compatibility and is not used.
        """
        out = self._embed_stages(image, features)[-1]
        return out[:, 0], out[:, 1]

    def predict(self, images, features):
        """Return the 0-100 prediction and all intermediate embeddings as NumPy arrays.

        The batch axis is always kept, including for a batch of a single
        sample (the outputs are not squeezed), so per-batch results can be
        concatenated safely by the caller.

        Returns:
            ``(pred, embed, embed2, embed3, embed4)``: the prediction, then
            the backbone, image-head, fused and ``dense1`` activations.
        """
        # Outputs are detached anyway, so no autograd graph is needed.
        with torch.no_grad():
            embed, embed2, embed3, embed4, out = self._embed_stages(images, features)
            pred = self.last_activate(out[:, 0])

        def to_numpy(tensor):
            return tensor.detach().cpu().numpy()

        pred = (pred.detach().cpu() * 100.).numpy()
        return pred, to_numpy(embed), to_numpy(embed2), to_numpy(embed3), to_numpy(embed4)

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log epoch-level loss / metric."""
        loss, pred, labels, metric = self.__share_step(batch, 'train')
        self.log("train_loss", loss, on_step=False, on_epoch=True, sync_dist=True)
        self.log("train_metric", metric, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
        return {'loss': loss, 'pred': pred, 'labels': labels, 'metric': metric}

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and log epoch-level loss / metric."""
        loss, pred, labels, metric = self.__share_step(batch, 'val')
        self.log("val_loss", loss, on_step=False, on_epoch=True, sync_dist=True)
        self.log("val_metric", metric, on_step=False, on_epoch=True, prog_bar=True, sync_dist=True)
        return {'pred': pred, 'labels': labels, 'val_loss': loss, 'val_metric': metric}

    def __share_step(self, batch, mode):
        """Shared train / validation step (``mode`` is currently unused).

        Returns:
            ``(loss, pred, labels, metric)``; ``pred`` and ``labels`` are CPU
            tensors on the original 0-100 scale.
        """
        X, labels = batch
        # The criterion works on targets scaled to [0, 1].
        labels = labels.float() / 100.0
        logits, std = self.forward(X['image'], X['feature'])
        loss = self._criterion(logits, labels, std)
        pred = self.last_activate(logits)

        # Report the metric on the original 0-100 scale.
        pred = pred.detach().cpu() * 100.
        labels = labels.detach().cpu() * 100.
        metric = self._metric(pred, labels)
        return loss, pred, labels, metric

    def configure_optimizers(self):
        """Instantiate the optimizer and LR scheduler named in the config."""
        # NOTE(security): same eval() caveat as in __init__.
        optimizer = eval(self.cfg.optimizer.name)(
            self.parameters(), **self.cfg.optimizer.params
        )
        scheduler = eval(self.cfg.scheduler.name)(
            optimizer,
            **self.cfg.scheduler.params
        )
        return [optimizer], [scheduler]

In [12]:
# Cross-validated training. Per fold:
#   1. (optional) fine-tune the image model and keep its best checkpoint,
#   2. extract backbone embeddings with the frozen best model,
#   3. fit (or load) an SVR on embeddings + metadata,
#   4. score the network, the SVR and their average on the validation split.
skf = StratifiedKFold(
    n_splits=config.n_splits, shuffle=True, random_state=config.seed
)

swin_scores = []
svr_scores = []
ens_scores = []

# Identical for every fold; also read by the score-history cell further down.
model_save_dir = f'{drive_root}/output/weights/{config.model.name}_{config.save_discript}/'


def _rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions."""
    return mean_squared_error(y_true, y_pred) ** 0.5


def _predict_frame(model, frame):
    """Run the frozen model over ``frame`` without augmentation.

    Returns:
        ``(predictions, backbone_embeddings)`` as NumPy arrays, one row per
        row of ``frame`` and in the same order.
    """
    loader = DataLoader(PetfinderDataset(frame, config.transform, valid_transforms), **config.val_loader)
    preds, embeds = [], []
    for batch in loader:
        inputs = batch[0]
        images = inputs['image'].to("cuda")
        y_preds, embed, _, _, _ = model.predict(images, inputs['feature'].to("cuda"))
        # Restore the batch axis explicitly so a final batch holding a single
        # sample can still be concatenated with the others.
        batch_size = images.shape[0]
        preds.append(np.reshape(y_preds, (batch_size,)))
        embeds.append(np.reshape(embed, (batch_size, -1)))
    return np.concatenate(preds), np.concatenate(embeds)


for fold, (train_idx, val_idx) in enumerate(skf.split(df_train["Id"], df_train["Pawpularity"])):
    train_df = df_train.loc[train_idx].reset_index(drop=True)
    val_df = df_train.loc[val_idx].reset_index(drop=True)
    swin_ckpt_path = f'{model_save_dir}swin_fold{fold}.ckpt'

    # Without a Trainer (swin_train disabled) there is only this one process.
    is_rank_zero = True
    if config.swin_train:
        datamodule = PetfinderDataModule(train_df, val_df, config)
        model = Model(config)
        print('loaded model')
        early_stopping = EarlyStopping(monitor="val_metric", patience=5, verbose=False, mode="min")
        lr_monitor = callbacks.LearningRateMonitor()
        loss_checkpoint = callbacks.ModelCheckpoint(
            filename="best_loss",
            monitor="val_metric",
            save_top_k=1,
            mode="min",
            save_last=False,
        )

        trainer = pl.Trainer(
            callbacks=[lr_monitor, early_stopping, loss_checkpoint],
            default_root_dir=f"{drive_root}/output/checkpoints",
            max_epochs=config.epoch,
            **config.trainer,
        )
        trainer.fit(model, datamodule=datamodule)

        is_rank_zero = trainer.global_rank == 0
        if is_rank_zero:
            os.makedirs(model_save_dir, exist_ok=True)
            shutil.copyfile(loss_checkpoint.best_model_path, swin_ckpt_path)

    # Only one process evaluates the fold and fits the SVR.
    if not is_rank_zero:
        continue

    ## SVR
    model = Model.load_from_checkpoint(swin_ckpt_path, cfg=config)
    model = model.to("cuda")
    model.eval()
    model.freeze()

    clf_path = f'{model_save_dir}SVR_fold{fold}.ckpt'

    if config.svr_train:
        train_predicts, embed_train_X = _predict_frame(model, train_df)
    val_predicts, embed_val_X = _predict_frame(model, val_df)

    if config.svr_train:
        clf = SVR(C=config.svr.C)
        svr_train_arr = np.concatenate([embed_train_X, train_df[DENSE_FEATURES].values], axis=1)
        clf.fit(svr_train_arr, train_df.Pawpularity.values.astype('int32'))
        with open(clf_path, "wb") as clf_file:
            pickle.dump(clf, clf_file)
    else:
        # NOTE(security): unpickling executes arbitrary code; only load SVR
        # files that were written by this notebook.
        with open(clf_path, "rb") as clf_file:
            clf = pickle.load(clf_file)

    svr_val_arr = np.concatenate([embed_val_X, val_df[DENSE_FEATURES].values], axis=1)
    svr_preds = clf.predict(svr_val_arr)
    ens_preds = (val_predicts + svr_preds) / 2

    swin_score = _rmse(val_df.Pawpularity, val_predicts)
    svr_score = _rmse(val_df.Pawpularity, svr_preds)
    ens_score = _rmse(val_df.Pawpularity, ens_preds)

    # Exactly one entry per fold in each list, so the means computed later are
    # taken over folds (the network score used to be appended twice per fold).
    swin_scores.append(swin_score)
    svr_scores.append(svr_score)
    ens_scores.append(ens_score)
    print(f'fold{fold} score swin : {swin_score:.2f}, svr : {svr_score:.2f}, ens : {ens_score:.2f}')


loaded model


MisconfigurationException: Selected distributed backend ddp is not compatible with an interactive environment. Run your code as a script, or choose one of the compatible backends: dp, ddp_spawn, ddp_sharded_spawn, tpu_spawn

In [None]:
# Append this run's flattened configuration and mean cross-validation scores
# to the score-history CSV. Only rank 0 writes; when no Trainer was created
# (config.swin_train is False) the single running process writes.
if 'trainer' not in globals() or trainer.global_rank == 0:
    score_path = f'{drive_root}/output/score_history.csv'

    # One row: every config entry becomes a (dot-separated) column.
    cur_score_df = pd.json_normalize(config)
    cur_score_df['swin_mean_score'] = np.mean(swin_scores)
    cur_score_df['svr_mean_score'] = np.mean(svr_scores)
    cur_score_df['ens_mean_score'] = np.mean(ens_scores)
    cur_score_df['model_path'] = model_save_dir

    # Keep the rows of earlier runs and add the current one at the end.
    if os.path.isfile(score_path):
        score_df = pd.read_csv(score_path)
        cur_score_df = pd.concat([score_df, cur_score_df])

    os.makedirs(os.path.dirname(score_path), exist_ok=True)
    cur_score_df.to_csv(score_path, index=False)
    print(f'avg ensemble score : {np.mean(ens_scores)}')