In [1]:
import sys
# Add the timm sources shipped under ../input to the import search path so that
# `import timm` can find them (appended last, so an already-installed timm would still win).
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

In [2]:
import os
import gc
import cv2
import copy
import time
import yaml
import random
import shutil
import warnings
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob as glob_file
from tqdm import tqdm
from PIL import Image, ImageDraw
from shutil import copyfile
from IPython.core.display import Video, display
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, KFold, GroupKFold, StratifiedKFold

import timm
import torch
from fastai.vision.all import *

# Silence all warnings for the rest of the notebook (this also hides deprecation notices).
warnings.simplefilter('ignore')
# Use fully-qualified option names: the abbreviated "max_columns" is resolved by
# pattern matching and becomes ambiguous (OptionError) on pandas versions that
# also define "styler.render.max_columns".
pd.set_option('display.max_columns', 150)
pd.set_option('display.max_rows', 150)

In [3]:
#timm.list_models()

# Configurations

In [4]:
# Central configuration: every tunable read by the cells below lives here.
CFG = {
    # --- bookkeeping ---
    # NOTE(review): "save_prev" is not referenced by any cell visible here — confirm it is still needed.
    "save_prev": [True, ["petfinder_*.pth"]],
    "seed": 42,
    "device": "cuda:0" if torch.cuda.is_available() else "cpu",
    # --- data locations ---
    "input_img": "../input/tensorflow-great-barrier-reef/train_images/",
    "input_path": "../input/tensorflow-great-barrier-reef/train.csv",
    "output_path": "./",
    "save_name": "gbr_swin_binary_fastai",
    # --- model ---
    "model": "swin_large_patch4_window7_224_in22k",
    "loss": "binary",
    "size": 224,  # passed to Resize() as the item transform
    # --- training ---
    "batch_size": 32,
    "n": 6000,  # forwarded as `n=` to ImageDataLoaders.from_df
    "epochs": 10,
    "lr": 0.0002,
    "early_stopping": 3,  # patience of EarlyStoppingCallback
    "num_workers": 4,
}

CFG

{'save_prev': [True, ['petfinder_*.pth']],
 'seed': 42,
 'device': 'cuda:0',
 'input_img': '../input/tensorflow-great-barrier-reef/train_images/',
 'input_path': '../input/tensorflow-great-barrier-reef/train.csv',
 'output_path': './',
 'save_name': 'gbr_swin_binary_fastai',
 'model': 'swin_large_patch4_window7_224_in22k',
 'loss': 'binary',
 'size': 224,
 'batch_size': 32,
 'n': 6000,
 'epochs': 10,
 'lr': 0.0002,
 'early_stopping': 3,
 'num_workers': 4}

In [5]:
def get_img(path):
    """Read an image from disk and return it with channels in RGB order.

    Args:
        path: filesystem path handed to ``cv2.imread``.

    Returns:
        The decoded image with its last axis reversed (BGR -> RGB). This is a
        reversed view of the array returned by OpenCV, not a copy.

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns ``None``, which it does
            instead of raising when the file is missing or cannot be decoded.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        # Without this guard the slice below fails with an opaque
        # "'NoneType' object is not subscriptable" that hides the bad path.
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {path}")
    im_rgb = im_bgr[:, :, ::-1]
    return im_rgb

def seed_everything(seed=42):
    """Seed every random number generator used by the notebook.

    Covers Python's ``random``, NumPy's global RNG and PyTorch (including the
    CUDA generator), switches cuDNN to deterministic mode with autotuning
    disabled, and exports ``PYTHONHASHSEED``.

    Args:
        seed: integer seed applied to all generators.
    """
    # Exported for any code that reads it; changing it here does not re-seed
    # string hashing in the interpreter that is already running.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Library-level generators.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # cuDNN: disable benchmark autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
# Seed all RNGs once, before any data loading or model creation, using the seed from CFG.
seed_everything(CFG["seed"])

# Load data

In [6]:
df = pd.read_csv(CFG["input_path"])

# Frame images are addressed as <input_img>video_<video_id>/<video_frame>.jpg
df["path"] = (
    CFG["input_img"]
    + "video_"
    + df["video_id"].astype(str)
    + "/"
    + df["video_frame"].astype(str)
    + ".jpg"
)
# Binary target: 1 when the annotations string is anything other than "[]", else 0.
df["label"] = df["annotations"].ne("[]").astype(int)

print(df.shape)
print(df["label"].value_counts())
df.head()

(23501, 8)
0    18582
1     4919
Name: label, dtype: int64


Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,path,label
0,0,40258,0,0,0-0,[],../input/tensorflow-great-barrier-reef/train_images/video_0/0.jpg,0
1,0,40258,1,1,0-1,[],../input/tensorflow-great-barrier-reef/train_images/video_0/1.jpg,0
2,0,40258,2,2,0-2,[],../input/tensorflow-great-barrier-reef/train_images/video_0/2.jpg,0
3,0,40258,3,3,0-3,[],../input/tensorflow-great-barrier-reef/train_images/video_0/3.jpg,0
4,0,40258,4,4,0-4,[],../input/tensorflow-great-barrier-reef/train_images/video_0/4.jpg,0


In [7]:
def prepare_dataloader(df):
    """Build the fastai ``ImageDataLoaders`` for training and validation.

    Args:
        df: DataFrame with a ``path`` column (image file), a ``label`` column
            (target) and an ``is_valid`` column (validation-split flag).
            The frame is copied before use, so the caller's object is not modified.

    Returns:
        The object returned by ``ImageDataLoaders.from_df``.
    """
    # Per-item resize first, then the batch-level colour / flip augmentations.
    item_tfms = Resize(CFG['size'])
    batch_tfms = setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation(), Flip()])

    return ImageDataLoaders.from_df(
        df.copy(),
        # column mapping
        fn_col='path',
        label_col="label",
        valid_col='is_valid',
        y_block=RegressionBlock,
        # transforms
        item_tfms=item_tfms,
        batch_tfms=batch_tfms,
        # loader settings
        bs=CFG['batch_size'],
        # NOTE(review): `n` is forwarded to the underlying DataLoader — presumably it
        # limits how many items the training loader draws per epoch; confirm.
        n=CFG['n'],
        shuffle=True,
        num_workers=CFG['num_workers'],
        seed=CFG["seed"],
    )

def gbr_accuracy(input, target):
    """Accuracy of thresholded sigmoid predictions against binary targets.

    Args:
        input: tensor of raw logits; flattened before use.
        target: tensor of 0/1 labels with the same number of elements as
            ``input``; flattened before use.

    Returns:
        A 0-dim float tensor: the fraction of elements where
        ``sigmoid(input) > 0.5`` agrees with ``target``.
    """
    # torch.sigmoid replaces the deprecated F.sigmoid alias.
    predicted_positive = torch.sigmoid(input.flatten()) > 0.5
    # Flatten the target as well: comparing (N,) predictions with an (N, 1)
    # target would broadcast to an (N, N) matrix and yield a meaningless mean.
    return (predicted_positive == target.flatten()).float().mean()

In [8]:
class TransformerModel(nn.Module):
    """timm backbone followed by a single-output linear head."""

    def __init__(self, model_name, pretrained=True):
        """Create the backbone and its linear head.

        Args:
            model_name: architecture name passed to ``timm.create_model``.
            pretrained: forwarded to ``timm.create_model``.
        """
        super().__init__()
        # num_classes=0 asks timm for the backbone without a classifier layer,
        # so the forward pass yields features of size `num_features`.
        self.model = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=0,
            in_chans=3,
        )
        self.linear = nn.Linear(self.model.num_features, 1)

    def forward(self, x):
        """Run the backbone on ``x`` and map its features to one value per sample."""
        return self.linear(self.model(x))

def get_learner(df):
    """Assemble the fastai ``Learner`` used for training.

    Args:
        df: DataFrame handed to ``prepare_dataloader``.

    Returns:
        A ``Learner`` wrapping a pretrained ``TransformerModel`` (architecture
        taken from ``CFG["model"]``), using ``BCEWithLogitsLossFlat`` as loss and
        ``gbr_accuracy`` as metric, after ``to_fp16()`` has been applied.
    """
    learner = Learner(
        prepare_dataloader(df),
        TransformerModel(CFG["model"], pretrained=True),
        loss_func=BCEWithLogitsLossFlat(),
        metrics=AccumMetric(gbr_accuracy),
    )
    return learner.to_fp16()

In [9]:
# Hold out the last third of each video's rows (in DataFrame order) for validation;
# the remaining rows of every video are used for training.
valid_idx = []
for video_id in df["video_id"].unique():
    video_frames = df[df["video_id"] == video_id]
    # One third of this video's rows, rounded down.
    n_valid = len(video_frames) // 3
    valid_idx.extend(video_frames.tail(n_valid).index)

# Flag column consumed by ImageDataLoaders.from_df(valid_col='is_valid').
df["is_valid"] = 0
df.loc[valid_idx, "is_valid"] = 1

In [10]:
# Row counts per label within the training (is_valid=0) and validation (is_valid=1) splits.
df.groupby(["is_valid", "label"])["label"].count()

is_valid  label
0         0        12403
          1         3265
1         0         6179
          1         1654
Name: label, dtype: int64

In [11]:
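# Learning-rate finder call, kept commented out, with the suggestion recorded alongside it
# (close to the CFG["lr"] value of 0.0002):
#   get_learner(df).lr_find(end_lr=3e-2)
#   -> SuggestedLRs(valley=0.00024877983378246427)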

In [12]:
all_preds = []

learn = get_learner(df)
# SaveModelCallback writes the best checkpoint seen so far to ./models/model.pth;
# EarlyStoppingCallback stops training once valid_loss has failed to improve for
# CFG['early_stopping'] epochs.
learn.fit_one_cycle(
    CFG["epochs"],
    CFG["lr"],
    cbs=[
        SaveModelCallback(),
        EarlyStoppingCallback(monitor='valid_loss', patience=CFG['early_stopping']),
    ],
)
# Move the saved checkpoint to its final name. os.path.join keeps the path valid
# even if CFG['output_path'] is given without a trailing slash.
shutil.move("./models/model.pth", os.path.join(CFG['output_path'], f"{CFG['save_name']}.pth"))

# Free memory in the order that actually releases it: drop the reference, collect
# garbage so the learner's tensors are destroyed, then ask the CUDA caching
# allocator to hand its now-unused blocks back.
del learn
gc.collect()
torch.cuda.empty_cache()

# Remove the checkpoint directory (pure-Python equivalent of `rm -rf ./models`).
shutil.rmtree("./models", ignore_errors=True)

Downloading: "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth" to /root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22k.pth


epoch,train_loss,valid_loss,gbr_accuracy,time
0,0.455363,0.610386,0.778629,09:16
1,0.286974,1.082221,0.554194,08:59
2,0.179076,1.217251,0.758458,08:49
3,0.101776,1.577968,0.610877,08:48


Better model found at epoch 0 with valid_loss value: 0.6103855967521667.
No improvement since epoch 0: early stopping
