- https://www.kaggle.com/abhishek/very-simple-pytorch-training-0-59?scriptVersionId=16436961
- https://www.kaggle.com/abhishek/pytorch-inference-kernel-lazy-tta

In [1]:
dbg = True
if dbg:
    dbgtrnsz=500
    dbgvalsz=500

In [2]:
PRFX = 'devCv0701'
SEED = 111
SZ = (256, 256)
BSZ = 64
BSZ_INFER = BSZ*2
N_EPOCHS = 2

# setup

In [3]:
import random 
import numpy as np
import torch
import os
import datetime

def set_torch_seed(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed) 
        torch.backends.cudnn.deterministic = True 
        torch.backends.cudnn.benchmark = False

set_torch_seed()

In [4]:
import pandas as pd
from collections import Counter
import time
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import mean_squared_error
from sklearn.metrics import cohen_kappa_score
from functools import partial
import scipy as sp

from torch.utils.data import Dataset
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.optim import lr_scheduler

from PIL import Image, ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True


from pathlib import Path
p_o = f'../output/{PRFX}'
Path(p_o).mkdir(exist_ok=True, parents=True)

In [5]:
def quadratic_weighted_kappa(y1, y2):
    return cohen_kappa_score(y1, y2, weights='quadratic')

# preprocess

In [6]:
img2grd = []

In [7]:
p = '../input/aptos2019-blindness-detection'
pp = Path(p)
train = pd.read_csv(pp/'train.csv')

len_blnd = len(train)

img2grd_blnd = [(f'{p}/train_images/{o[0]}.png',o[1])  for o in train.values]

img2grd += img2grd_blnd
display(len(img2grd))
display(Counter(o[1] for o in img2grd).most_common())

3662

[(0, 1805), (2, 999), (1, 370), (4, 295), (3, 193)]

In [8]:
p = '../input/diabetic-retinopathy-detection'
pp = Path(p)
train=pd.read_csv(pp/'trainLabels.csv')
test=pd.read_csv(pp/'retinopathy_solution.csv')

img2grd_diab_train=[(f'{p}/train_images/{o[0]}.jpeg',o[1])  for o in train.values]
img2grd_diab_test=[(f'{p}/test_images/{o[0]}.jpeg',o[1])  for o in test.values]
img2grd += img2grd_diab_train
display(len(img2grd))
display(Counter(o[1] for o in img2grd).most_common())
img2grd += img2grd_diab_test
len(img2grd)
display(Counter(o[1] for o in img2grd).most_common())

38788

[(0, 27615), (2, 6291), (1, 2813), (3, 1066), (4, 1003)]

[(0, 67148), (2, 14152), (1, 6575), (3, 2280), (4, 2209)]

In [9]:
p = '../input/IDRID/B. Disease Grading'
pp = Path(p)
train=pd.read_csv(pp/'2. Groundtruths/a. IDRiD_Disease Grading_Training Labels.csv')
test=pd.read_csv(pp/'2. Groundtruths/b. IDRiD_Disease Grading_Testing Labels.csv')

img2grd_idrid_train=[(f'{p}/1. Original Images/a. Training Set/{o[0]}.jpg',o[1])  for o in train.values]
img2grd_idrid_test=[(f'{p}/1. Original Images/b. Testing Set/{o[0]}.jpg',o[1])  for o in test.values]
img2grd += img2grd_idrid_train
display(len(img2grd))
display(Counter(o[1] for o in img2grd).most_common())
img2grd += img2grd_idrid_test
len(img2grd)
display(Counter(o[1] for o in img2grd).most_common())

92777

[(0, 67282), (2, 14288), (1, 6595), (3, 2354), (4, 2258)]

[(0, 67316), (2, 14320), (1, 6600), (3, 2373), (4, 2271)]

In [10]:
img2grd = np.array(img2grd)

In [11]:
if np.all([Path(o[0]).exists() for o in img2grd]): print('All files are here!')

All files are here!


# dataset

In [12]:
set_torch_seed()
idx_val = range(len_blnd)
idx_trn = range(len_blnd, len(img2grd))

img2grd_trn = img2grd[idx_trn]
img2grd_val = img2grd[idx_val]

display(len(img2grd_trn), len(img2grd_val))

img2grd_trn[:3], img2grd_val[:3]

89218

3662

(array([['../input/diabetic-retinopathy-detection/train_images/10_left.jpeg',
         '0'],
        ['../input/diabetic-retinopathy-detection/train_images/10_right.jpeg',
         '0'],
        ['../input/diabetic-retinopathy-detection/train_images/13_left.jpeg',
         '0']], dtype='<U82'),
 array([['../input/aptos2019-blindness-detection/train_images/000c1434d8d7.png',
         '2'],
        ['../input/aptos2019-blindness-detection/train_images/001639a390f0.png',
         '4'],
        ['../input/aptos2019-blindness-detection/train_images/0024cdab0c1e.png',
         '1']], dtype='<U82'))

In [13]:
if dbg:
    img2grd_trn = img2grd_trn[:dbgtrnsz]
    img2grd_val = img2grd_val[:dbgvalsz]

In [14]:
class BlndDataset(Dataset):
    def __init__(self, img2grd, transform):
        self.img2grd = img2grd
        self.transform = transform

    def __len__(self):
        return len(self.img2grd)

    def __getitem__(self, idx):
        img,grd = img2grd[idx]
        image = self.transform(Image.open(img))
        label = torch.tensor(int(grd))
        return image, label

transform_train = transforms.Compose([
    transforms.Resize(SZ),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

ds_trn = BlndDataset(img2grd_trn, transform=transform_train)
ds_val = BlndDataset(img2grd_val, transform=transform_train)

data_loader = torch.utils.data.DataLoader(ds_trn, batch_size=BSZ, shuffle=True, num_workers=0)
data_loader_val = torch.utils.data.DataLoader(ds_val, batch_size=BSZ_INFER, shuffle=False, num_workers=0)

# model

In [15]:
model = torchvision.models.resnet50(pretrained=False)
model.load_state_dict(torch.load("../input/pytorch_models/resnet50-19c8e357.pth"));

In [16]:
# model.fc = nn.Linear(2048, 1)
model.fc = nn.Sequential(
                          nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                          nn.Dropout(p=0.25),
                          nn.Linear(in_features=2048, out_features=2048, bias=True),
                          nn.ReLU(),
                          nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                          nn.Dropout(p=0.5),
                          nn.Linear(in_features=2048, out_features=1, bias=True),
                         )

device = torch.device("cuda")

model = model.to(device)

In [17]:
plist = [
         {'params': model.layer4.parameters(), 'lr': 1e-4, 'weight': 0.001},
         {'params': model.fc.parameters(), 'lr': 1e-3}
         ]

optimizer = optim.Adam(plist, lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10)

# Training Loop

In [18]:
len_dl = len(data_loader)
len_ds = len(ds_trn)
len_dl_val = len(data_loader_val)
y_val = np.array([int(o[1]) for o in ds_val.img2grd])[:,None]

since = time.time()
criterion = nn.MSELoss()

set_torch_seed()
for epoch in range(N_EPOCHS):
    print(f'Epoch {epoch}/{N_EPOCHS-1}')
    scheduler.step()
    model.train()
    running_loss = 0.0
    running_n = 0
    for step, d in enumerate(data_loader):
        inputs = d[0]
        labels = d[1].view(-1, 1)
        inputs = inputs.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        running_n += inputs.size(0)
        if (step) % (2 if dbg else 100) == 0: 
            print(f'[{datetime.datetime.now()}] epoch-{epoch} step-{step}/{len_dl} loss: {running_loss/running_n:.5f}')
    epoch_loss = running_loss / len_ds
    
    ###### val #######
    model.eval()
    preds_val = np.zeros((len(ds_val), 1))
    for step, d in enumerate(data_loader_val):
        if (step) % (2 if dbg else 100) == 0: 
            print(f'[{datetime.datetime.now()}] epoch-{epoch} val step-{step}/{len_dl_val}')
        inputs = d[0]
        inputs = inputs.to(device, dtype=torch.float)
        with torch.no_grad(): outputs = model(inputs)
        preds_val[step*BSZ_INFER:(step+1)*BSZ_INFER] = outputs.detach().cpu().squeeze().numpy()[:,None]#.ravel().reshape(-1, 1)
    
    mse_val = mean_squared_error(preds_val, y_val)        
    print(f'Training Loss: {epoch_loss:.4f}; Val Loss: {mse_val:.4f}')

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
torch.save(model.state_dict(), f"{p_o}/model.bin")

Epoch 0/1
[2019-07-01 13:59:04.010033] epoch-0 step-2/8 loss: 4.28030
[2019-07-01 13:59:20.932850] epoch-0 step-4/8 loss: 3.45860
[2019-07-01 13:59:39.011240] epoch-0 step-6/8 loss: 3.11528
[2019-07-01 13:59:55.017603] epoch-0 step-8/8 loss: 2.97926
[2019-07-01 14:00:29.551211] val step-2/4
[2019-07-01 14:01:01.658754] val step-4/4
Training Loss: 2.9793; Val Loss: 1.9403
Epoch 1/1
[2019-07-01 14:01:18.658523] epoch-1 step-2/8 loss: 1.86039
[2019-07-01 14:01:36.345851] epoch-1 step-4/8 loss: 1.76353
[2019-07-01 14:01:52.939712] epoch-1 step-6/8 loss: 1.66474
[2019-07-01 14:02:09.974656] epoch-1 step-8/8 loss: 1.69261
[2019-07-01 14:02:44.434517] val step-2/4


KeyboardInterrupt: 

# validate

In [23]:
y_val = np.array([int(o[1]) for o in ds_val.img2grd])[:,None]
len_dl_val = len(data_loader_val)
model.eval()
preds_val = np.zeros((len(ds_val), 1))
for step, d in enumerate(data_loader_val):
    if (step) % (2 if dbg else 100) == 0: 
        print(f'[{datetime.datetime.now()}] epoch-{epoch} val step-{step}/{len_dl_val}')
    inputs = d[0]
    inputs = inputs.to(device, dtype=torch.float)
    with torch.no_grad(): outputs = model(inputs)
    preds_val[step*BSZ_INFER:(step+1)*BSZ_INFER] = outputs.detach().cpu().squeeze().numpy()[:,None]#.ravel().reshape(-1, 1)
mse_val = mean_squared_error(preds_val, y_val)        
print(f'Val Loss: {mse_val:.4f}')

[2019-07-01 14:06:37.566266] epoch-1 val step-0/4
[2019-07-01 14:07:10.573665] epoch-1 val step-2/4
Training Loss: 1.6926; Val Loss: 1.4103


# threshold selection

In [31]:
# https://www.kaggle.com/c/petfinder-adoption-prediction/discussion/88773#latest-515044
# We used OptimizedRounder given by hocop1. https://www.kaggle.com/c/petfinder-adoption-prediction/discussion/76107#480970
# put numerical value to one of bins
def to_bins(x, borders):
    for i in range(len(borders)):
        if x <= borders[i]:
            return i
    return len(borders)

class Hocop1OptimizedRounder(object):
    def __init__(self):
        self.coef_ = 0

    def _loss(self, coef, X, y, idx):
        X_p = np.array([to_bins(pred, coef) for pred in X])
        ll = -quadratic_weighted_kappa(y, X_p)
        return ll

    def fit(self, X, y):
        coef = [1.5, 2.0, 2.5, 3.0]
        golden1 = 0.618
        golden2 = 1 - golden1
        ab_start = [(1, 2), (1.5, 2.5), (2, 3), (2.5, 3.5)]
        for it1 in range(10):
            for idx in range(4):
                # golden section search
                a, b = ab_start[idx]
                # calc losses
                coef[idx] = a
                la = self._loss(coef, X, y, idx)
                coef[idx] = b
                lb = self._loss(coef, X, y, idx)
                for it in range(20):
                    # choose value
                    if la > lb:
                        a = b - (b - a) * golden1
                        coef[idx] = a
                        la = self._loss(coef, X, y, idx)
                    else:
                        b = b - (b - a) * golden2
                        coef[idx] = b
                        lb = self._loss(coef, X, y, idx)
        self.coef_ = {'x': coef}

    def predict(self, X, coef):
        X_p = np.array([to_bins(pred, coef) for pred in X])
        return X_p

    def coefficients(self):
        return self.coef_['x']

In [32]:
# https://www.kaggle.com/c/petfinder-adoption-prediction/discussion/76107#480970
class AbhishekOptimizedRounder(object):
    def __init__(self):
        self.coef_ = 0

    def _kappa_loss(self, coef, X, y):
        X_p = np.copy(X)
        for i, pred in enumerate(X_p):
            if pred < coef[0]:
                X_p[i] = 0
            elif pred >= coef[0] and pred < coef[1]:
                X_p[i] = 1
            elif pred >= coef[1] and pred < coef[2]:
                X_p[i] = 2
            elif pred >= coef[2] and pred < coef[3]:
                X_p[i] = 3
            else:
                X_p[i] = 4

        ll = quadratic_weighted_kappa(y, X_p)
        return -ll

    def fit(self, X, y):
        loss_partial = partial(self._kappa_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef):
        X_p = np.copy(X)
        for i, pred in enumerate(X_p):
            if pred < coef[0]:
                X_p[i] = 0
            elif pred >= coef[0] and pred < coef[1]:
                X_p[i] = 1
            elif pred >= coef[1] and pred < coef[2]:
                X_p[i] = 2
            elif pred >= coef[2] and pred < coef[3]:
                X_p[i] = 3
            else:
                X_p[i] = 4
        return X_p

    def coefficients(self):
        return self.coef_['x']

In [33]:
def bucket(preds_raw, coef = [0.5, 1.5, 2.5, 3.5]):
    preds = np.zeros(preds_raw.shape)
    for i, pred in enumerate(preds_raw):
        if pred < coef[0]:
            preds[i] = 0
        elif pred >= coef[0] and pred < coef[1]:
            preds[i] = 1
        elif pred >= coef[1] and pred < coef[2]:
            preds[i] = 2
        elif pred >= coef[2] and pred < coef[3]:
            preds[i] = 3
        else:
            preds[i] = 4
    return preds

In [34]:
optnm2coefs = {'simple': [0.5, 1.5, 2.5, 3.5]}

In [35]:
%%time
optR = Hocop1OptimizedRounder()
optR.fit(preds_val, y_val)
optnm2coefs['hocop1'] = optR.coefficients()

CPU times: user 1.73 s, sys: 12 µs, total: 1.73 s
Wall time: 1.73 s


In [36]:
%%time
optR = AbhishekOptimizedRounder()
optR.fit(preds_val, y_val)
optnm2coefs['abhishek'] = optR.coefficients()

CPU times: user 622 ms, sys: 12 µs, total: 622 ms
Wall time: 620 ms


In [40]:
optnm2coefs

{'simple': [0.5, 1.5, 2.5, 3.5],
 'hocop1': [1.0000660342883434,
  1.5000660342883434,
  2.000066034288343,
  2.500066034288343],
 'abhishek': array([ 0.1581871 , -2.70365979,  4.59232128,  7.62055004])}

In [37]:
optnm2preds_val_grd = {k: bucket(preds_val, coef) for k,coef in optnm2coefs.items()}

In [38]:
optnm2qwk = {k: quadratic_weighted_kappa(y_val, preds) for k,preds in optnm2preds_val_grd.items()}

In [39]:
optnm2qwk

{'simple': 0.454718245127518,
 'hocop1': 0.28424879814303483,
 'abhishek': 0.7640877563778168}