- https://www.kaggle.com/abhishek/very-simple-pytorch-training-0-59?scriptVersionId=16436961
- https://www.kaggle.com/abhishek/pytorch-inference-kernel-lazy-tta

# predict test

In [3]:
PRFX = 'devCv0701'
SEED = 111
SZ = (256, 256)
BSZ = 112
BSZ_INFER = BSZ*2
N_EPOCHS = 2

## setup

In [43]:
import numpy as np
import pandas as pd
import time
import datetime
from collections import Counter

from torch.utils.data import Dataset
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.optim import lr_scheduler

from PIL import Image, ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True

from pathlib import Path
p_o = f'../output/{PRFX}'


## preprocess

In [60]:
p = '../input/aptos2019-blindness-detection'
pp = Path(p)
test = pd.read_csv(pp/'test.csv')

img2grd_tst = [(f'{p}/test_images/{o[0]}.png', -1)  for o in test.values]
img2grd_tst[:5]

[('../input/aptos2019-blindness-detection/test_images/0005cfc8afb6.png', -1),
 ('../input/aptos2019-blindness-detection/test_images/003f0afdcd15.png', -1),
 ('../input/aptos2019-blindness-detection/test_images/006efc72b638.png', -1),
 ('../input/aptos2019-blindness-detection/test_images/00836aaacf06.png', -1),
 ('../input/aptos2019-blindness-detection/test_images/009245722fa4.png', -1)]

## dataset

In [33]:
class BlndDataset(Dataset):
    def __init__(self, img2grd, transform):
        self.img2grd = img2grd
        self.transform = transform

    def __len__(self):
        return len(self.img2grd)

    def __getitem__(self, idx):
        img,grd = self.img2grd[idx]
        image = self.transform(Image.open(img))
        label = torch.tensor(int(grd))
        return image, label

transform_test = transforms.Compose([
    transforms.Resize(SZ),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

ds_tst = BlndDataset(img2grd_tst, transform=transform_test)
data_loader_tst = torch.utils.data.DataLoader(ds_tst, batch_size=BSZ_INFER, shuffle=False, num_workers=0)

## model

In [20]:
model = torchvision.models.resnet50(pretrained=False)
# model.fc = nn.Linear(2048, 1)
model.fc = nn.Sequential(
                          nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                          nn.Dropout(p=0.25),
                          nn.Linear(in_features=2048, out_features=2048, bias=True),
                          nn.ReLU(),
                          nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                          nn.Dropout(p=0.5),
                          nn.Linear(in_features=2048, out_features=1, bias=True),
                         )


model.load_state_dict(torch.load(f"{p_o}/model.bin"));

In [21]:
device = torch.device("cuda")
model = model.to(device)

In [23]:
for param in model.parameters():
    param.requires_grad = False

model.eval();

## run predicting test

In [36]:
len_dl_tst = len(data_loader_tst)

since = time.time()

preds_tst = np.zeros((len(ds_tst), 1))
for step, d in enumerate(data_loader_tst):
    if (step) % 10 == 0: 
        print(f'[{datetime.datetime.now()}] step-{step}/{len_dl_tst}')
    inputs = d[0]
    inputs = inputs.to(device, dtype=torch.float)
    with torch.no_grad(): outputs = model(inputs)
    preds_tst[step*BSZ_INFER:(step+1)*BSZ_INFER] = outputs.detach().cpu().squeeze().numpy()[:,None]#.ravel().reshape(-1, 1)

time_elapsed = time.time() - since
print('Test complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

[2019-07-01 15:31:30.283201] step-0/9
Training complete in 1m 24s


In [37]:
preds_tst

array([[0.90436178],
       [2.56440163],
       [2.37508965],
       ...,
       [2.49047279],
       [3.41678309],
       [1.4274677 ]])

In [40]:
def bucket(preds_raw, coef = [0.5, 1.5, 2.5, 3.5]):
    preds = np.zeros(preds_raw.shape)
    for i, pred in enumerate(preds_raw):
        if pred < coef[0]:
            preds[i] = 0
        elif pred >= coef[0] and pred < coef[1]:
            preds[i] = 1
        elif pred >= coef[1] and pred < coef[2]:
            preds[i] = 2
        elif pred >= coef[2] and pred < coef[3]:
            preds[i] = 3
        else:
            preds[i] = 4
    return preds

In [39]:
# {'simple': [0.5, 1.5, 2.5, 3.5],
#  'hocop1': [1.0000660342883434,
#   1.5000660342883434,
#   2.4720092552779804,
#   3.070521598739617],
#  'abhishek': array([0.55828336, 1.40620064, 2.47957064, 3.0831964 ])}
coef = [0.55828336, 1.40620064, 2.47957064, 3.0831964 ]

preds_tst_grd = bucket(preds_tst, coef)

In [55]:
preds_tst_grd.squeeze()

array([1., 3., 2., ..., 3., 4., 2.])

In [56]:
Counter(preds_tst_grd.squeeze()).most_common()

[(2.0, 1054), (1.0, 460), (3.0, 246), (0.0, 92), (4.0, 76)]

## submit

In [57]:
sample = pd.read_csv("../input/aptos2019-blindness-detection/sample_submission.csv")
sample.diagnosis = preds_tst_grd.squeeze().astype(int)
sample.head()

Unnamed: 0,id_code,diagnosis
0,0005cfc8afb6,1
1,003f0afdcd15,3
2,006efc72b638,2
3,00836aaacf06,2
4,009245722fa4,2


In [59]:
sample.to_csv(f"{p_o}/submission.csv", index=False)