In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import sys
import random
import math
from collections import OrderedDict
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from matplotlib import pyplot as plt
import scipy.ndimage as ndi
from PIL import Image
from sklearn.model_selection import KFold, StratifiedKFold
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, models, transforms
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Make the bundled `pretrainedmodels` package importable.
# Guarded so that re-running this cell does not append the same entry twice.
PRETRAINEDMODELS_DIR = "../input/pretrainedmodels/pretrainedmodels-0.7.4"
if PRETRAINEDMODELS_DIR not in sys.path:
    sys.path.append(PRETRAINEDMODELS_DIR)

In [None]:
import pretrainedmodels

In [None]:
def scale_radius(src, img_size, padding=False):
    """Crop ``src`` to a square around its bright region and rescale it.

    The radius ``r`` is estimated from the middle row of the image: half the
    number of pixels whose channel sum exceeds one tenth of that row's mean.
    The crop is centred on the centroid of all pixels whose channel sum
    exceeds one tenth of the image-wide mean, and is then resized by the
    factor ``img_size / (2 * r)``.

    Args:
        src: image array indexed as ``[row, column, channel]``.
        img_size: target size used to derive the resize factor.
        padding: if True, zero-pad the resized crop up to
            ``img_size`` x ``img_size`` wherever it is smaller.

    Returns:
        The cropped, resized (and optionally padded) image array.

    Raises:
        ValueError: if no bright region can be found (e.g. a blank image),
            which would otherwise lead to a division by zero.
    """
    # Radius estimate taken from the horizontal line through the image centre.
    mid_row = src[src.shape[0] // 2, ...].sum(axis=1)
    r = (mid_row > mid_row.mean() / 10).sum() // 2
    if r <= 0:
        raise ValueError('scale_radius: could not estimate a positive radius')

    # Foreground mask over the whole image; its centroid is the crop centre.
    yx = src.sum(axis=2)
    mask = yx > yx.mean() / 10
    if not mask.any():
        raise ValueError('scale_radius: no foreground pixels found')
    # Mean coordinate of the foreground pixels, i.e. the centroid of the mask.
    # Computed with numpy because the `measure` module the original code
    # referenced is never imported in this notebook.
    rows, cols = np.nonzero(mask)
    yc, xc = np.round([rows.mean(), cols.mean()]).astype('int')

    # Clamp the square crop window to the image bounds.
    x1 = max(xc - r, 0)
    x2 = min(xc + r, src.shape[1] - 1)
    y1 = max(yc - r, 0)
    y2 = min(yc + r, src.shape[0] - 1)
    dst = src[y1:y2, x1:x2]

    scale = img_size / (2 * r)
    dst = cv2.resize(dst, dsize=None, fx=scale, fy=scale)

    if padding:
        # Distribute the missing rows/columns over both sides; any odd
        # remainder goes to the bottom/right so the total is not one short.
        missing_y = max(img_size - dst.shape[0], 0)
        missing_x = max(img_size - dst.shape[1], 0)
        top, left = missing_y // 2, missing_x // 2
        dst = np.pad(dst,
                     ((top, missing_y - top), (left, missing_x - left), (0, 0)),
                     'constant')
    return dst

    
class Dataset(torch.utils.data.Dataset):
    """Image dataset that applies ``scale_radius`` preprocessing on load.

    Each item is read from ``processed/<basename>`` when that file exists;
    otherwise the original image is read, passed through ``scale_radius`` and,
    if ``save_img`` is set, written to ``processed/<basename>``.

    Args:
        img_paths: indexable collection of image file paths.
        labels: indexable collection of labels, aligned with ``img_paths``.
        transform: optional callable applied to the PIL image.
        img_size: target size forwarded to ``scale_radius``.
        save_img: whether to write preprocessed images to ``processed/``.
    """

    def __init__(self, img_paths, labels, transform=None, img_size=288, save_img=False):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform
        self.img_size = img_size
        self.save_img = save_img

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``.

        Raises:
            FileNotFoundError: if the source image cannot be read.
            OSError: if an existing cached image cannot be read.
        """
        img_path, label = self.img_paths[index], self.labels[index]
        cache_path = 'processed/%s' % os.path.basename(img_path)

        if os.path.exists(cache_path):
            img = cv2.imread(cache_path)
            if img is None:
                raise OSError('Could not read cached image: %s' % cache_path)
        else:
            img = cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise FileNotFoundError('Could not read image: %s' % img_path)
            try:
                img = scale_radius(img, img_size=self.img_size, padding=False)
            except Exception as e:
                # Best effort: keep the unprocessed image, but make the
                # fallback visible instead of swallowing it silently.
                import warnings
                warnings.warn('scale_radius failed (%s: %s); using the '
                              'unprocessed image' % (type(e).__name__, e))
            if self.save_img:
                cv2.imwrite(cache_path, img)

        # OpenCV loads images as BGR; convert to RGB before building the PIL image.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_paths)


def get_model(model_name='resnet18', num_outputs=None, pretrained=False,
              freeze_bn=False, dropout_p=0, **kwargs):
    """Build a ``pretrainedmodels`` network with a replaced output head.

    Args:
        model_name: name of a model constructor exposed by ``pretrainedmodels``.
        num_outputs: number of outputs produced by the new head.
        pretrained: if truthy, request the 'imagenet' weights; otherwise none.
        freeze_bn: if True, disable gradients for the weight and bias of every
            ``nn.BatchNorm2d`` layer (the layers' train/eval mode is untouched).
        dropout_p: dropout probability placed before the linear head;
            0 means no dropout layer (ignored for 'dpn' models).
        **kwargs: accepted but unused.

    Returns:
        The constructed model.
    """
    weights = None
    if pretrained:
        weights = 'imagenet'
    constructor = vars(pretrainedmodels)[model_name]
    model = constructor(num_classes=1000, pretrained=weights)

    if 'dpn' in model_name:
        # These models use a 1x1 convolution as their classifier.
        head_in = model.last_linear.in_channels
        model.last_linear = nn.Conv2d(head_in, num_outputs,
                                      kernel_size=1, bias=True)
    else:
        # Adaptive pooling lets the network accept arbitrary input sizes;
        # the pooling attribute name differs between model families.
        pool_attr = 'avgpool' if 'resnet' in model_name else 'avg_pool'
        setattr(model, pool_attr, nn.AdaptiveAvgPool2d(1))

        head_in = model.last_linear.in_features
        if dropout_p != 0:
            model.last_linear = nn.Sequential(
                nn.Dropout(p=dropout_p),
                nn.Linear(head_in, num_outputs),
            )
        else:
            model.last_linear = nn.Linear(head_in, num_outputs)

    if freeze_bn:
        bn_layers = (layer for layer in model.modules()
                     if isinstance(layer, nn.BatchNorm2d))
        for layer in bn_layers:
            layer.weight.requires_grad = False
            layer.bias.requires_grad = False

    return model

In [None]:
# Directory that Dataset reads preprocessed images from and writes them to.
os.makedirs(name='processed', exist_ok=True)

# Train & Inference

In [None]:
# Per-model test predictions, keyed by model name; filled by the inference cells.
l1_probs = dict()

## SE-ResNeXt50_32x4d

### Inference

In [None]:
test_df = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')
dir_name = '../input/aptos2019-blindness-detection/test_images'

test_img_paths = dir_name + '/' + test_df['id_code'].values + '.png'
print(test_img_paths[0])
test_labels = np.zeros(len(test_img_paths))

test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

test_set = Dataset(
    test_img_paths,
    test_labels,
    transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=16,
    shuffle=False,
    num_workers=2)

# create model
model = get_model(model_name='se_resnext50_32x4d',
                  num_outputs=1,
                  pretrained=False,
                  freeze_bn=True,
                  dropout_p=0)
# sample = pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv')
# test_pred = np.zeros((len(sample), 1))
# submission = pd.DataFrame({'id_code':pd.read_csv('../input/aptos2019-blindness-detection/sample_submission.csv').id_code.values,
#                           'diagnosis':np.squeeze(test_pred).astype(int)})

model = model.cuda()

model.eval()

probs = []
for fold in range(5):
    print('Fold [%d/%d]' %(fold+1, 5))

    model.load_state_dict(torch.load('../input/se-resnext50-32x4d/model_%d.pth' % (fold+1),map_location=device))
    

    probs_fold = []
    with torch.no_grad():
        for i, (input, _) in tqdm(enumerate(test_loader), total=len(test_loader)):
#             print(input)
            input = input.cuda()
            output = model(input)
#             break

            probs_fold.extend(output.data.cpu().numpy()[:, 0])
    probs_fold = np.array(probs_fold)
    probs.append(probs_fold)

probs = np.mean(probs, axis=0)
l1_probs['se_resnext50_32x4d'] = probs

del model

torch.cuda.empty_cache()

!nvidia-smi

## SE-ResNeXt101_32x4d

### Inference

In [None]:
test_df = pd.read_csv('../input/aptos2019-blindness-detection/test.csv')
dir_name = '../input/aptos2019-blindness-detection/test_images'
test_img_paths = dir_name + '/' + test_df['id_code'].values + '.png'
test_labels = np.zeros(len(test_img_paths))

test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

test_set = Dataset(
    test_img_paths,
    test_labels,
    transform=test_transform)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=16,
    shuffle=False,
    num_workers=2)

# create model
model = get_model(model_name='se_resnext101_32x4d',
                  num_outputs=1,
                  pretrained=False,
                  freeze_bn=True,
                  dropout_p=0)
model = model.cuda()
model.eval()

probs = []
for fold in range(5):
    print('Fold [%d/%d]' %(fold+1, 5))

    model.load_state_dict(torch.load('../input/se-resnext101-32x4d/model_%d.pth' % (fold+1),map_location=device))

    probs_fold = []
    with torch.no_grad():
        for i, (input, _) in tqdm(enumerate(test_loader), total=len(test_loader)):
            input = input.cuda()
            output = model(input)

            probs_fold.extend(output.data.cpu().numpy()[:, 0])
    probs_fold = np.array(probs_fold)
    probs.append(probs_fold)

probs = np.mean(probs, axis=0)
l1_probs['se_resnext101_32x4d'] = probs

del model

torch.cuda.empty_cache()
!nvidia-smi

# Ensemble

In [None]:
# Equal-weight blend of the two models' fold-averaged predictions.
ensemble_weights = {'se_resnext50_32x4d': 0.5, 'se_resnext101_32x4d': 0.5}
preds = sum(weight * l1_probs[name] for name, weight in ensemble_weights.items())

In [None]:
# Bin edges that turn the continuous ensemble output into labels 0-4.
thrs = [0.5, 1.5, 2.5, 3.5]
# np.digitize returns, for each value, the index of the bin it falls in:
# x < thrs[0] -> 0, thrs[i-1] <= x < thrs[i] -> i, x >= thrs[-1] -> 4.
# Unlike chained in-place masked assignments, already-assigned labels can
# never be re-binned by a later threshold, whatever the threshold values are.
preds = np.digitize(preds, thrs).astype('int')

test_df['diagnosis'] = preds
test_df.to_csv('submission.csv', index=False)

In [None]:
!rm processed/*