In [1]:
!pip install ../input/pretrainedmodels/pretrainedmodels-0.7.4/pretrainedmodels-0.7.4/ > /dev/null # no output
!pip install ../input/pytorchcv/pytorchcv-0.0.58-py2.py3-none-any.whl > /dev/null # no output
!pip install ../input/pytorch-tta > /dev/null # no output
!pip install ../input/resnest > /dev/null # no output

In [5]:
import gc
import os
import argparse
import random
import sys
import math
import json
import six
import warnings
from pathlib import Path
from tqdm import tqdm
from logging import getLogger
from IPython.core.display import display, HTML
from collections import defaultdict
from time import perf_counter
from typing import List
from distutils.util import strtobool

sys.path.insert(0, '../input/semisupervised-imagenet-models/semi-supervised-ImageNet1K-models-master/')
package_path = '../input/efficientnetpytorch/'
sys.path.append(package_path)

# --- plotly ---
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly import tools, subplots
py.init_notebook_mode(connected=True)

import numpy
import numpy as np 
import pandas as pd
import cv2
from numpy.random.mtrand import RandomState

import matplotlib.pyplot as plt
import seaborn as sns

import sklearn.metrics
from sklearn import preprocessing
from sklearn.model_selection import KFold, train_test_split

import torch
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
from torch import nn
from torch.nn.parameter import Parameter
from torch.nn import init, Sequential
from torch.utils.data.dataloader import DataLoader

import albumentations
import albumentations as A
import skimage.io
from PIL import Image, ImageOps, ImageEnhance
from skimage.transform import resize, rescale
from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip
from albumentations.pytorch import ToTensorV2
from skimage.transform import AffineTransform, warp

import ttach as tta
from torchvision import transforms
from hubconf import *
from efficientnet_pytorch import EfficientNet

In [6]:
# --- run switches (not read anywhere else in the visible notebook) ---
debug = False
submission = False

# --- inference settings ---
batch_size = 4        # batch size handed to both test DataLoaders
device = 'cuda:0'     # re-assigned in the model-loading cell
out = '.'             # not read anywhere else in the visible notebook
image_size = 256      # not read as a global in the visible notebook
arch = 'pretrained'   # not read anywhere else in the visible notebook

# --- tiling geometry ---
sz = 256  # edge length in pixels of one square tile (used by get_tiles / PANDADataset)
N = 36    # number of tiles kept per slide (default n_tiles of PANDADataset)

In [7]:
# Locations of the competition data.
DATA = '../input/prostate-cancer-grade-assessment/test_images'
IMAGES = '../input/prostate-cancer-grade-assessment/train_images'
TEST = pd.read_csv('../input/prostate-cancer-grade-assessment/test.csv')
TRAIN = pd.read_csv('../input/prostate-cancer-grade-assessment/train.csv')
SAMPLE = '../input/prostate-cancer-grade-assessment/sample_submission.csv'

# Directory holding the trained checkpoints (*.pt) that form the ensemble.
model_dir = '../input/fold4-epoch25/'

# os.listdir returns entries in arbitrary order; sorting makes the ensemble
# order (and therefore MODELS[0]) reproducible from run to run.
MODELS = [
    os.path.join(model_dir, filename)
    for filename in sorted(os.listdir(model_dir))
    if filename.endswith(".pt")
]
for model_path in MODELS:
    print(model_path)


../input/fold4-epoch25/model_026.pt


In [8]:
# Sanity check: show the first discovered checkpoint path.
MODELS[0]

'../input/fold4-epoch25/model_026.pt'

In [9]:
# Preview the first rows of the test metadata table.
TEST.head()

Unnamed: 0,image_id,data_provider
0,005700be7e06878e6605e7a5a39de1b2,radboud
1,005c6e8877caf724c600fdce5d417d40,karolinska
2,0104f76634ff89bfff1ef0804a95c380,radboud


In [13]:
def get_tiles(img, mode=0, tile_size=None, n_tiles=None):
    """Cut an image into square tiles and keep the ones with the lowest pixel sum.

    The image is padded with the value 255 so both spatial dimensions become a
    multiple of ``tile_size``, split into a regular grid of tiles, and the
    ``n_tiles`` tiles with the smallest pixel sum are returned in ascending
    order of that sum.

    Args:
        img: array of shape (height, width, channels).
        mode: adds ``(tile_size * mode) // 2`` extra padding pixels per axis
            (split between both sides), which shifts the tile grid; ``0``
            adds nothing beyond the padding needed to reach a multiple of
            ``tile_size``.
        tile_size: edge length of a tile in pixels; defaults to the
            module-level ``sz``.
        n_tiles: number of tiles to return; defaults to the module-level ``N``.

    Returns:
        A tuple ``(result, enough)``. ``result`` is a list of dicts
        ``{'img': tile, 'idx': position}`` with exactly ``n_tiles`` entries
        (all-255 tiles are appended when the grid has fewer tiles).
        ``enough`` is truthy when at least ``n_tiles`` grid tiles are not
        entirely filled with 255.
    """
    # Resolve the defaults lazily so the globals are only needed when the
    # caller does not pass explicit values.
    if tile_size is None:
        tile_size = sz
    if n_tiles is None:
        n_tiles = N

    h, w, c = img.shape
    pad_h = (tile_size - h % tile_size) % tile_size + ((tile_size * mode) // 2)
    pad_w = (tile_size - w % tile_size) % tile_size + ((tile_size * mode) // 2)

    padded = np.pad(
        img,
        [[pad_h // 2, pad_h - pad_h // 2], [pad_w // 2, pad_w - pad_w // 2], [0, 0]],
        constant_values=255,
    )
    # (rows, tile, cols, tile, c) -> (rows * cols, tile, tile, c); the channel
    # count comes from the input instead of being fixed to 3.
    grid = padded.reshape(
        padded.shape[0] // tile_size,
        tile_size,
        padded.shape[1] // tile_size,
        tile_size,
        c,
    )
    tiles = grid.transpose(0, 2, 1, 3, 4).reshape(-1, tile_size, tile_size, c)

    # A tile whose pixel sum is below the all-255 maximum contains some content.
    n_tiles_with_info = (
        tiles.reshape(tiles.shape[0], -1).sum(1) < tile_size ** 2 * c * 255
    ).sum()

    if len(tiles) < n_tiles:
        # Not enough grid tiles: top up with all-255 tiles.
        tiles = np.pad(
            tiles,
            [[0, n_tiles - len(tiles)], [0, 0], [0, 0], [0, 0]],
            constant_values=255,
        )

    # Lowest pixel sum first.
    idxs = np.argsort(tiles.reshape(tiles.shape[0], -1).sum(-1))[:n_tiles]
    tiles = tiles[idxs]

    result = [{'img': tile, 'idx': i} for i, tile in enumerate(tiles)]
    return result, n_tiles_with_info >= n_tiles


class PANDADataset(Dataset):
    """Dataset that turns one slide into a square mosaic of its tiles.

    For each row of ``df`` the slide ``<image_dir>/<image_id>.tiff`` is read,
    cut into tiles with ``get_tiles``, each tile is scaled to [0, 1] and
    normalised with ``MEAN`` / ``STD``, and the tiles are pasted row by row
    into a ``sqrt(n_tiles) x sqrt(n_tiles)`` grid.

    Args:
        df: table with an ``image_id`` column (and ``isup_grade`` when
            ``mode`` is ``'train'`` or ``'valid'``).
        image_size: tile edge length; only used as a fallback when
            ``get_tiles`` returns no tiles at all.
        n_tiles: number of grid cells to fill.
        tile_mode: forwarded to ``get_tiles`` as its ``mode`` argument.
        rand: when true, tiles are placed in a random order.
        transform: optional callable invoked as ``transform(image=...)``
            returning a dict with an ``'image'`` entry; it is applied to every
            tile and then once more to the assembled mosaic.
        mode: ``'train'`` / ``'valid'`` return ``(image, label)``; any other
            value returns ``(image, image_id)``.
        image_dir: directory containing the ``.tiff`` files. Defaults to
            ``DEFAULT_IMAGE_DIR`` (the previously hard-coded location), so
            existing callers are unaffected.
    """

    # Location that used to be hard-coded in __getitem__.
    DEFAULT_IMAGE_DIR = '../input/prostate-cancer-grade-assessment/test_images'
    # Per-channel normalisation constants, built once instead of per item.
    MEAN = np.array([0.485, 0.456, 0.406])
    STD = np.array([0.229, 0.224, 0.225])

    def __init__(self,
                 df,
                 image_size,
                 n_tiles=N,
                 tile_mode=0,
                 rand=False,
                 transform=None,
                 mode='test',
                 image_dir=None
                ):

        self.df = df.reset_index(drop=True)
        self.image_size = image_size
        self.n_tiles = n_tiles
        self.tile_mode = tile_mode
        self.rand = rand
        self.transform = transform
        self.mode = mode
        self.image_dir = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        row = self.df.iloc[index]
        img_id = row.image_id

        file_path = f'{self.image_dir}/{img_id}.tiff'
        # Entry 1 of the multi-image TIFF — presumably the mid-resolution
        # pyramid level; confirm against the data description.
        image = skimage.io.MultiImage(file_path)[1]
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        tiles, _ = get_tiles(image, self.tile_mode)

        # Size the canvas and the blank filler tile from the tiles actually
        # produced, so the two can never disagree; fall back to the
        # constructor argument only when there are no tiles.
        tile_px = tiles[0]['img'].shape[0] if tiles else self.image_size

        if self.rand:
            idxes = np.random.choice(list(range(self.n_tiles)), self.n_tiles, replace=False)
        else:
            idxes = list(range(self.n_tiles))

        n_row_tiles = int(np.sqrt(self.n_tiles))
        images = np.zeros((tile_px * n_row_tiles, tile_px * n_row_tiles, 3))
        for h in range(n_row_tiles):
            for w in range(n_row_tiles):
                i = h * n_row_tiles + w

                if len(tiles) > idxes[i]:
                    this_img = tiles[idxes[i]]['img']
                else:
                    # No tile for this cell: use an all-255 tile.
                    this_img = np.ones((tile_px, tile_px, 3)).astype(np.uint8) * 255
                this_img = this_img.astype(np.float32) / 255
                this_img = (this_img - self.MEAN) / self.STD
                if self.transform is not None:
                    this_img = self.transform(image=this_img)['image']
                h1 = h * tile_px
                w1 = w * tile_px
                images[h1:h1 + tile_px, w1:w1 + tile_px] = this_img

        if self.transform is not None:
            images = self.transform(image=images)['image']
        images = images.astype(np.float32)
        # HWC -> CHW
        images = images.transpose(2, 0, 1)

        if self.mode == 'train' or self.mode == 'valid':
            # 5-element target whose first `isup_grade` entries are 1.
            label = np.zeros(5).astype(np.float32)
            label[:row.isup_grade] = 1.
            return torch.tensor(images), torch.tensor(label)
        else:
            return torch.tensor(images), img_id

In [14]:
# Random flips, each applied with probability 0.5.
# (transforms_train is not used by the inference code visible in this notebook.)
transforms_train = A.Compose([
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
])

# Empty pipeline: handed to PANDADataset for the test datasets.
transforms_val = A.Compose([])

# Flip transforms for the ttach TTA wrapper (its only use is in a
# commented-out line of the model-loading cell).
# NOTE: this rebinds `transforms`, shadowing `from torchvision import transforms`.
transforms = tta.Compose([tta.HorizontalFlip(), tta.VerticalFlip()])

In [15]:
##debug
# #image_id = TRAIN.loc[TRAIN['image_id'] == '3790f55cad63053e956fb73027179707'].image_id
# image_id = TRAIN[2225:2228].reset_index(drop=True)

# #train_dataset = ProstateDataset(train_csv['image_id'], labels=train_csv, mode='train', transform=get_transforms(data='train'))
# test_dataset = PANDADataset(TRAIN, sz, N, 0, transform=transforms_val, mode='test')
# test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# %%time

# for image, label in test_loader:
#     print(image.shape)
#     image = image.permute(0, 2, 3, 1)
#     print(image.shape)
#     plt.imshow(image[0])
#     plt.show()
#     #print(image[0])
#     cv2.imwrite('/kaggle/working/test1.png', np.array(image[0]))
#     break

# del test_dataset, test_loader
# gc.collect()

In [23]:
class CustomEfficientNet(nn.Module):
    """EfficientNet-B0 built by name, with its `_fc` layer replaced.

    Args:
        in_channels: accepted for interface compatibility; not used.
        out_channels: number of output features of the replacement `_fc`
            linear layer.
    """

    def __init__(self, in_channels=3, out_channels=5):
        super().__init__()
        backbone = EfficientNet.from_name("efficientnet-b0")
        # Swap the stock head for one with `out_channels` outputs.
        backbone._fc = nn.Linear(backbone._fc.in_features, out_channels)
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for `x`."""
        return self.model(x)

In [28]:
class WrappedModel(nn.Module):
    """Thin wrapper that stores a network under the attribute `module`.

    Because the wrapped network is registered as `module`, every key of this
    wrapper's state dict starts with `module.`.
    """

    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, x):
        """Delegate straight to the wrapped network."""
        wrapped = self.module
        return wrapped(x)

In [31]:

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# this cell does not crash in a session without an accelerator.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

models = []
for path in MODELS:
    model = CustomEfficientNet()
    # Wrapped in DataParallel before loading, as in the original cell —
    # presumably because the checkpoints were saved from a DataParallel model
    # and their keys carry a `module.` prefix; confirm against the training code.
    model = nn.DataParallel(model)
    # map_location='cpu' lets a checkpoint saved on any device be read even if
    # that device does not exist here; load_state_dict then copies the values
    # into the model's own parameters.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    #model = tta.ClassificationTTAWrapper(model, transforms, merge_mode='mean')
    model.to(device)
    model.float()
    model.eval()
    models.append(model)


In [35]:
def _predict_to_csv(loader, csv_path):
    """Run the model ensemble over `loader` and write the grades to `csv_path`.

    For every batch the outputs of all entries of the global `models` list are
    averaged, passed through a sigmoid, summed over the output dimension and
    rounded to obtain one integer grade per image.

    Args:
        loader: iterable with a length, yielding `(inputs, image_ids)` batches.
        csv_path: destination of the CSV with columns `image_id`, `isup_grade`.

    Returns:
        The DataFrame that was written.
    """
    image_ids, batch_probs = [], []
    for inputs, batch_ids in tqdm(loader, total=len(loader)):
        inputs = inputs.float().to(device)
        with torch.no_grad():
            logits = torch.stack([model(inputs) for model in models], 1).mean(1)
        image_ids.extend(batch_ids)
        # Move each batch off the accelerator right away instead of keeping
        # every batch's output on the device until the end of the loop.
        batch_probs.append(logits.sigmoid().cpu())

    torch.cuda.empty_cache()
    gc.collect()

    if batch_probs:
        predictions = torch.cat(batch_probs).sum(1).round().numpy().astype(int)
    else:
        # Empty loader: write a header-only file instead of failing in torch.cat.
        predictions = []

    sub_df = pd.DataFrame({'image_id': image_ids, 'isup_grade': predictions})
    sub_df.to_csv(csv_path, index=False)
    return sub_df


def test():
    """Predict with the global `test_loader` and write 'submission1.csv'."""
    return _predict_to_csv(test_loader, 'submission1.csv')


def test2():
    """Predict with the global `test_loader2` and write 'submission2.csv'."""
    return _predict_to_csv(test_loader2, 'submission2.csv')

In [36]:
# Start from the sample submission; it is written unchanged when the test
# image directory is not present.
sub_df = pd.read_csv(SAMPLE)
if os.path.exists(DATA):
    #test_dataset = PANDADataset(TRAIN[0:100], sz, N, 0, transform=transforms_val, mode='test') ## debug
    #test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4) ## debug

    # Two views of the same test set that differ only in the tile_mode passed
    # to get_tiles (0 and 2). shuffle=False keeps both outputs in the same row
    # order, which the positional combination below relies on.
    test_dataset = PANDADataset(TEST, sz, N, 0, transform=transforms_val, mode='test')  # mode == 0
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    test_dataset2 = PANDADataset(TEST, sz, N, 2, transform=transforms_val, mode='test')  # mode == 2
    test_loader2 = DataLoader(test_dataset2, batch_size=batch_size, shuffle=False, num_workers=4)

    # test() / test2() read the loaders above as globals and write
    # submission1.csv / submission2.csv.
    test()
    test2()

    sub1 = pd.read_csv('submission1.csv')
    sub2 = pd.read_csv('submission2.csv')

    col1 = sub1['isup_grade']
    col2 = sub2['isup_grade']
    img_id = sub1['image_id']

    # NOTE(review): astype(np.int64) truncates, so an average ending in .5
    # (e.g. grades 2 and 3) becomes the lower grade. Left unchanged; confirm
    # this is the intended way to combine the two predictions.
    final_sub = ((col1 + col2)/2).astype(np.int64)
    final_submission = pd.DataFrame({'image_id': img_id, 'isup_grade': final_sub})
    final_submission.to_csv('submission.csv', index=False)

    # Keep `sub_df` pointing at what was actually written, so a later preview
    # of `sub_df` shows the predictions rather than the sample file.
    sub_df = final_submission

else:
    sub_df.to_csv("submission.csv", index=False)

In [37]:
# Preview up to the first 20 rows of `sub_df`.
sub_df.head(20)

Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,0
1,005c6e8877caf724c600fdce5d417d40,0
2,0104f76634ff89bfff1ef0804a95c380,0
