In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Libraries

In [2]:
#Imports
from torch.utils.data import Dataset
import cv2
import openslide
from skimage import io
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score
# from tqdm.notebook import tqdm
from tqdm.notebook import trange, tqdm
import time

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip
from albumentations.pytorch import ToTensorV2

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

# Load into Pandas

In [3]:
# Competition training metadata; the preview below shows the columns
# image_id, data_provider, isup_grade and gleason_score.
train = pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/train.csv')
train.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0


# Tiling

In [4]:
def create_tiles(I, tile_size = 128, r_offset = 0, c_offset = 0, n = 12, ipr = 4):
    """
    Cut the last level of a multi-level image into square tiles, keep the
    ``n`` tiles with the most non-white values and stitch them into a mosaic.

    Params:
        I: indexable collection of image levels; only ``I[-1]`` is used and it
            must be a (rows, cols, channels) array
        tile_size: side length, in pixels, of each square tile
        r_offset: number of white pixel COLUMNS added on the left edge before
            tiling (shifts the tiling grid horizontally)
        c_offset: number of white pixel ROWS added on the top edge before
            tiling (shifts the tiling grid vertically)
        n: total number of tiles in final stitched image
        ipr: images (tiles) per row in final stitched image
    Returns:
        final stitched image with shape
        (int(n / ipr) * tile_size, ipr * tile_size, channels). Tiles are placed
        row by row in ascending order of their non-white count. When ``n`` is
        not a multiple of ``ipr`` the trailing ``n % ipr`` tiles are dropped.
    """
    img = I[-1]
    r, c, d = np.shape(img)

    def _white(rows, cols):
        # Solid white (255) uint8 padding with the same channel count as the image.
        return np.full((rows, cols, d), 255, dtype=np.uint8)

    # Left offset, then pad the right edge so the width is a multiple of tile_size.
    # NOTE: an already aligned width still receives one full extra white tile
    # column (and likewise one extra row below); kept as-is so the tile grid
    # matches what the original implementation produced.
    img = np.concatenate((_white(r, r_offset), img), 1)
    right_pad_amt = tile_size - img.shape[1] % tile_size
    img = np.concatenate((img, _white(img.shape[0], right_pad_amt)), 1)

    # Top offset, then pad the bottom edge so the height is a multiple of tile_size.
    img = np.concatenate((_white(c_offset, img.shape[1]), img), 0)
    bot_pad_amt = tile_size - img.shape[0] % tile_size
    img = np.concatenate((img, _white(bot_pad_amt, img.shape[1])), 0)

    # Guarantee at least ``n`` tiles exist by appending white rows. This used to
    # be hard-coded to 12, which broke any call with a larger ``n`` on a small image.
    if (img.shape[0] * img.shape[1]) / (tile_size * tile_size) < n:
        img = np.concatenate((img, _white(n * tile_size, img.shape[1])), 0)

    # Row-major list of tile_size x tile_size tiles.
    tiles = np.array([
        img[top:top + tile_size, left:left + tile_size]
        for top in range(0, img.shape[0], tile_size)
        for left in range(0, img.shape[1], tile_size)
    ])

    # Score = number of array elements (per channel value) below 255.
    # Kept as float64 so the argsort input matches the original implementation.
    counts = (tiles < 255).reshape(len(tiles), -1).sum(axis=1).astype(np.float64)
    sub_tiles = tiles[np.argsort(counts)[-n:]]

    # Stitch the selected tiles: `ipr` tiles per row, int(n / ipr) rows.
    num_rows = int(n / ipr)
    grid = sub_tiles[:num_rows * ipr].reshape(num_rows, ipr, tile_size, tile_size, d)
    final_img = grid.transpose(0, 2, 1, 3, 4).reshape(num_rows * tile_size, ipr * tile_size, d)

    return final_img

# Dataset

In [5]:
class TestingDataset(Dataset):
    """
    Dataset yielding one tiled mosaic image per row of ``df``.

    Params:
        df: frame with an ``image_id`` column
        folder: sub-folder of the competition input directory holding the
            ``<image_id>.tiff`` files
        transform: optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``'image'``
    """
    def __init__ (self, df, folder, transform = None):
        self.df, self.folder, self.transform = df, folder, transform

    def __len__ (self):
        return len(self.df)

    def __getitem__(self, idx):
        image_id = self.df['image_id'].values[idx]
        tiff_path = '/kaggle/input/prostate-cancer-grade-assessment/' + self.folder + '/' + image_id + '.tiff'

        # create_tiles only reads the last level ([-1]) of the multi-level image.
        mosaic = create_tiles(io.MultiImage(tiff_path), tile_size = 128, r_offset = 0, c_offset = 0, n = 12, ipr = 4)
        # NOTE(review): this swaps the first and third channels; if the loader
        # already yields RGB the result is BGR. Confirm it matches training.
        mosaic = cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)

        if not self.transform:
            return mosaic
        return self.transform(image = mosaic)['image']

# Transformations

In [6]:
def get_transform(dataset_type):
    """
    Build the albumentations pipeline for a dataset split.

    Params:
        dataset_type: 'train' (random horizontal/vertical flips, each with
            p=0.5, then normalisation and tensor conversion) or 'valid'
            (normalisation and tensor conversion only)
    Returns:
        a ``Compose`` of the selected transforms
    """
    assert dataset_type in ('train', 'valid')

    steps = []
    if dataset_type == 'train':
        steps += [HorizontalFlip(p=0.5), VerticalFlip(p=0.5)]
    elif dataset_type != 'valid':
        # Only reachable when assertions are stripped (python -O).
        return None

    steps += [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]
    return Compose(steps)

# Model

In [7]:
from __future__ import print_function, division, absolute_import
from collections import OrderedDict
import math

import torch.nn as nn
from torch.utils import model_zoo

class SEModule(nn.Module):
    """
    Squeeze-and-Excitation gate: global average pool -> 1x1 conv reducing the
    channels by ``reduction`` -> ReLU -> 1x1 conv restoring the channels ->
    sigmoid. The result multiplies the module input.
    """

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        gate = self.avg_pool(x)
        for stage in (self.fc1, self.relu, self.fc2, self.sigmoid):
            gate = stage(gate)
        return x * gate


class Bottleneck(nn.Module):
    """
    Shared `forward()` for the bottleneck variants.

    Subclasses are expected to define conv1/bn1, conv2/bn2, conv3/bn3, relu,
    se_module and downsample (a module or None).
    """
    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Project the identity branch only when a downsample module is present.
        residual = x if self.downsample is None else self.downsample(x)

        return self.relu(self.se_module(out) + residual)


class SEBottleneck(Bottleneck):
    """
    Bottleneck for SENet154: 1x1 conv to 2*planes, grouped 3x3 conv (carrying
    the stride) to 4*planes, 1x1 conv at 4*planes, followed by an SE module.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super().__init__()
        mid_channels = planes * 2
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)

        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck with a Squeeze-and-Excitation module. As in the Caffe
    implementation, the stride is applied in `conv1` rather than in `conv2`
    (torchvision's ResNet applies it in `conv2`).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super().__init__()
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNeXtBottleneck(Bottleneck):
    """
    ResNeXt bottleneck type C with a Squeeze-and-Excitation module. The inner
    width is floor(planes * base_width / 64) * groups and the stride is
    applied in the grouped 3x3 convolution.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None, base_width=4):
        super().__init__()
        width = math.floor(planes * (base_width / 64)) * groups
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, stride=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(width)

        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)

        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SENet(nn.Module):
    """
    SENet backbone: a stem (`layer0`), four residual stages
    (`layer1`..`layer4`), average pooling, optional dropout and a final
    linear classifier (`last_linear`).
    """

    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
                 downsample_padding=1, num_classes=1000):
        """
        Parameters
        ----------
        block (nn.Module): Bottleneck class.
            - SENet154: SEBottleneck
            - SE-ResNet models: SEResNetBottleneck
            - SE-ResNeXt models: SEResNeXtBottleneck
        layers (list of ints): Number of residual blocks in each of the four
            stages (layer1...layer4).
        groups (int): Number of groups for the 3x3 convolution in each
            bottleneck block.
            - SENet154: 64
            - SE-ResNet models: 1
            - SE-ResNeXt models: 32
        reduction (int): Reduction ratio for the Squeeze-and-Excitation
            modules.
            - All models: 16
        dropout_p (float or None): Drop probability of the Dropout layer;
            `None` disables the Dropout layer.
            - SENet154: 0.2
            - SE-ResNet models: None
            - SE-ResNeXt models: None
        inplanes (int): Number of input channels for layer1.
            - SENet154: 128
            - SE-ResNet models: 64
            - SE-ResNeXt models: 64
        input_3x3 (bool): If `True`, layer0 uses three 3x3 convolutions
            instead of a single 7x7 convolution.
            - SENet154: True
            - SE-ResNet models: False
            - SE-ResNeXt models: False
        downsample_kernel_size (int): Kernel size of the downsampling
            convolutions in layer2, layer3 and layer4.
            - SENet154: 3
            - SE-ResNet models: 1
            - SE-ResNeXt models: 1
        downsample_padding (int): Padding of the downsampling convolutions in
            layer2, layer3 and layer4.
            - SENet154: 1
            - SE-ResNet models: 0
            - SE-ResNeXt models: 0
        num_classes (int): Number of outputs of the `last_linear` layer.
            - All models: 1000
        """
        super().__init__()
        self.inplanes = inplanes

        if input_3x3:
            stem = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            stem = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # `ceil_mode=True` is used instead of `padding=1` to stay compatible
        # with the Caffe weights.
        stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(stem))

        # layer1 keeps the resolution and always uses a 1x1 unpadded shortcut.
        self.layer1 = self._make_layer(
            block, planes=64, blocks=layers[0], groups=groups,
            reduction=reduction, downsample_kernel_size=1,
            downsample_padding=0)

        # layer2..layer4 halve the resolution and share the shortcut settings.
        strided = dict(
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding,
        )
        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], **strided)
        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], **strided)
        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], **strided)

        self.avg_pool = nn.AvgPool2d(7, stride=1)
        self.dropout = None if dropout_p is None else nn.Dropout(dropout_p)
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """Build one stage of `blocks` bottlenecks; only the first one gets
        the stride and (when shapes change) the projection shortcut."""
        out_channels = planes * block.expansion

        if stride == 1 and self.inplanes == out_channels:
            shortcut = None
        else:
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        stack = [block(self.inplanes, planes, groups, reduction, stride,
                       shortcut)]
        self.inplanes = out_channels
        stack.extend(block(self.inplanes, planes, groups, reduction)
                     for _ in range(1, blocks))

        return nn.Sequential(*stack)

    def features(self, x):
        """Run the stem and the four residual stages."""
        for stage in (self.layer0, self.layer1, self.layer2, self.layer3,
                      self.layer4):
            x = stage(x)
        return x

    def logits(self, x):
        """Pool, optionally apply dropout, flatten and classify."""
        pooled = self.avg_pool(x)
        if self.dropout is not None:
            pooled = self.dropout(pooled)
        return self.last_linear(pooled.view(pooled.size(0), -1))

    def forward(self, x):
        return self.logits(self.features(x))


def initialize_pretrained_model(model, num_classes, settings):
    """
    Load the weights referenced by ``settings['url']`` into ``model`` and copy
    the preprocessing metadata from ``settings`` onto it.

    Params:
        model: object exposing ``load_state_dict``
        num_classes: must equal ``settings['num_classes']``
        settings: mapping with keys 'num_classes', 'url', 'input_space',
            'input_size', 'input_range', 'mean' and 'std'
    Raises:
        AssertionError: when ``num_classes`` differs from the settings value
    """
    expected = settings['num_classes']
    assert num_classes == expected, \
        'num_classes should be {}, but is {}'.format(expected, num_classes)

    model.load_state_dict(model_zoo.load_url(settings['url']))
    for attr in ('input_space', 'input_size', 'input_range', 'mean', 'std'):
        setattr(model, attr, settings[attr])




# Settings consumed by initialize_pretrained_model(). The notebook referenced
# `pretrained_settings` without defining it, so the default
# `pretrained='imagenet'` raised NameError. Values follow the upstream
# `pretrainedmodels` SENet definition (see references).
pretrained_settings = {
    'se_resnext50_32x4d': {
        'imagenet': {
            'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',
            'input_space': 'RGB',
            'input_size': [3, 224, 224],
            'input_range': [0, 1],
            'mean': [0.485, 0.456, 0.406],
            'std': [0.229, 0.224, 0.225],
            'num_classes': 1000,
        },
    },
}


def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
    """
    Build an SE-ResNeXt50 (32x4d) network.

    Params:
        num_classes: number of outputs of the final linear layer
        pretrained: key into ``pretrained_settings['se_resnext50_32x4d']``
            selecting the weights to load, or ``None`` to skip loading
    Returns:
        the constructed ``SENet``
    Raises:
        KeyError: when ``pretrained`` is not a known settings key
        AssertionError: when ``num_classes`` does not match the selected
            settings (raised by ``initialize_pretrained_model``)
    """
    model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16,
                  dropout_p=None, inplanes=64, input_3x3=False,
                  downsample_kernel_size=1, downsample_padding=0,
                  num_classes=num_classes)
    if pretrained is not None:
        settings = pretrained_settings['se_resnext50_32x4d'][pretrained]
        initialize_pretrained_model(model, num_classes, settings)
    return model


In [8]:
class se_resnext(nn.Module):
    """
    SE-ResNeXt50 (32x4d) backbone, built without loading pretrained weights,
    whose pooling is replaced by adaptive average pooling and whose classifier
    is replaced by a 6-output linear layer.

    Params:
        freeze: accepted for interface compatibility; the constructor does not
            use it.
    """
    def __init__(self, freeze = True):
        super().__init__()

        backbone = se_resnext50_32x4d(pretrained = None)
        # Adaptive pooling removes the fixed 7x7 pooling window of the stock model.
        backbone.avg_pool = nn.AdaptiveAvgPool2d(1)
        backbone.last_linear = nn.Linear(backbone.last_linear.in_features, 6)
        self.model = backbone

    def forward(self, x):
        return self.model(x)

# Inference and Submission

In [9]:
def submit(test_df, image_folder = 'test_images'):
    """
    Fill ``test_df['isup_grade']`` with the class predicted by averaging the
    raw outputs of the two saved fold models.

    Params:
        test_df: frame with an ``image_id`` column; it is modified in place
        image_folder: sub-folder of the competition input directory that holds
            the images
    Returns:
        ``test_df``; it is returned untouched (after printing a notice) when
        the image folder does not exist
    """
    if not os.path.exists('/kaggle/input/prostate-cancer-grade-assessment/' + image_folder):
        print('directory not found')
        return test_df

    print('Running inference ---------->')
    loader = DataLoader(
        TestingDataset(test_df, image_folder, get_transform(dataset_type = 'valid')),
        batch_size = 16,
        shuffle = False,
    )

    fold_outputs = []
    for fold_num in range(2):
        # `%` binds tighter than `+`: the fold number is formatted into the
        # first literal and the file name is appended afterwards.
        weights_path = '/kaggle/input/resnext5032x4d/foldnum%s'% fold_num + 'se-resnext50-32x4d-2foldcv-e6.pth'
        fold_net = se_resnext(freeze = False)
        fold_net.load_state_dict(torch.load(weights_path, map_location = device))
        fold_outputs.append(predict_test(fold_net, loader))

    # Average across folds, then take the highest-scoring class per row.
    test_df['isup_grade'] = np.mean(fold_outputs, 0).argmax(1)
    return test_df

In [10]:
def predict_test(net, dataloader):
    """
    Run ``net`` over every batch of ``dataloader`` and return the stacked
    raw outputs.

    The network is switched to evaluation mode for the duration of the call so
    that layers such as BatchNorm and Dropout behave deterministically; the
    previous training/eval mode is restored afterwards. Previously the model
    was left in training mode during inference.

    Params:
        net: ``nn.Module`` to evaluate; it is moved to the global ``device``
        dataloader: iterable yielding image batches as tensors
    Returns:
        ``np.ndarray`` of the per-batch outputs concatenated along axis 0
    Raises:
        ValueError: from ``np.concatenate`` when ``dataloader`` yields no batches
    """
    net.to(device)
    was_training = net.training
    net.eval()

    batch_outputs = []
    try:
        with torch.no_grad():
            for images in tqdm(dataloader):
                outputs = net(images.to(device))
                batch_outputs.append(outputs.to('cpu').numpy())
    finally:
        # Leave the caller's model in the mode it arrived in.
        net.train(was_training)

    return np.concatenate(batch_outputs)

In [11]:
# Start from the sample submission and let submit() fill in `isup_grade`.
# When the test image folder is missing, submit() prints 'directory not found'
# and returns the frame unchanged, so the sample values are written out as-is.
sampledf = pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/sample_submission.csv')
submission = submit(sampledf, image_folder = 'test_images')
# Cast the grades to int before writing the CSV.
submission['isup_grade'] = submission['isup_grade'].astype(int)
submission.to_csv('submission.csv', index=False)
submission.head()

directory not found


Unnamed: 0,image_id,isup_grade
0,005700be7e06878e6605e7a5a39de1b2,0
1,005c6e8877caf724c600fdce5d417d40,0
2,0104f76634ff89bfff1ef0804a95c380,0


In [12]:
# # On Training data sample:
# sampledf = pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/sample_submission.csv')
# submission = submit(train.head(), image_folder = 'train_images')
# submission['isup_grade'] = submission['isup_grade'].astype(int)
# submission.to_csv('submission.csv', index=False)
# submission.head()