Inspiration from: https://www.kaggle.com/yasufuminakama/panda-se-resnext50-classification-baseline

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Libraries

In [2]:
#Imports
from torch.utils.data import Dataset
import cv2
import openslide
from skimage import io
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import cohen_kappa_score
# from tqdm.notebook import tqdm
from tqdm.notebook import trange, tqdm
import time

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip
from albumentations.pytorch import ToTensorV2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Load into Pandas

In [3]:
train = pd.read_csv('/kaggle/input/prostate-cancer-grade-assessment/train.csv')
train.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0


# Tiling

In [4]:
def create_tiles(I, tile_size = 128, r_offset = 0, c_offset = 0, n = 12, ipr = 4):
    """
    Params:
        tile_size: n x n pixels per tile
        r_offset: tiling starts n pixels left of input image left edge
        c_offset: tiling: starts n pixels above input image top edge
        n: total number of tiles in final stitched image
        ipr: images per row in final stitched image
    Returns:
        final stitched image
    """

    img = I[-1]
    r, c, d = np.shape(img)


    #left side offset padding
    left_pad = np.uint8(np.ones((r, r_offset, d)) * 255)
    img_lp = np.concatenate((left_pad, img),1)

    #build right-side padding
    rn, cn, d = np.shape(img_lp)
    right_pad_amt = tile_size - cn%tile_size
    right_pad = np.uint8(np.ones((rn, right_pad_amt,d)) * 255)
    img_lrp = np.concatenate((img_lp,right_pad),1)

    # top side offset padding
    rn2, cn2, d = np.shape(img_lrp)
    top_pad = np.uint8(np.ones((c_offset, cn2, d)) * 255)
    img_lrtp = np.concatenate((top_pad, img_lrp),0)

    #build bottom-side padding
    rn3, cn3, d = np.shape(img_lrtp)
    bot_pad_amt = tile_size - rn3%tile_size
    bot_pad = np.uint8(np.ones((bot_pad_amt,cn3,d))*255)
    img_lrtbp = np.concatenate((img_lrtp,bot_pad),0)
    
    
    if (np.shape(img_lrtbp)[0] * np.shape(img_lrtbp)[1])/(tile_size*tile_size) < 12:
        white_pad = np.uint8(np.ones((12 * tile_size,cn3,d))*255)
        img_lrtbp = np.concatenate((img_lrtbp,white_pad),0)



    im = img_lrtbp
    M = tile_size
    N = tile_size
    tiles = [im[x:x+M,y:y+N] for x in range(0,im.shape[0],M) for y in range(0,im.shape[1],N)]
    tiles = np.array(tiles)

    num_tiles = len(tiles)

    
    counts = np.zeros(num_tiles)
    for img_num in range(num_tiles):
        counts[img_num] = (tiles[img_num]<255).sum()
    tile_idx = np.argsort(counts)[-n:]
    sub_tiles = tiles[tile_idx]
    
 

    #stick the subtiles together
    x = 4
    y = 3
    tape = np.uint8(np.zeros((tile_size,0,3)))
    for i in range(n):
        tape = np.concatenate((tape,sub_tiles[i]),1)

    num_rows = n/ipr
    cols = np.shape(tape)
    final_img = np.uint8(np.zeros((0,ipr*tile_size,3)))
    idx = 0
    for i in range(int(num_rows)):

        final_img = np.concatenate((final_img, tape[0:tile_size,idx*tile_size*ipr:(idx+1)*tile_size*ipr,:]),0)
        idx = idx + 1


    return final_img

# Dataset

In [5]:
class TrainingDataset(Dataset):
    def __init__ (self, df, labels, transform = None):
        self.df = df
        self.labels = labels
        self.transform = transform
        
    def __len__ (self):
        return len(self.df)
    
    def __getitem__(self, idx):
        slide_identifier = self.df['image_id'].values[idx]
        image_path = '/kaggle/input/prostate-cancer-grade-assessment/train_images/' + slide_identifier + '.tiff'
        slide = io.MultiImage(image_path)
        image = create_tiles(slide, tile_size = 128, r_offset = 0, c_offset = 0, n = 12, ipr = 4) #using the smallest image, denoted by [-1] index
#         image = cv2.resize(image, (256, 256)) #downsampling to 256x256, slice and remove A index
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transform:
            transformed = self.transform(image = image)
            image = transformed['image']
        
        label = self.labels[idx]
        return image, label
    
class TestingDataset(Dataset):
    def __init__ (self, df, folder, transform = None):
        self.df = df
        self.folder = folder
        self.transform = transform
        
    def __len__ (self):
        return len(self.df)
    
    def __getitem__(self, idx):
        slide_identifier = self.df['image_id'].values[idx]
        image_path = '/kaggle/input/prostate-cancer-grade-assessment/' + self.folder + '/'+ slide_identifier + '.tiff'
        slide = io.MultiImage(image_path)
        image = create_tiles(slide, tile_size = 128, r_offset = 0, c_offset = 0, n = 12, ipr = 4) #using the smallest image, denoted by [-1] index
#         image = cv2.resize(image, (256, 256)) #downsampling to 256x256, slice and remove A index
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transform:
            transformed = self.transform(image = image)
            image = transformed['image']
        

        return image

# Transformations

In [6]:
def get_transform(dataset_type):
    assert dataset_type in ('train', 'valid')
    if dataset_type == 'train':

        return Compose([
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])
    elif dataset_type == 'valid':

        return Compose([
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

# Split Dataset Into n (4) Folds

In [7]:
fold_dataset = train.copy()
kfold = StratifiedKFold(n_splits = 4, shuffle = True ,random_state = 42)
gen = kfold.split(fold_dataset, fold_dataset['isup_grade']) #create generator
for fold, (train_,test_) in enumerate(gen): 
    fold_dataset.loc[test_, 'fold'] = fold #assign fold number to indices for test (1/4 of dataset)
fold_dataset['fold'] = fold_dataset['fold'].astype(int) #change to integer
fold_dataset.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score,fold
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0,3
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0,0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4,2
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4,3
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0,3


# Build Model

resnext50_32x4d: https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

weights: https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth

In [8]:
from __future__ import print_function, division, absolute_import
from collections import OrderedDict
import math

import torch.nn as nn
from torch.utils import model_zoo

class SEModule(nn.Module):

    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1,
                             padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1,
                             padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return module_input * x


class Bottleneck(nn.Module):
    """
    Base class for bottlenecks that implements `forward()` method.
    """
    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out = self.se_module(out) + residual
        out = self.relu(out)

        return out


class SEBottleneck(Bottleneck):
    """
    Bottleneck for SENet154.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super(SEBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes * 2)
        self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3,
                               stride=stride, padding=1, groups=groups,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(planes * 4)
        self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(planes * 4, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe
    implementation and uses `stride=stride` in `conv1` and not in `conv2`
    (the latter is used in the torchvision implementation of ResNet).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super(SEResNetBottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
                               stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(planes * 4, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNeXtBottleneck(Bottleneck):
    """
    ResNeXt bottleneck type C with a Squeeze-and-Excitation module.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None, base_width=4):
        super(SEResNeXtBottleneck, self).__init__()
        width = math.floor(planes * (base_width / 64)) * groups
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False,
                               stride=1)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(planes * 4, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SENet(nn.Module):

    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
                 downsample_padding=1, num_classes=1000):
        """
        Parameters
        ----------
        block (nn.Module): Bottleneck class.
            - For SENet154: SEBottleneck
            - For SE-ResNet models: SEResNetBottleneck
            - For SE-ResNeXt models:  SEResNeXtBottleneck
        layers (list of ints): Number of residual blocks for 4 layers of the
            network (layer1...layer4).
        groups (int): Number of groups for the 3x3 convolution in each
            bottleneck block.
            - For SENet154: 64
            - For SE-ResNet models: 1
            - For SE-ResNeXt models:  32
        reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
            - For all models: 16
        dropout_p (float or None): Drop probability for the Dropout layer.
            If `None` the Dropout layer is not used.
            - For SENet154: 0.2
            - For SE-ResNet models: None
            - For SE-ResNeXt models: None
        inplanes (int):  Number of input channels for layer1.
            - For SENet154: 128
            - For SE-ResNet models: 64
            - For SE-ResNeXt models: 64
        input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
            a single 7x7 convolution in layer0.
            - For SENet154: True
            - For SE-ResNet models: False
            - For SE-ResNeXt models: False
        downsample_kernel_size (int): Kernel size for downsampling convolutions
            in layer2, layer3 and layer4.
            - For SENet154: 3
            - For SE-ResNet models: 1
            - For SE-ResNeXt models: 1
        downsample_padding (int): Padding for downsampling convolutions in
            layer2, layer3 and layer4.
            - For SENet154: 1
            - For SE-ResNet models: 0
            - For SE-ResNeXt models: 0
        num_classes (int): Number of outputs in `last_linear` layer.
            - For all models: 1000
        """
        super(SENet, self).__init__()
        self.inplanes = inplanes
        if input_3x3:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                    bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        else:
            layer0_modules = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        # To preserve compatibility with Caffe weights `ceil_mode=True`
        # is used instead of `padding=1`.
        layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2,
                                                    ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
        self.layer1 = self._make_layer(
            block,
            planes=64,
            blocks=layers[0],
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=1,
            downsample_padding=0
        )
        self.layer2 = self._make_layer(
            block,
            planes=128,
            blocks=layers[1],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer3 = self._make_layer(
            block,
            planes=256,
            blocks=layers[2],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.layer4 = self._make_layer(
            block,
            planes=512,
            blocks=layers[3],
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding
        )
        self.avg_pool = nn.AvgPool2d(7, stride=1)
        self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, groups, reduction, stride,
                            downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups, reduction))

        return nn.Sequential(*layers)

    def features(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def logits(self, x):
        x = self.avg_pool(x)
        if self.dropout is not None:
            x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.last_linear(x)
        return x

    def forward(self, x):
        x = self.features(x)
        x = self.logits(x)
        return x


def initialize_pretrained_model(model, num_classes, settings):
    assert num_classes == settings['num_classes'], \
        'num_classes should be {}, but is {}'.format(
            settings['num_classes'], num_classes)
    model.load_state_dict(model_zoo.load_url(settings['url']))
    model.input_space = settings['input_space']
    model.input_size = settings['input_size']
    model.input_range = settings['input_range']
    model.mean = settings['mean']
    model.std = settings['std']




def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
    model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16,
                  dropout_p=None, inplanes=64, input_3x3=False,
                  downsample_kernel_size=1, downsample_padding=0,
                  num_classes=num_classes)
    if pretrained is not None:
        settings = pretrained_settings['se_resnext50_32x4d'][pretrained]
        initialize_pretrained_model(model, num_classes, settings)
    return model



In [9]:
class se_resnext(nn.Module):
    def __init__(self, freeze = True):
        super(se_resnext, self).__init__()
#         layers = []
#         layers.append(nn.Linear(512, 256))
#         layers.append(nn.ReLU())
#         layers.append(nn.Linear(256, 6))


        self.model = se_resnext50_32x4d(pretrained = None)
        weights_path = '/kaggle/input/se-resnext50-32x4d/se_resnext50_32x4d-a260b3a4.pth'
        self.model.load_state_dict(torch.load(weights_path))
        self.model.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.model.last_linear = nn.Linear(self.model.last_linear.in_features, 6)

        
    def forward(self, x):
        x = self.model(x)
        return x

In [10]:
net = se_resnext()
net

se_resnext(
  (model): SENet(
    (layer0): Sequential(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (layer1): Sequential(
      (0): SEResNeXtBottleneck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): 

# Train on 90%, validate on 10%

In [12]:
fold_dataset = train.copy()
kfold = StratifiedKFold(n_splits = 2, shuffle = True ,random_state = 42)
gen = kfold.split(fold_dataset, fold_dataset['isup_grade']) #create generator
for fold, (train_,test_) in enumerate(gen): 
    fold_dataset.loc[test_, 'fold'] = fold #assign fold number to indices for test (1/4 of dataset)
fold_dataset['fold'] = fold_dataset['fold'].astype(int) #change to integer
fold_dataset.head()

Unnamed: 0,image_id,data_provider,isup_grade,gleason_score,fold
0,0005f7aaab2800f6170c399693a96917,karolinska,0,0+0,1
1,000920ad0b612851f8e01bcc880d9b3d,karolinska,0,0+0,0
2,0018ae58b01bdadc8e347995b69f99aa,radboud,4,4+4,1
3,001c62abd11fa4b57bf7a6c603a11bb9,karolinska,4,4+4,1
4,001d865e65ef5d2579c190a0e0350d8f,karolinska,0,0+0,1


In [13]:
 def training_(fold_num, num_epochs):
        
        # get the dataset from fold_dataset for current fold #################################################################
        print('Fold number %s:' % fold_num)
        
        train_df = fold_dataset.loc[fold_dataset['fold'] != fold_num].reset_index(drop = True)
        valid_df = fold_dataset.loc[fold_dataset['fold'] == fold_num].reset_index(drop = True)
        
        train_label = train_df['isup_grade']
        valid_label = valid_df['isup_grade']
        
        train_dataset = TrainingDataset(train_df, train_label, get_transform(dataset_type = 'train'))
        valid_dataset = TrainingDataset(valid_df, valid_label, get_transform(dataset_type = 'valid'))
        
        trainloader = DataLoader(train_dataset, batch_size = 16)
        validloader = DataLoader(valid_dataset, batch_size = 16)
        
        # initialize model, and set criterion, optimizer, scheduler #################################################################
#         net = VGG('VGG16', dropout_rate = 0)
        net = se_resnext(freeze = False)
        net.to(device)
        criterion = nn.CrossEntropyLoss()
        criterion.to(device)
#         optimizer = optim.SGD(net.parameters(), lr = 0.1, momentum = 0.9, weight_decay=5e-4)
        optimizer = optim.Adam(net.parameters(), lr=0.0001, amsgrad=False)
#         scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1, gamma=0.5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2, verbose=True, eps=1e-6)

        
        
        lst_train_qwk = np.zeros(num_epochs)
        lst_valid_qwk = np.zeros(num_epochs)   
        
        start = time.time()
        best_qwk = -np.inf
        for epoch in range(num_epochs):
            running_loss = 0.0
            
            print('learning rate: %f' % optimizer.param_groups[0]['lr'])
            
#             add a get_accuracy function
            
            net.train()
            optimizer.zero_grad() 
            predictions = []
            truth_labels = []
            for i, data in enumerate(tqdm(trainloader)):
                inputs, labels = data[0].to(device),data[1].to(device)
                optimizer.zero_grad() 
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward() #backward() is an attribute of the tensor class
                optimizer.step()
                running_loss += loss.item()
                
                outputs = outputs.detach().to('cpu').numpy().argmax(1)
                labels = labels.detach().to('cpu').numpy()
                predictions.append(outputs)
                truth_labels.append(labels)
                
                

                if i % 20 == 19:    # print every 20 mini-batches
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / 20))
                    running_loss = 0.0
            
            qwk = cohen_kappa_score(predictions[0], truth_labels[0], weights='quadratic')
            print('Current training QWK: %s' % qwk)
            
            running_loss = 0.0
            predictions = []
            truth_labels = []
            net.eval()
            for i, data in enumerate(tqdm(validloader)):
                inputs, labels = data[0].to(device),data[1].to(device) 
                with torch.no_grad():
                    outputs_predicted = net(inputs)
                loss = criterion(outputs_predicted, labels)
                running_loss += loss.item()
                
                
#                 print(outputs_predicted)
                outputs_predicted = outputs_predicted.to('cpu').numpy().argmax(1)
#                 print(outputs_predicted)
                labels = labels.to('cpu').numpy()
                
                predictions.append(outputs_predicted)
                truth_labels.append(labels)
#             print(predictions)
#             print(truth_labels)
            qwk = cohen_kappa_score(predictions[0], truth_labels[0], weights='quadratic') 
            

            if qwk > best_qwk:
                best_qwk = qwk
                print('Saving Best Score ------------->')
                PATH = 'foldnum%s'% fold_num + 'se-resnext50-32x4d.pth'
                torch.save(net.state_dict(), PATH)
                
            scheduler.step(running_loss)
            print('Current valid QWK: %s' % qwk)
        
        
        print('Best QWK: %s' % best_qwk)
        print('[%d] loss: %.3f' %
              (epoch + 1, running_loss / 100))


        print('Finished Fold Training')
        end = time.time()
        print('Minutes run for: %s' % ((end - start)/60))
        return None

In [14]:
for fold in range(2):
    training_(fold,num_epochs = 6)

Fold number 0:
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[1,    20] loss: 1.635
[1,    40] loss: 1.456
[1,    60] loss: 1.317
[1,    80] loss: 1.397
[1,   100] loss: 1.374
[1,   120] loss: 1.249
[1,   140] loss: 1.275
[1,   160] loss: 1.305
[1,   180] loss: 1.254
[1,   200] loss: 1.297
[1,   220] loss: 1.104
[1,   240] loss: 1.204
[1,   260] loss: 1.283
[1,   280] loss: 1.248
[1,   300] loss: 1.220
[1,   320] loss: 1.200

Current training QWK: -0.11524163568773238


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Saving Best Score ------------->
Current valid QWK: 0.8009331259720063
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[2,    20] loss: 1.116
[2,    40] loss: 1.119
[2,    60] loss: 0.997
[2,    80] loss: 1.069
[2,   100] loss: 1.068
[2,   120] loss: 0.988
[2,   140] loss: 0.993
[2,   160] loss: 1.003
[2,   180] loss: 1.040
[2,   200] loss: 1.047
[2,   220] loss: 0.890
[2,   240] loss: 0.980
[2,   260] loss: 1.034
[2,   280] loss: 0.996
[2,   300] loss: 1.010
[2,   320] loss: 1.019

Current training QWK: 0.8320209973753281


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Saving Best Score ------------->
Current valid QWK: 0.8713450292397661
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[3,    20] loss: 0.958
[3,    40] loss: 0.913
[3,    60] loss: 0.759
[3,    80] loss: 0.821
[3,   100] loss: 0.891
[3,   120] loss: 0.776
[3,   140] loss: 0.733
[3,   160] loss: 0.841
[3,   180] loss: 0.792
[3,   200] loss: 0.865
[3,   220] loss: 0.713
[3,   240] loss: 0.765
[3,   260] loss: 0.776
[3,   280] loss: 0.882
[3,   300] loss: 0.806
[3,   320] loss: 0.788

Current training QWK: 0.875


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Current valid QWK: 0.817629179331307
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[4,    20] loss: 0.746
[4,    40] loss: 0.674
[4,    60] loss: 0.594
[4,    80] loss: 0.655
[4,   100] loss: 0.673
[4,   120] loss: 0.626
[4,   140] loss: 0.538
[4,   160] loss: 0.558
[4,   180] loss: 0.647
[4,   200] loss: 0.664
[4,   220] loss: 0.523
[4,   240] loss: 0.587
[4,   260] loss: 0.600
[4,   280] loss: 0.601
[4,   300] loss: 0.603
[4,   320] loss: 0.636

Current training QWK: 0.86


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Epoch     4: reducing learning rate of group 0 to 5.0000e-05.
Current valid QWK: 0.79050736497545
learning rate: 0.000050


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[5,    20] loss: 0.503
[5,    40] loss: 0.442
[5,    60] loss: 0.420
[5,    80] loss: 0.396
[5,   100] loss: 0.415
[5,   120] loss: 0.381
[5,   140] loss: 0.343
[5,   160] loss: 0.373
[5,   180] loss: 0.355
[5,   200] loss: 0.379
[5,   220] loss: 0.303
[5,   240] loss: 0.328
[5,   260] loss: 0.289
[5,   280] loss: 0.286
[5,   300] loss: 0.301
[5,   320] loss: 0.257

Current training QWK: 1.0


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Current valid QWK: 0.7605633802816901
learning rate: 0.000050


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[6,    20] loss: 0.271
[6,    40] loss: 0.241
[6,    60] loss: 0.219
[6,    80] loss: 0.245
[6,   100] loss: 0.266
[6,   120] loss: 0.221
[6,   140] loss: 0.225
[6,   160] loss: 0.230
[6,   180] loss: 0.208
[6,   200] loss: 0.208
[6,   220] loss: 0.174
[6,   240] loss: 0.184
[6,   260] loss: 0.214
[6,   280] loss: 0.194
[6,   300] loss: 0.156
[6,   320] loss: 0.145

Current training QWK: 1.0


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Current valid QWK: 0.7595993322203672
Best QWK: 0.8713450292397661
[6] loss: 5.122
Finished Fold Training
Minutes run for: 78.16483758290609
Fold number 1:
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[1,    20] loss: 1.614
[1,    40] loss: 1.417
[1,    60] loss: 1.294
[1,    80] loss: 1.304
[1,   100] loss: 1.331
[1,   120] loss: 1.291
[1,   140] loss: 1.260
[1,   160] loss: 1.260
[1,   180] loss: 1.267
[1,   200] loss: 1.246
[1,   220] loss: 1.137
[1,   240] loss: 1.287
[1,   260] loss: 1.311
[1,   280] loss: 1.167
[1,   300] loss: 1.211
[1,   320] loss: 1.182

Current training QWK: -0.31175836030204973


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Saving Best Score ------------->
Current valid QWK: 0.8691588785046729
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[2,    20] loss: 1.118
[2,    40] loss: 1.106
[2,    60] loss: 1.008
[2,    80] loss: 1.049
[2,   100] loss: 1.111
[2,   120] loss: 1.036
[2,   140] loss: 1.012
[2,   160] loss: 0.977
[2,   180] loss: 1.020
[2,   200] loss: 1.029
[2,   220] loss: 0.898
[2,   240] loss: 1.079
[2,   260] loss: 1.113
[2,   280] loss: 0.968
[2,   300] loss: 1.002
[2,   320] loss: 1.004

Current training QWK: 0.5


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Saving Best Score ------------->
Current valid QWK: 0.9361702127659575
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[3,    20] loss: 0.920
[3,    40] loss: 0.931
[3,    60] loss: 0.813
[3,    80] loss: 0.831
[3,   100] loss: 0.902
[3,   120] loss: 0.813
[3,   140] loss: 0.768
[3,   160] loss: 0.777
[3,   180] loss: 0.780
[3,   200] loss: 0.805
[3,   220] loss: 0.704
[3,   240] loss: 0.875
[3,   260] loss: 0.847
[3,   280] loss: 0.803
[3,   300] loss: 0.807
[3,   320] loss: 0.789

Current training QWK: 0.7984084880636605


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Current valid QWK: 0.9123287671232877
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[4,    20] loss: 0.718
[4,    40] loss: 0.745
[4,    60] loss: 0.632
[4,    80] loss: 0.668
[4,   100] loss: 0.687
[4,   120] loss: 0.644
[4,   140] loss: 0.567
[4,   160] loss: 0.546
[4,   180] loss: 0.565
[4,   200] loss: 0.514
[4,   220] loss: 0.501
[4,   240] loss: 0.696
[4,   260] loss: 0.600
[4,   280] loss: 0.599
[4,   300] loss: 0.623
[4,   320] loss: 0.593

Current training QWK: 0.6941896024464832


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Current valid QWK: 0.900497512437811
learning rate: 0.000100


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[5,    20] loss: 0.547
[5,    40] loss: 0.547
[5,    60] loss: 0.417
[5,    80] loss: 0.542
[5,   100] loss: 0.519
[5,   120] loss: 0.461
[5,   140] loss: 0.404
[5,   160] loss: 0.366
[5,   180] loss: 0.448
[5,   200] loss: 0.480
[5,   220] loss: 0.359
[5,   240] loss: 0.432
[5,   260] loss: 0.472
[5,   280] loss: 0.408
[5,   300] loss: 0.492
[5,   320] loss: 0.430

Current training QWK: 0.9883040935672515


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Epoch     5: reducing learning rate of group 0 to 5.0000e-05.
Current valid QWK: 0.9322033898305084
learning rate: 0.000050


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))

[6,    20] loss: 0.456
[6,    40] loss: 0.374
[6,    60] loss: 0.295
[6,    80] loss: 0.353
[6,   100] loss: 0.310
[6,   120] loss: 0.289
[6,   140] loss: 0.239
[6,   160] loss: 0.266
[6,   180] loss: 0.253
[6,   200] loss: 0.218
[6,   220] loss: 0.206
[6,   240] loss: 0.219
[6,   260] loss: 0.248
[6,   280] loss: 0.205
[6,   300] loss: 0.183
[6,   320] loss: 0.184

Current training QWK: 1.0


HBox(children=(FloatProgress(value=0.0, max=332.0), HTML(value='')))


Current valid QWK: 0.909967845659164
Best QWK: 0.9361702127659575
[6] loss: 4.757
Finished Fold Training
Minutes run for: 78.74552985429764
