In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree. The per-file listing is kept disabled,
# so the traversal itself is the only thing that happens here.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
import os
import sys 
import glob
import time
import gc
from pathlib import Path

import numpy as np
import pandas as pd
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2

import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings('ignore')

In [8]:
#declare variables
# Directory of pre-processed voxel volumes (not referenced by the code visible in this notebook).
data_directory = Path('../input/processed-voxels/voxels')

# Root of the raw competition data. get_voxel() resolves
# <data_root>/<split>/<study_id>/<scan_type>/*.dcm beneath it, so the name must
# exist for a fresh-kernel "Run All" to work.
# NOTE(review): path taken from the directory train_labels.csv is read from — confirm.
data_root = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification')

# MRI sequence types; create_final_tensor() stacks them as channels in this order.
scan_types = ['T2w', 'T1wCE', 'T1w', 'FLAIR']

In [9]:
#training labels
# Load the labels and drop three study ids in a single chained expression.
# NOTE(review): the reason ids 109, 123 and 709 are excluded is not visible here — confirm.
train_labels = (
    pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv')
    .loc[lambda labels: ~labels['BraTS21ID'].isin([109, 123, 709])]
)

In [10]:
#creating training & validation data
# 80/20 split, stratified on the target so both parts keep the same class balance.
df_train, df_valid = train_test_split(
    train_labels,
    stratify=train_labels["MGMT_value"],
    test_size=0.2,
    random_state=42,
)

# One shape per line, training part first.
print(df_train.shape, df_valid.shape, sep="\n")

(465, 2)
(117, 2)


In [12]:
# Multidimensional CLAHE (MCLAHE) helpers, taken from https://github.com/VincentStimper/mclahe

import builtins

# Bind the real built-in explicitly. The helper defined below shadows the name
# `slice`, so a plain `slice_ind = slice` would capture the helper itself (and
# break it) whenever this cell is executed a second time in the same kernel.
slice_ind = builtins.slice


def slice(input, begin, size):
    """
    Slice an array using an iterable start vector and an iterable length vector.

    NOTE: this intentionally shadows the built-in `slice`; use `slice_ind` when
    the built-in slice object is needed.

    :param input: Input array to be sliced (must support tuple indexing)
    :param begin: Vector of indices where to start slicing, one per leading axis
    :param size: Vector of slice lengths, paired element-wise with `begin`
    :return: Sliced array
    """

    # One built-in slice object per (begin, size) pair; zip stops at the shorter vector.
    slices = tuple(slice_ind(b, b + s) for b, s in zip(begin, size))
    return input[slices]


def batch_gather(params, indices, axis):
    """
    Gather slices from `params` according to `indices` with leading batch dims.
    This operation assumes that the leading dimensions of `indices` are dense,
    and the gathers on the axis corresponding to the last dimension of `indices`.
    More concretely it computes:
    `result[i1, ..., in, j1, ..., jm, k1, ...., kl] = params[i1, ..., in, indices[i1, ..., in, j1, ..., jm], k1, ..., kl]`
    Therefore `params` should be a Tensor of shape [A1, ..., AN, C0, B1, ..., BM],
    `indices` should be a Tensor of shape [A1, ..., AN, C1, ..., CK] and `result` will be
    a Tensor of size `[A1, ..., AN, C1, ..., CK, B1, ..., BM]`.
    Args:
      params: The array from which to gather values.
      indices: Must be one of the following types: int32, int64. Index
          array. Must be in range `[0, params.shape[axis])`. The array itself
          is not modified; a copy is adjusted internally.
      axis: Must be one of the following types: int32, int64. The axis
            in `params` to gather `indices` from.
    Returns:
      An array. Has the same type as `params`.
    """

    indices_shape = indices.shape
    params_shape = params.shape
    ndim = indices.ndim
    # Work on a copy: the offsets below are added in place.
    indices_internal = indices.copy()

    # Adapt indices to act on respective batch
    # Walking from `axis` down to 1, add to every index the offset of its batch
    # entry, so the indices address the flattened leading axes of `params`.
    accum_dim_value = 1
    for dim in range(axis, 0, -1):
        dim_value = params_shape[dim - 1]
        accum_dim_value *= params_shape[dim]
        dim_indices = np.arange(dim_value)
        dim_indices *= accum_dim_value
        # Shape that broadcasts the offsets along axis `dim - 1` of `indices`.
        dim_shape = [1] * (dim - 1) + [dim_value] + [1] * (ndim - dim)
        indices_internal += dim_indices.reshape(*dim_shape)

    # Collapse axes 0..axis of both arrays into one so a single fancy-index lookup suffices.
    flat_inner_shape_indices = np.prod(indices_shape[:(axis + 1)])
    flat_indices = indices_internal.reshape(*((flat_inner_shape_indices,) + indices_shape[(axis + 1):]))
    outer_shape = params_shape[(axis + 1):]
    flat_inner_shape_params = np.prod(params_shape[:(axis + 1)])

    flat_params = params.reshape(*((flat_inner_shape_params,) + outer_shape))
    flat_result = flat_params[flat_indices,...]
    # Restore the layout of `indices`, followed by the trailing axes of `params`.
    result = flat_result.reshape(*(indices_shape + outer_shape))
    return result


def batch_histogram(values, value_range, axis, nbins=100, use_map=False):
    """
    Fixed-width histogram computed independently for every batch entry.

    :param values: Array holding the values to be histogrammed.
    :param value_range: Shape [2] iterable. Values <= value_range[0] are counted in
    hist[0], values >= value_range[1] are counted in hist[-1].
    :param axis: Number of leading batch dimensions, i.e. the first axis that is
    histogrammed.
    :param nbins: Scalar. Number of histogram bins.
    :param use_map: If True, compute one np.histogram per batch entry instead of a
    single histogram over shifted values.
    :return: Histogram of shape batch dimensions + (nbins,).
    """

    full_shape = values.shape
    batch_dim, outer_dim = full_shape[:axis], full_shape[axis:]
    num_batch = np.prod(batch_dim)

    if not use_map:
        # Map the values onto [0, 1] in double precision.
        bounds = np.array(value_range).astype('double')
        scaled = (values.astype('double') - bounds[0]) / (bounds[1] - bounds[0])

        # Pull out-of-range values to the centre of the first / last bin.
        half_bin = 0.5 / nbins
        centred = np.minimum(np.maximum(scaled, half_bin), 1.0 - half_bin)

        # Give batch entry k the interval [k, k + 1) so that a single histogram
        # over [0, num_batch] yields all per-batch histograms at once.
        offsets = np.arange(num_batch).reshape(*(batch_dim + len(outer_dim) * (1,)))
        hist = np.histogram(centred + offsets, range=[0, num_batch], bins=num_batch * nbins)[0]
    else:
        per_batch = values.reshape(*((num_batch,) + outer_dim))
        hist = np.array([np.histogram(entry, range=value_range, bins=nbins)[0] for entry in per_batch])

    return hist.reshape(*(batch_dim + (nbins,)))


import numpy as np
from itertools import product

def mclahe(x, kernel_size=None, n_bins=128, clip_limit=0.01, adaptive_hist_range=False):
    """
    Contrast limited adaptive histogram equalization for arrays of any dimension.

    The input is normalized to [0, 1], padded so it splits into whole blocks of
    `kernel_size`, a clipped-histogram CDF mapping is built per histogram block,
    and each output value is a weighted sum of the 2**dim mappings selected by
    `product([0, 1], repeat=dim)`.

    :param x: numpy array to which clahe is applied
    :param kernel_size: tuple of kernel sizes, 1/8 of dimension lengths of x if None
    :param n_bins: number of bins to be used in the histogram
    :param clip_limit: relative intensity limit to be ignored in the histogram equalization
    :param adaptive_hist_range: flag, if true individual range for histogram computation of each block is used
    :return: numpy array to which clahe was applied, scaled on interval [0, 1]
    :raises AssertionError: if `kernel_size` does not have one entry per dimension of `x`
    """

    if kernel_size is None:
        kernel_size = tuple(s // 8 for s in x.shape)
    kernel_size = np.array(kernel_size)

    assert len(kernel_size) == len(x.shape)

    dim = len(x.shape)

    # Normalize data
    # NOTE(review): a constant input makes x_max - x_min zero, so this divides by zero — confirm callers never pass one.
    x_min = np.min(x)
    x_max = np.max(x)
    x = (x - x_min) / (x_max - x_min)

    # Pad data
    # padding_x_length brings every dimension up to a multiple of kernel_size;
    # padding_hist adds roughly half a kernel on each side on top of that.
    x_shape = np.array(x.shape)
    padding_x_length = kernel_size - 1 - ((x_shape - 1) % kernel_size)
    padding_x = np.column_stack(((padding_x_length + 1) // 2, padding_x_length // 2))
    padding_hist = np.column_stack((kernel_size // 2, (kernel_size + 1) // 2)) + padding_x
    x_hist_padded = np.pad(x, padding_hist, 'symmetric')

    # `slice` here is the notebook's array-slicing helper, not the built-in.
    x_padded = slice(x_hist_padded, kernel_size // 2, x_shape + padding_x_length)

    # Form blocks used for interpolation
    # Resulting layout: (block index per axis..., position inside block per axis...).
    n_blocks = np.ceil(np.array(x.shape) / kernel_size).astype(np.int32)
    new_shape = np.reshape(np.column_stack((n_blocks, kernel_size)), (2 * dim,))
    perm = tuple(2 * i for i in range(dim)) + tuple(2 * i + 1 for i in range(dim))
    x_block = np.transpose(x_padded.reshape(*new_shape), perm)
    shape_x_block = np.concatenate((n_blocks, kernel_size))

    # Form block used for histogram
    # One more block per axis than the interpolation grid.
    n_blocks_hist = n_blocks + np.ones(dim, dtype=np.int32)
    new_shape = np.reshape(np.column_stack((n_blocks_hist, kernel_size)), (2 * dim,))
    perm = tuple(2 * i for i in range(dim)) + tuple(2 * i + 1 for i in range(dim))
    x_hist = np.transpose(x_hist_padded.reshape(*new_shape), perm)

    # Get maps
    # Get histogram
    if adaptive_hist_range:
        # Rescale every histogram block to its own [min, max]; a flat block uses a norm of 1.
        hist_ex_shape = np.concatenate((n_blocks_hist, [1] * dim))
        x_hist_max = np.max(x_hist, tuple(np.arange(-dim, 0)))
        x_hist_min = np.min(x_hist, tuple(np.arange(-dim, 0)))
        x_hist_norm = np.where(x_hist_min == x_hist_max, np.ones_like(x_hist_min), x_hist_max - x_hist_min)

        x_hist_scaled = (x_hist - x_hist_min.reshape(*hist_ex_shape)) / x_hist_norm.reshape(*hist_ex_shape)
    else:
        x_hist_scaled = x_hist
    hist = batch_histogram(x_hist_scaled, [0., 1.], dim, nbins=n_bins).astype(np.float32)
    # Clip histogram
    # Counts above prod(kernel_size) * clip_limit are removed and spread evenly over all bins.
    n_to_high = np.sum(np.maximum(hist - np.prod(kernel_size) * clip_limit, 0), -1, keepdims=True)
    hist_clipped = np.minimum(hist, np.prod(kernel_size) * clip_limit) + n_to_high / n_bins
    # The per-block mapping is the CDF rescaled to [0, 1]; a flat CDF uses a norm of 1.
    cdf = np.cumsum(hist_clipped, -1)
    cdf_min = cdf[..., :1]
    cdf_max = cdf[..., -1:]
    cdf_norm = np.where(cdf_min == cdf_max, np.ones_like(cdf_max), cdf_max - cdf_min)
    mapping = (cdf - cdf_min) / cdf_norm

    # Get global hist bins if needed
    # Compute always as they are needed for both modes
    bin_edges = np.histogram_bin_edges(x_hist_scaled, range=[0., 1.], bins=n_bins)[1:-1]
    if not adaptive_hist_range:
        # Global bins
        bin_ind = np.digitize(x_block, bin_edges)

    # Loop over maps to compute result
    # ind_map holds a 0/1 offset per axis and selects which histogram block's mapping is applied.
    res = np.zeros(shape_x_block)
    inds = [list(i) for i in product([0, 1], repeat=dim)]
    for ind_map in inds:
        # Compute bin indices if local bins are used
        if adaptive_hist_range:
            # Local bins
            hist_norm_slice_shape = np.concatenate((n_blocks, [1] * dim))
            x_hist_min_sub = slice(x_hist_min, ind_map, n_blocks)
            x_hist_norm_sub = slice(x_hist_norm, ind_map, n_blocks)
            x_block_scaled = (x_block - x_hist_min_sub.reshape(*hist_norm_slice_shape)) \
                             / x_hist_norm_sub.reshape(*hist_norm_slice_shape)
            bin_ind = np.digitize(x_block_scaled, bin_edges)

        # Apply map
        map_slice = slice(mapping, ind_map + [0], list(n_blocks) + [n_bins])
        mapped_sub = batch_gather(map_slice, bin_ind, dim)

        # Calculate and apply coefficients
        # Per axis the weight changes linearly with the position inside the block;
        # offset 0 along an axis uses the complementary weight (1 - coeff).
        res_sub = mapped_sub
        for axis in range(dim):
            coeff = np.arange(kernel_size[axis], dtype=np.float32) / kernel_size[axis]
            if kernel_size[axis] % 2 == 0:
                coeff = 0.5 / kernel_size[axis] + coeff
            if ind_map[axis] == 0:
                coeff = 1. - coeff
            new_shape = [1] * (dim + axis) + [kernel_size[axis]] + [1] * (dim - 1 - axis)
            coeff = np.reshape(coeff, new_shape)
            res_sub = coeff * res_sub

        # Update results
        res = res + res_sub

    # Rescaling
    # NOTE(review): divides by zero when the combined result is constant — confirm this cannot occur.
    res_min, res_max = (np.min(res), np.max(res))
    res_norm = (res - res_min) / (res_max - res_min)

    # Reshape result
    # Interleave block-index and in-block axes again, then merge each pair into one axis.
    new_shape = tuple((axis, axis + dim) for axis in range(dim))
    new_shape = tuple(j for i in new_shape for j in i)
    res_transposed = np.transpose(res_norm, new_shape)
    res_reshaped = res_transposed.reshape(*tuple(n_blocks[axis] * kernel_size[axis] for axis in range(dim)))

    # Recover original size
    # Cut off the padding that made the shape a multiple of kernel_size.
    result = slice(res_reshaped, padding_x[:, 0], x.shape)

    return result


In [19]:
def get_image_plane(data):
    '''
    Name the acquisition plane of a scan from its orientation metadata.

    The six values of `data.ImageOrientationPatient` are rounded to integers;
    components 0, 1, 3 and 4 are then matched against three known patterns.
    Returns 'Coronal', 'Axial' or 'Sagittal', and 'Unknown' for any other
    combination.
    '''
    rounded = [round(component) for component in data.ImageOrientationPatient]
    x1, y1, _, x2, y2, _ = rounded

    plane_by_pattern = {
        (1, 0, 0, 0): 'Coronal',
        (1, 0, 0, 1): 'Axial',
        (0, 1, 0, 0): 'Sagittal',
    }
    return plane_by_pattern.get((x1, y1, x2, y2), 'Unknown')
    
def get_voxel(split, study_id, scan_type):
    '''
    Build a 3D voxel array for one MRI series and reorder / rotate it so that
    every supported acquisition plane ends up in a common (axial) layout.

    split     : sub-directory of `data_root` (the notebook passes "train" or "test")
    study_id  : study identifier; integers are accepted and converted to the
                folder name
    scan_type : series sub-directory, e.g. one of `scan_types`

    Returns the stacked pixel arrays as a numpy array.
    Raises FileNotFoundError when the series directory contains no .dcm file
    and ValueError when the acquisition plane is not recognised.
    '''
    # The label CSVs are read without an explicit dtype, so ids arrive here as
    # integers, which Path.joinpath rejects. Strings that are already padded are
    # left unchanged by zfill.
    # NOTE(review): 5-digit zero padding assumes the competition's folder naming
    # (e.g. "00046") — confirm against the input directory.
    study_dir = str(study_id).zfill(5)
    dcm_dir = data_root.joinpath(split, study_dir, scan_type)

    # Order slices by the number after the last "-" in the file name.
    dcm_paths = sorted(dcm_dir.glob("*.dcm"), key=lambda x: int(x.stem.split("-")[-1]))
    if not dcm_paths:
        # Without this guard the code below fails on an unbound `img`.
        raise FileNotFoundError(f"No DICOM slices found in {dcm_dir}")

    imgs = []
    positions = []
    for dcm_path in dcm_paths:
        img = pydicom.dcmread(str(dcm_path))
        imgs.append(img.pixel_array)
        positions.append(img.ImagePositionPatient)

    # The plane is taken from the last slice that was read.
    plane = get_image_plane(img)
    voxel = np.stack(imgs)

    # reorder planes if needed and rotate voxel
    if plane == "Coronal":
        if positions[0][1] < positions[-1][1]:
            voxel = voxel[::-1]
        voxel = voxel.transpose((1, 0, 2))
    elif plane == "Sagittal":
        if positions[0][0] < positions[-1][0]:
            voxel = voxel[::-1]
        voxel = voxel.transpose((1, 2, 0))
        voxel = np.rot90(voxel, 2, axes=(1, 2))
    elif plane == "Axial":
        if positions[0][2] > positions[-1][2]:
            voxel = voxel[::-1]
        voxel = np.rot90(voxel, 2)
    else:
        raise ValueError(f"Unknown plane {plane}")
    return voxel

def normalize_contrast(voxel):
    '''
    Min-max scale a voxel array to the full uint8 range [0, 255].

    A volume whose sum is zero is returned unchanged (original dtype).
    A constant non-zero volume has no contrast to stretch; it is returned as an
    all-zero uint8 array instead of dividing by zero.
    '''
    if voxel.sum() == 0:
        return voxel

    shifted = voxel - np.min(voxel)
    peak = np.max(shifted)
    if peak == 0:
        # Every value equals the minimum: dividing by `peak` would produce NaNs.
        return np.zeros(shifted.shape, dtype=np.uint8)

    return (shifted / peak * 255).astype(np.uint8)

def crop_voxel(voxel):
    '''
    Trim a 3D voxel array by keeping, along each axis in turn, only the
    positions whose mean over the other two axes is greater than zero.
    The last axis is trimmed first, then the middle one, then the first.
    A volume whose sum is zero is returned untouched.
    '''
    if voxel.sum() == 0:
        return voxel

    # (axis to trim, axes the mean is taken over)
    trim_plan = ((2, (0, 1)), (1, (0, 2)), (0, (1, 2)))
    for target_axis, averaged_axes in trim_plan:
        selector = [np.s_[:]] * 3
        selector[target_axis] = voxel.mean(axis=averaged_axes) > 0
        voxel = voxel[tuple(selector)]
    return voxel

In [20]:
def filter_voxel(voxel, filter_thr):
    '''
    Keep only the entries along axis 0 whose mean exceeds `filter_thr` times
    the standard deviation of all per-entry means.
    '''
    per_slice_mean = voxel.mean(axis=(1, 2))
    cutoff = per_slice_mean.std() * filter_thr
    return voxel[per_slice_mean > cutoff, :, :]

def resize_voxel(voxel, sz=(64, 256, 256)):
    '''
    Resample a 3D voxel array to shape `sz` and return it as uint8.

    The longest axis of `voxel` is sampled at `sz[axis]` evenly spaced positions
    (index truncated to an integer); each sampled 2D plane is resized with
    bicubic interpolation to the two remaining target dimensions.

    voxel : 3D array
    sz    : target shape, default (64, 256, 256)
    '''
    output = np.zeros((sz[0], sz[1], sz[2]), dtype=np.uint8)
    longest_axis = np.argmax(voxel.shape)

    # cv2.resize takes dsize as (width, height). The interpolation flag must be
    # passed by keyword: the third positional parameter of cv2.resize is `dst`.
    if longest_axis == 0:
        for i, s in enumerate(np.linspace(0, voxel.shape[0] - 1, num=sz[0])):
            sampled = voxel[int(s), :, :]
            output[i, :, :] = cv2.resize(sampled, (sz[2], sz[1]), interpolation=cv2.INTER_CUBIC)
    elif longest_axis == 1:
        for i, s in enumerate(np.linspace(0, voxel.shape[1] - 1, num=sz[1])):
            sampled = voxel[:, int(s), :]
            output[:, i, :] = cv2.resize(sampled, (sz[2], sz[0]), interpolation=cv2.INTER_CUBIC)
    elif longest_axis == 2:
        for i, s in enumerate(np.linspace(0, voxel.shape[2] - 1, num=sz[2])):
            sampled = voxel[:, :, int(s)]
            output[:, :, i] = cv2.resize(sampled, (sz[1], sz[0]), interpolation=cv2.INTER_CUBIC)
    return output

def clahe_3d(voxel):
    '''
    Apply 3D CLAHE (mclahe) to a voxel array and return the result as uint8.

    mclahe returns values scaled to [0, 1]; they are stretched to [0, 255].
    The kernel of (8, 32, 32) is one eighth of the (64, 256, 256) default
    target shape of resize_voxel.
    '''
    equalized = mclahe(voxel, kernel_size=[8, 32, 32],
                       n_bins=128,
                       clip_limit=0.01,
                       adaptive_hist_range=False)
    # Clip before the cast: once the values are uint8 a clip to [0, 255] can no
    # longer have any effect.
    return np.clip(equalized * 255., 0, 255).astype(np.uint8)


In [21]:
def preprocess_data(split, study_id, scan_type):
    '''
    Run the full preprocessing chain for one series:
    load -> contrast normalisation -> crop -> slice filtering (threshold 0.4)
    -> resize to (64, 256, 256) -> 3D CLAHE.
    '''
    raw = get_voxel(split, study_id, scan_type)
    cropped = crop_voxel(normalize_contrast(raw))
    informative = filter_voxel(cropped, filter_thr=0.4)
    resized = resize_voxel(informative, sz=(64, 256, 256))
    return clahe_3d(resized)

In [22]:
def create_final_tensor(split, study_id):
    '''
    Preprocess every scan type of a study and join the results along a new
    leading axis, one channel per entry of `scan_types` (in that order).
    '''
    channels = [
        np.expand_dims(preprocess_data(split, study_id, scan), 0)
        for scan in scan_types
    ]
    return np.concatenate(channels, axis=0)

In [23]:
class MRIScanDataset(Dataset):
    '''
    Dataset that builds the multi-channel voxel tensor of a study on access.

    paths           : sequence of study ids
    targets         : optional sequence of labels aligned with `paths`
    label_smoothing : amount each label is moved away from its hard value
    split           : data split handed to create_final_tensor
    augment         : stored on the instance; not used by the code shown here
    '''

    def __init__(self, paths, targets=None,
                 label_smoothing=0.01, split="train", augment=False):
        self.paths, self.targets = paths, targets
        self.label_smoothing = label_smoothing
        self.split, self.augment = split, augment

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        study_id = self.paths[index]
        voxel = create_final_tensor(self.split, study_id)
        gc.collect()

        sample = {"X": torch.tensor(voxel).float()}
        if self.targets is not None:
            # |label - smoothing| turns 0 into `smoothing` and 1 into 1 - `smoothing`.
            smoothed = abs(self.targets[index] - self.label_smoothing)
            sample["y"] = torch.tensor(smoothed, dtype=torch.float)
        else:
            sample["id"] = study_id
        return sample
            

In [24]:
class CNNModel(nn.Module):
    '''
    3D CNN: three conv stages (4 -> 16 -> 32 -> 64 channels) followed by a
    two-layer fully connected head that emits a single raw logit per sample.
    fc1 expects 6*30*30*64 flattened features.
    '''

    def __init__(self):
        super().__init__()

        # Attribute names and creation order are left exactly as they were:
        # they define the state_dict keys of saved checkpoints.
        self.conv_layer1 = self._conv_layer_set(4, 16)
        self.conv_layer2 = self._conv_layer_set(16, 32)
        self.conv_layer3 = self._conv_layer_set(32, 64)
        self.fc1 = nn.Linear(6*30*30*64, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.LeakyReLU()
        self.batch = nn.BatchNorm1d(64)
        self.drop = nn.Dropout(p=0.15)

    def _conv_layer_set(self, in_c, out_c):
        '''One stage: unpadded 3x3x3 conv, batch norm, LeakyReLU, 2x2x2 max-pool, dropout.'''
        return nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.BatchNorm3d(out_c),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
            nn.Dropout3d(0.2),
        )

    def forward(self, x):
        features = x
        for stage in (self.conv_layer1, self.conv_layer2, self.conv_layer3):
            features = stage(features)

        # Flatten everything except the batch axis for the dense head.
        flat = features.view(features.size(0), -1)
        hidden = self.batch(self.relu(self.fc1(flat)))
        return self.fc2(self.drop(hidden))

In [25]:
class TrainModel:
    '''
    Training / validation loop with checkpointing and early stopping.

    The model is saved whenever the validation loss improves; training stops
    once the validation loss has not improved for `patience` consecutive epochs.
    '''

    def __init__(self, model, device,
                 optimizer, criterion):

        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion

        self.best_valid_score = np.inf   # lowest validation loss seen so far
        self.n_patience = 0              # consecutive epochs without improvement
        self.lastmodel = None            # path of the most recently saved checkpoint

    def fit(self, epochs, train_dataloader, valid_dataloader, save_path, patience):
        '''
        Train for at most `epochs` epochs.

        epochs           : maximum number of epochs
        train_dataloader : iterable of batches with keys 'X' and 'y'
        valid_dataloader : iterable of batches with keys 'X' and 'y'
        save_path        : prefix of the checkpoint file name
        patience         : number of non-improving epochs tolerated before stopping

        Returns a list with one dict per completed epoch holding
        'epoch', 'train_loss', 'train_auc', 'val_loss' and 'val_auc'.
        '''
        start_time = time.time()
        history = []

        for epoch in range(1, epochs + 1):

            print(f'Running Epoch {epoch}...................')
            gc.collect()

            train_loss, train_auc = self.train(train_dataloader)
            val_loss, val_auc = self.validation(valid_dataloader)
            history.append({
                'epoch': epoch,
                'train_loss': train_loss,
                'train_auc': train_auc,
                'val_loss': val_loss,
                'val_auc': val_auc,
            })

            print(f'For Epoch {epoch :>7d} Train Loss {train_loss : >5f} Train AUC {train_auc} Val Loss {val_loss} Val AUC {val_auc} ')
            print(f'For Epoch {epoch :>7d} Time Taken {(time.time() - start_time)/60}')

            if self.best_valid_score > val_loss:
                self.save_model(epoch, save_path, val_loss, val_auc)
                # The selection criterion is the validation loss, so the message says so.
                print(f'Validation loss improved from {self.best_valid_score :4f} to {val_loss}. Saved model to {self.lastmodel}')

                self.best_valid_score = val_loss
                self.n_patience = 0
            else:
                self.n_patience += 1

            if self.n_patience >= patience:
                print(f"\nValidation loss didn't improve in the last {patience} epochs.")
                break

        return history

    def train(self, train_dataloader):
        '''
        Run one training epoch.

        The loss is accumulated batch by batch; the AUC is computed once over
        all targets and predictions of the epoch.
        Returns (mean batch loss, AUC).
        '''
        self.model.train()
        sum_loss = 0
        y_all = []
        output_all = []
        start_time = time.time()

        for batch, data in enumerate(train_dataloader):

            X = data['X'].to(self.device)
            y = data['y'].to(self.device)
            gc.collect()

            self.optimizer.zero_grad()             # clear the accumulated gradients
            pred = self.model(X).squeeze(1)        # raw logits, one per sample
            loss = self.criterion(pred, y)
            loss.backward()
            self.optimizer.step()

            sum_loss += loss.detach().item()
            y_all.extend(data['y'].tolist())
            output_all.extend(torch.sigmoid(pred.detach()).tolist())

            # progress report every 20 batches
            if batch % 20 == 0:
                time_taken = (time.time() - start_time)
                start_time = time.time()
                print(f'Train Batch {batch + 1 :>7d} Loss : {sum_loss/(batch +1)} Time Taken : {time_taken/60} ')

        # Targets may be label-smoothed floats; map them back to hard labels for the AUC.
        y_all = [1 if x > 0.5 else 0 for x in y_all]
        train_auc = roc_auc_score(y_all, output_all)

        return sum_loss/len(train_dataloader), train_auc

    def validation(self, val_dataloader):
        '''
        Evaluate the model without gradient tracking.
        Returns (mean batch loss, AUC).
        '''
        self.model.eval()
        sum_loss = 0
        y_all = []
        output_all = []

        with torch.no_grad():
            for batch, data in enumerate(val_dataloader):

                X_val = data['X'].to(self.device)
                y_val = data['y'].to(self.device)

                pred = self.model(X_val).squeeze(1)
                loss = self.criterion(pred, y_val)

                sum_loss += loss.item()
                y_all.extend(data['y'].tolist())
                output_all.extend(torch.sigmoid(pred).tolist())

                # progress report every 20 batches
                if batch % 20 == 0:
                    print(f'Test Batch {batch + 1 :>7d} Loss : {sum_loss/(batch +1)}')

        # Targets may be label-smoothed floats; map them back to hard labels for the AUC.
        y_all = [1 if x > 0.5 else 0 for x in y_all]
        val_auc = roc_auc_score(y_all, output_all)

        return sum_loss/len(val_dataloader), val_auc

    def save_model(self, n_epoch, save_path, loss, auc):
        '''Save the model's state_dict and remember the file name in `self.lastmodel`.'''
        self.lastmodel = f"{save_path}_loss{loss:.3f}_auc{auc:.3f}.pth"
        torch.save(self.model.state_dict(), self.lastmodel)
       

In [26]:
#train the model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_mri_type(df_train, df_valid):
    '''
    Train CNNModel on `df_train` and validate on `df_valid`.

    Both frames must provide the columns "BraTS21ID" and "MGMT_value".
    Returns the path of the last checkpoint written by the trainer
    (None if no checkpoint was saved).
    '''

    #train dataset
    train_dataset = MRIScanDataset(
        df_train["BraTS21ID"].values,
        df_train["MGMT_value"].values,
        augment=False)

    #valid dataset
    valid_dataset = MRIScanDataset(
        df_valid["BraTS21ID"].values,
        df_valid["MGMT_value"].values)

    # drop_last=True during training avoids a trailing batch with a single
    # sample, which BatchNorm1d cannot process in training mode.
    train_loader = DataLoader(
        train_dataset,
        batch_size=8,
        shuffle=True,
        drop_last=True)

    # Validation runs with model.eval(), so every sample can and should be
    # scored; dropping the last partial batch would silently shrink the set.
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=8,
        shuffle=False,
        drop_last=False)

    #load model
    model = CNNModel()
    model.to(device)

    #define optimizer & criterion
    criterion = F.binary_cross_entropy_with_logits
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

    trainer = TrainModel(model, device,
                         optimizer, criterion)

    trainer.fit(epochs=10, train_dataloader=train_loader, valid_dataloader=valid_loader, save_path='model', patience=6)

    return trainer.lastmodel

In [None]:
import traceback

# `model` holds the path of the saved checkpoint returned by train_mri_type.
try:
    model = train_mri_type(df_train, df_valid)

except Exception:
    # Keep the notebook running, but show what actually went wrong. Catching
    # Exception (not a bare except) lets KeyboardInterrupt stop the cell.
    print('Error while handling model files')
    traceback.print_exc()
    # Make the failure explicit instead of leaving a stale or undefined name behind.
    model = None

In [27]:
def predict(modelfile, df, split):
    '''
    Score every study in `df` with a saved CNNModel checkpoint.

    modelfile : path of a state_dict saved with torch.save
    df        : DataFrame whose index holds the study ids
    split     : data split handed to MRIScanDataset (e.g. "test")

    Returns a DataFrame indexed by "BraTS21ID" with one column "MGMT_value"
    holding the sigmoid probabilities.
    '''

    data_retriever = MRIScanDataset(
        df.index.values,
        split=split
    )

    data_loader = DataLoader(
        data_retriever,
        batch_size=8,
        shuffle=False
    )

    model = CNNModel()
    model.to(device)

    # map_location lets a checkpoint written on a GPU load in a CPU-only session.
    model.load_state_dict(torch.load(modelfile, map_location=device))
    model.eval()

    y_pred = []
    ids = []

    with torch.no_grad():
        for e, batch in enumerate(data_loader, 1):

            print(f"{e}/{len(data_loader)}", end="\r")
            gc.collect()

            probs = torch.sigmoid(model(batch["X"].to(device)))
            # Flatten to 1-D so a final batch with a single sample needs no special case.
            y_pred.extend(probs.reshape(-1).cpu().tolist())

            # The DataLoader's default collation returns numeric ids as a tensor;
            # convert them to plain Python values so the resulting index can be
            # matched against the caller's index.
            batch_ids = batch["id"]
            if torch.is_tensor(batch_ids):
                batch_ids = batch_ids.tolist()
            ids.extend(batch_ids)

    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
    preddf = preddf.set_index("BraTS21ID")
    return preddf

In [None]:
import traceback

try:
    submission = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')
    submission = submission.set_index("BraTS21ID")
    pred = predict(model, submission, "test")
    # Assign the prediction column itself (aligned on the BraTS21ID index)
    # rather than the whole DataFrame.
    submission["MGMT_value"] = pred["MGMT_value"]
    submission['MGMT_value'].to_csv('submission.csv')

except Exception:
    # Keep the notebook running, but print the real cause instead of hiding it.
    print('Error while Submission')
    traceback.print_exc()