## Use stacked images (3D) and Efficientnet3D model

Acknowledgements:

- https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling
- https://www.kaggle.com/furcifer/torch-efficientnet3d-for-mri-no-train
- https://github.com/shijianjian/EfficientNet-PyTorch-3D
    
    
Use models with only one MRI type, then ensemble the 4 models 


In [1]:
import os
import sys 
import json
import glob
import random
import collections
import time

import numpy as np
import pandas as pd
import pydicom
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import imgaug as ia
import imgaug.augmenters as iaa

In [2]:
dataset = 'test'
series_names = ['FLAIR','T1w','T1wCE','T2w']
directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
# This function gets called by get_middle_images function
#Returns the list of all images present in a particular series(modality) of a given patient
def get_series_list(dataset, study_id, series_name):

    series_list = []

    for subdirs, dirs, files in os.walk(directory + '/' + dataset + '/' + study_id + "/" + series_name):
        series_list = os.listdir(directory + '/' + dataset + '/' + study_id + '/' + series_name)
    return series_list


def get_middle_images(study_id):
    
    middle_images = []
    
    # Iterate through each of the four series directories and get the files 
    for ser in series_names:
        series_files = get_series_list(dataset, study_id, ser)
        series_df = pd.DataFrame(columns = ['image','instance_number'])

        # Get the DICOM InstanceNumber tag to order the images since we can't rely on the filenames to be in order
        for s in series_files:
            img = pydicom.dcmread(directory + "/" + dataset + "/" + study_id + "/" + ser + "/" + s)
            series_df.loc[len(series_df.index)] = [s, img[0x0020,0x0013].value]
            
            # 0x0020,0x0013 refers to image number, comes from Dicom dictionary (https://imagej.nih.gov/nih-image/download/nih-image_spin-offs/NucMed_Image/DICOM%20Dictionary)
 
        series_df['instance_number'] = pd.to_numeric(series_df['instance_number'])

        # Sort the image list by InstanceNumber
        series_df = series_df.sort_values(by=['instance_number'])
        
        # Find the image in the middle of the list
        middle_index = int(series_df.shape[0] / 2)
        middle_image = series_df.iloc[middle_index]['image']

        middle_images.append(ser + "/" + middle_image)

    return middle_images


#Given the image orientation, returns the image plane 
def get_image_plane(loc):

    row_x = round(loc[0])
    row_y = round(loc[1])
    row_z = round(loc[2])
    col_x = round(loc[3])
    col_y = round(loc[4])
    col_z = round(loc[5])

    if row_x == 1 and row_y == 0 and col_x == 0 and col_y == 0:
        return "Coronal"

    if row_x == 0 and row_y == 1 and col_x == 0 and col_y == 0:
        return "Sagittal"

    return 'Axial'

#for getting the image plane corresponding to the middle images of all the series of a particular patient
def plot_images(images, image_id):
    result = []
    for img in images:
        image = pydicom.dcmread(directory + "/" + dataset + "/" + image_id + "/" + img)
        # 0x0020,0x0037 in dicom dictionary refers to "Image Orientation (Patient)"
        image_orientation_patient = image[0x0020,0x0037]
        plane = get_image_plane(image_orientation_patient)
        result.append(plane)
        
    return result

df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")
# x:05 converts columns to have a fixed width(to include leading zeroes)
df['BraTS21ID'] = df['BraTS21ID'].apply(lambda x: f"{x:05}")
hey = df['BraTS21ID'].apply(lambda x: plot_images(get_middle_images(x), x))
heyhey = pd.DataFrame.from_dict(dict(zip(hey.index, hey.values))).T
df[['FLAIR', 'T1w', 'T1wCE', 'T2w']] = 0
df[['FLAIR', 'T1w', 'T1wCE', 'T2w']] = heyhey

In [3]:
data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
pytorch3dpath = "../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D"
    
mri_types = ['FLAIR','T1w','T1wCE','T2w']
angle_types = ['Axial', 'Coronal', 'Sagittal']
SIZE = 256
NUM_IMAGES = 64

sys.path.append(pytorch3dpath)
from efficientnet_pytorch_3d import EfficientNet3D

## Functions to load images

In [4]:
from pydicom.pixel_data_handlers import apply_voi_lut
def load_dicom_image(path, img_size=SIZE):
    dicom = pydicom.read_file(path)
    data = apply_voi_lut(dicom.pixel_array, dicom)
    if dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    if np.max(data) != 0:
        data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    data = cv2.resize(data, (img_size, img_size))
    return data

def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):

    files = sorted(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"))
    
    middle = len(files)//2
    num_imgs2 = num_imgs//2
    p1 = max(0, middle - num_imgs2)
    p2 = min(len(files), middle + num_imgs2)
    img3d = np.stack([load_dicom_image(f) for f in files[p1:p2]]).T 
    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
        img3d = np.concatenate((img3d,  n_zero), axis = -1)
            
    return np.expand_dims(img3d,0)

a = load_dicom_images_3d("00000")
a.shape

(1, 256, 256, 64)

In [5]:
def set_seed(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

set_seed(42)

## Model and training classes

In [6]:
class Dataset(torch_data.Dataset):
    def __init__(self, paths, targets=None, mri_type=None, label_smoothing=0.01, split="train"):
        self.paths = paths
        self.targets = targets
        self.mri_type = mri_type
        self.label_smoothing = label_smoothing
        self.split = split
          
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self, index):
        scan_id = self.paths[index]
        if self.targets is None:
            data = load_dicom_images_3d(str(scan_id).zfill(5), mri_type=self.mri_type[index], split=self.split)
        else:
            data = load_dicom_images_3d(str(scan_id).zfill(5), mri_type=self.mri_type[index], split="train")

        if self.targets is None:
            return {"X": torch.tensor(data).float(), "id": scan_id}
        else:
            y = torch.tensor(abs(self.targets[index]-self.label_smoothing), dtype=torch.float)
            return {"X": torch.tensor(data).float(), "y": y}


In [7]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = EfficientNet3D.from_name("efficientnet-b0", override_params={'num_classes': 2}, in_channels=1)
        n_features = self.net._fc.in_features
        self.net._fc = nn.Linear(in_features=n_features, out_features=1, bias=True)
    
    def forward(self, x):
        out = self.net(x)
        return out

## Predict function

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
modeldict = {
    'FLAIR': {
        'Axial': '../input/lotsof3defficientnets/FLAIR-Axial-e6-loss0.680.pth',
        'Coronal': '../input/lotsof3defficientnets/FLAIR-Coronal-e10-loss0.635.pth',
        'Sagittal': '../input/lotsof3defficientnets/FLAIR-Sagittal-e1-loss0.696.pth',
    },
    
    'T1w': {
        'Axial': '../input/lotsof3defficientnets/T1w-Axial-e3-loss0.693.pth',
        'Coronal': '../input/lotsof3defficientnets/T1w-Coronal-e3-loss0.697.pth',
        'Sagittal': '../input/lotsof3defficientnets/T1wCE-Sagittal-e1-loss0.695.pth',
    },
    
    'T1wCE': {
        'Axial': '../input/lotsof3defficientnets/T1wCE-Axial-e3-loss0.693.pth',
        'Coronal': '../input/lotsof3defficientnets/T1wCE-Coronal-e10-loss0.683.pth',
        'Sagittal': '../input/lotsof3defficientnets/T1wCE-Sagittal-e1-loss0.695.pth',
    },
    
    'T2w': {
        'Axial': '../input/lotsof3defficientnets/T2w-Axial-e5-loss0.673.pth',
        'Coronal': '../input/lotsof3defficientnets/T2w-Coronal-e1-loss0.693.pth',
        'Sagittal': '../input/lotsof3defficientnets/T2w-Sagittal-e10-loss0.599.pth',
    }
}

In [10]:
def predict(modelfile, df, mri_type, angle_type, split):
    print("Predict:", modelfile, mri_type, angle_type, df.shape)
    
    df = df[df[mri_type]==angle_type]
    df.loc[:,"MRI_Type"] = mri_type
    data_retriever = Dataset(
        df.BraTS21ID.values, 
        mri_type=df["MRI_Type"].values,
        split=split
    )

    data_loader = torch_data.DataLoader(
        data_retriever,
        batch_size=4,
        shuffle=False,
        num_workers=2,
    )
   
    model = Model()
    model.to(device)
    
    checkpoint = torch.load(modelfile, map_location=device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    
    y_pred = []
    ids = []

    for e, batch in enumerate(data_loader,1):
        print(f"{e}/{len(data_loader)}", end="\r")
        with torch.no_grad():
            tmp_pred = torch.sigmoid(model(batch["X"].to(device))).cpu().numpy().squeeze()
            if tmp_pred.size == 1:
                y_pred.append(tmp_pred)
            else:
                y_pred.extend(tmp_pred.tolist())
            ids.extend(batch["id"])
#             ids.extend(batch["id"])
            
    preddf = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred}) 
    preddf = preddf.set_index("BraTS21ID")
    return preddf

## Ensemble for submission

In [11]:
submission = df.copy()
submission["MGMT_value"] = 0
for mtype in mri_types:
    for atype in angle_types:
        m = modeldict[mtype][atype]
        try:
            pred = predict(m, submission, mtype, atype, split="test")
        except ValueError:
            continue
        submission = pd.merge(submission, pred, how='left', on='BraTS21ID').fillna(0)
        submission['MGMT_value'] = submission['MGMT_value_x'] + submission['MGMT_value_y']
        submission = submission[['BraTS21ID', 'MGMT_value', 'FLAIR', 'T1w', 'T1wCE', 'T2w']]

submission = submission[['BraTS21ID', 'MGMT_value']]
submission["MGMT_value"] /= len(mri_types)

Predict: ../input/lotsof3defficientnets/FLAIR-Axial-e6-loss0.680.pth FLAIR Axial (87, 6)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


FileNotFoundError: [Errno 2] No such file or directory: '../input/lotsof3defficientnets/FLAIR-Axial-e6-loss0.680.pth'

<!-- submission.to_csv("submission.csv", index=False) -->

In [None]:
submission.to_csv("submission.csv", index=False)