# RSNA Intracranial Hemorrhage Detection 

In [None]:
import numpy as np
import pandas as pd
import pydicom
import os
import random
import cv2
import tensorflow as tf
from math import ceil
import matplotlib.pyplot as plt
import keras
from keras.layers import Dense
from keras.models import Model, load_model
from keras.applications.resnet50 import ResNet50
from keras.utils import Sequence
from keras.losses import binary_crossentropy
from keras.optimizers import Adam
from sklearn.model_selection import StratifiedShuffleSplit

In [None]:
# Reproducibility: fix the seed of NumPy's global random generator.
SEED = 42
np.random.seed(SEED)

# Hold-out fraction and network input geometry (square 224 x 224 input).
TEST_SIZE = 0.06
HEIGHT = WIDTH = 224

# Mini-batch sizes for the training / validation generators.
TRAIN_BATCH_SIZE, VALID_BATCH_SIZE = 32, 16

# Dataset locations; every path keeps its trailing slash so file names can
# simply be appended.
input_folder = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/'
path_train_img = f'{input_folder}stage_2_train/'
path_test_img = f'{input_folder}stage_2_test/'

In [None]:
# Load the training label CSV from the competition input folder.
train_df = pd.read_csv(input_folder + 'stage_2_train.csv')
# Preview the first rows (rendered as the notebook cell output).
train_df.head()

In [None]:
# Split every ID on '_' once and reuse the tokens for both derived columns.
# (IDs are assumed to look like '<prefix>_<image id>_<sub type>'.)
id_tokens = train_df['ID'].apply(lambda raw_id: raw_id.split('_'))
# last token -> sub type
train_df['sub_type'] = id_tokens.apply(lambda tokens: tokens[-1])
# first two tokens re-joined, plus the '.dcm' extension -> file name
train_df['file_name'] = id_tokens.apply(lambda tokens: '_'.join(tokens[:2]) + '.dcm')
train_df.head()

In [None]:
# (rows, columns) of the label table before duplicates are removed.
train_df.shape

In [None]:
# Discard rows that repeat the same (Label, sub_type, file_name) triple,
# modifying train_df in place, then show the resulting shape.
duplicate_key = ['Label', 'sub_type', 'file_name']
train_df.drop_duplicates(subset=duplicate_key, inplace=True)
train_df.shape

In [None]:
# Count the directory entries of the training image folder
# (presumably one DICOM file per image — os.listdir counts every entry).
print("Number of train images available:", len(os.listdir(path_train_img)))

In [None]:
# Reshape to one row per DICOM file and one column per sub type, each cell
# holding that pair's Label (pivot_table's default aggregation is the mean).
label_rows = train_df.drop(columns='ID')
train_final_df = label_rows.pivot_table(
    index="file_name",
    columns="sub_type",
    values="Label",
)
train_final_df.head()

In [None]:
# (number of DICOM files, number of sub type columns) after pivoting.
train_final_df.shape

In [None]:
# Invalid image ID_6431af929.dcm
# errors='ignore' keeps this cell safe to re-run: a second execution (or a
# label file that does not list the image) would otherwise raise KeyError.
train_final_df.drop('ID_6431af929.dcm', inplace=True, errors='ignore')

In [None]:
# Build a smaller training table: the first 2000 positive rows of every
# hemorrhage sub type plus the first 10000 rows with no sub type flagged.
# NOTE(review): a row positive for several sub types can be selected more
# than once, so the index may contain repeats — confirm this is intended.
subtype_columns = ['epidural', 'intraparenchymal', 'intraventricular',
                   'subarachnoid', 'subdural']

# One frame of positive rows per sub type.
epidural_df = train_final_df.loc[train_final_df['epidural'].eq(1)]
intraparenchymal_df = train_final_df.loc[train_final_df['intraparenchymal'].eq(1)]
intraventricular_df = train_final_df.loc[train_final_df['intraventricular'].eq(1)]
subarachnoid_df = train_final_df.loc[train_final_df['subarachnoid'].eq(1)]
subdural_df = train_final_df.loc[train_final_df['subdural'].eq(1)]

# Rows where all five sub type labels are exactly 0.
no_subtype_mask = (train_final_df[subtype_columns] == 0).all(axis=1)
non_df = train_final_df[no_subtype_mask]

# The empty head(0) frame goes first, exactly as the seed frame did before.
train_small_df = pd.concat([
    train_final_df.head(0),
    epidural_df.head(2000),
    intraparenchymal_df.head(2000),
    intraventricular_df.head(2000),
    subarachnoid_df.head(2000),
    subdural_df.head(2000),
    non_df.head(10000),
])

In [None]:
# Taken From Gradient and Sigmoid Windowing
# https://www.kaggle.com/reppic/gradient-sigmoid-windowing
def get_dicom_field_value(val):
    """Return a DICOM header value as an ``int``.

    Parameters
    ----------
    val : scalar or pydicom.multival.MultiValue
        Header value; when it is a ``MultiValue`` only its first entry is
        used.

    Returns
    -------
    int
        ``int()`` of the (first) value, so fractional values are truncated.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of MultiValue.
    if isinstance(val, pydicom.multival.MultiValue):
        val = val[0]
    return int(val)

def get_windowing(data):
    """Read the windowing / rescale parameters of a DICOM dataset.

    Returns a list ``[window center, window width, rescale slope,
    rescale intercept]`` with every entry converted by
    ``get_dicom_field_value``.
    """
    # Fetch all four header attributes first, then convert them in order.
    center, width = data.WindowCenter, data.WindowWidth
    slope, intercept = data.RescaleSlope, data.RescaleIntercept
    return list(map(get_dicom_field_value, (center, width, slope, intercept)))

def get_windowed_image(image, wc, ww, slope, intercept):
    """Rescale raw pixel values and clip them to an intensity window.

    Parameters
    ----------
    image : array-like
        Raw pixel values; the input is never modified.
    wc, ww : number
        Window center and window width.
    slope, intercept : number
        Linear rescale applied first: ``image * slope + intercept``.

    Returns
    -------
    numpy.ndarray
        float64 array limited to ``[wc - ww // 2, wc + ww // 2]``.
    """
    # Work in floating point: with an unsigned integer pixel array, adding a
    # negative intercept would otherwise wrap around or raise, depending on
    # the NumPy version.
    img = np.asarray(image, dtype=np.float64) * slope + intercept
    img_min = wc - ww // 2
    img_max = wc + ww // 2
    return np.clip(img, img_min, img_max)


def _normalize(img):
    """Linearly rescale ``img`` so its minimum maps to -1 and its maximum to 1.

    A constant image (maximum equal to minimum) yields an all-zero array of
    the same shape instead of dividing by zero.
    """
    lowest, highest = img.min(), img.max()
    if highest == lowest:
        return np.zeros(img.shape)
    return 2 * (img - lowest) / (highest - lowest) - 1

def _read(path, desired_size=(224, 224)):
    """Load one DICOM file as a windowed, normalized single-channel image.

    Parameters
    ----------
    path : str
        Location of the ``.dcm`` file.
    desired_size : tuple of int
        Target size, passed to ``cv2.resize`` as ``(width, height)``.

    Returns
    -------
    numpy.ndarray
        Array with a trailing channel axis of length 1. Values come from
        ``_normalize``; when the pixel data cannot be windowed the image is
        all zeros.
    """
    # 1. read dicom file
    dcm = pydicom.dcmread(path)

    # 2. Extract meta data features
    # window center, window width, slope, intercept
    window_params = get_windowing(dcm)
    try:
        # 3. Generate windowed image
        img = get_windowed_image(dcm.pixel_array, *window_params)
    except Exception:
        # Deliberate best effort: undecodable pixel data becomes a blank
        # image. Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
        # still propagate.
        img = np.zeros(desired_size)

    img = _normalize(img)

    # Resize whenever the decoded image differs from the requested size,
    # rather than assuming every source image is 512 x 512.
    target_w, target_h = desired_size
    if img.shape[:2] != (target_h, target_w):
        img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
    return img[:, :, np.newaxis]

In [None]:
# Sanity check: shape of one training image loaded at 128 x 128.
_read(path_train_img + 'ID_ffff922b9.dcm', (128, 128)).shape

In [None]:
# Display the same training image; [:, :, 0] drops the trailing channel axis
# so imshow receives a 2-D array.
plt.imshow(
    _read(path_train_img + 'ID_ffff922b9.dcm', (128, 128))[:, :, 0]
)

In [None]:
# Train Data Generator - Used from Stanford website
# https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
class TrainDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads windowed DICOM images batch by batch.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose index holds the DICOM file names to load.
    labels : pandas.DataFrame
        Frame read positionally (``iloc``) alongside ``dataset``; each row
        supplies the 6 target values of the matching image.
    batch_size : int
        Maximum number of samples per batch.
    img_size : tuple of int
        Size handed to ``_read``; also the spatial shape of the batch array.
    img_dir : str
        Directory prefix prepended to every file name.
    *args, **kwargs
        Accepted for backward compatibility and ignored.
    """

    def __init__(self, dataset, labels, batch_size=16, img_size=(512, 512),
                 img_dir=path_train_img, *args, **kwargs):
        # Let the Keras base class set up whatever internal state it needs.
        super().__init__()
        self.dataset = dataset
        self.ids = dataset.index
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = img_dir
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch; the last one may be partial."""
        return int(ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, Y)`` arrays of batch number ``index``."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X, Y = self.__data_generation(indices)
        return X, Y

    def on_epoch_end(self):
        """Draw a fresh random sample order."""
        self.indices = np.arange(len(self.ids))
        np.random.shuffle(self.indices)

    def __data_generation(self, indices):
        """Load the images and labels at the given positional ``indices``."""
        # Size the arrays by the number of samples actually in this batch:
        # the final batch can be shorter than batch_size, and a full-size
        # np.empty buffer would leave its unused rows uninitialised.
        n_samples = len(indices)
        X = np.empty((n_samples, *self.img_size, 3))
        Y = np.empty((n_samples, 6), dtype=np.float32)

        for i, index in enumerate(indices):
            ID = self.ids[index]
            image = _read(self.img_dir + ID, self.img_size)
            # The single-channel image broadcasts across the 3 channels.
            X[i,] = image
            Y[i,] = self.labels.iloc[index].values
        return X, Y
    

In [None]:
# Oversampling
#epidural_df = train_final_df[train_final_df.epidural == 1]
#train_final_df = pd.concat([train_final_df, epidural_df])
#print('Train Shape: {}'.format(train_final_df.shape))

In [None]:
# Backbone: ImageNet-pretrained ResNet50 without its classification head;
# global average pooling turns the feature maps into one vector per image.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(HEIGHT, WIDTH, 3),
)
x = base_model.output
# Optional dropout, currently disabled:
#x = Dropout(0.125)(x)

# Head: 6 independent sigmoid outputs.
output_layer = Dense(6, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=output_layer)
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['acc'],
)

In [None]:
# Multilabel stratification
splits = StratifiedShuffleSplit(test_size=TEST_SIZE, n_splits=2, random_state=SEED)
file_names, labels = train_small_df.index, train_small_df.values
# Only the first generated (train, validation) index pair is used.
split = next(splits.split(file_names, labels))
train_idx, valid_idx = split
submission_predictions = []
len(train_idx), len(valid_idx)


In [None]:
# Row subsets selected by the stratified split.
train_subset = train_small_df.iloc[train_idx]
valid_subset = train_small_df.iloc[valid_idx]

# Training generator: the subset serves as both file index and label table.
data_generator_train = TrainDataGenerator(
    dataset=train_subset,
    labels=train_subset,
    batch_size=TRAIN_BATCH_SIZE,
    img_size=(WIDTH, HEIGHT),
)

# Validation generator: same construction with the smaller batch size.
data_generator_val = TrainDataGenerator(
    dataset=valid_subset,
    labels=valid_subset,
    batch_size=VALID_BATCH_SIZE,
    img_size=(WIDTH, HEIGHT),
)

In [None]:
# Number of batches per epoch for the training and validation generators.
len(data_generator_train), len(data_generator_val)

In [None]:
# Train for 2 epochs. Model.fit accepts a keras.utils.Sequence directly;
# fit_generator is deprecated and no longer present in recent Keras releases.
model.fit(data_generator_train,
          validation_data=data_generator_val,
          epochs=2,
          verbose=1)

In [None]:
# Persist the trained model to 'model.h5' in the working directory.
model.save('model.h5')

In [None]:
# Decode the sample DICOM once and reuse it for both the model input and the
# preview, instead of reading the same file twice.
sample_img = _read(path_train_img + 'ID_000012eaf.dcm', (224, 224))

# Batch of one image; the single channel broadcasts over the 3 input channels.
X = np.empty((1, 224, 224, 3))
X[0, ] = sample_img
plt.imshow(sample_img[:, :, 0])

In [None]:
# Alternative kept for reference: np.argmax(model.predict(X))

# Predict on the one-image batch and print each (score, column name) pair,
# using the column order of train_small_df.
y = model.predict(X)
pred = zip(y[0].tolist(), train_small_df.columns)
for score_and_label in pred:
    print(score_and_label)