In [1]:
import os
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact

import pydicom as dicom

# Root directory of the dataset. The RSNA_TUMOR_DATA_DIR environment variable
# overrides the original local default so the notebook can run on other
# machines. os.path.join(..., "") guarantees a trailing separator, which the
# `DATA_DIR + "train_labels.csv"` concatenation further down relies on.
DATA_DIR = os.path.join(
    os.environ.get("RSNA_TUMOR_DATA_DIR", "D:/.coding/datasets/rsna_tumor/"), ""
)

# Handling labels

In [2]:
# Read the training label table; rows are indexed by the BraTS21ID column.
labels_csv_path = DATA_DIR + "train_labels.csv"
labels = pd.read_csv(labels_csv_path, index_col="BraTS21ID")
# Last expression of the cell, so the notebook displays (n_rows, n_columns).
labels.shape

(585, 1)

In [3]:
# Class balance of the training labels.
# NOTE(review): assumes every label value is 0 or 1, so the sum of the values
# equals the number of positive cases -- TODO confirm against the CSV.
#
# Summing the underlying NumPy array gives a plain scalar. The previous
# np.sum(labels) returned a one-element Series, and int() on a Series is
# deprecated in recent pandas releases.
n_samples = labels.shape[0]
n_positive = int(labels.to_numpy().sum())
n_negative = n_samples - n_positive
positive_class_ratio = n_positive / n_samples
print("n_positive:", n_positive)
print("n_negative:", n_negative)
print("positive_class_ratio:", positive_class_ratio)

n_positive: 307
n_negative: 278
positive_class_ratio: 0.5247863247863248


# Preprocessing data

In [17]:
class DataQuery:
    """File-system accessor for the dataset rooted at ``data_directory``.

    Paths used by the methods below:

    * ``<data_directory>/train_labels.csv`` -- label table with a
      ``BraTS21ID`` column.
    * ``<data_directory>/train/<user_id>/<scan_type>/Image-<scan_id>.dcm``
    * ``<data_directory>/test/<user_id>/<scan_type>/Image-<scan_id>.dcm``

    ``data_directory`` may be given with or without a trailing separator.
    """

    def __init__(self, data_directory):
        self.data_dir = data_directory
        self.train_dir = os.path.join(self.data_dir, "train/")
        self.test_dir = os.path.join(self.data_dir, "test/")

    def _labels_path(self):
        """Return the path of the training label CSV."""
        return os.path.join(self.data_dir, "train_labels.csv")

    def _series_dir(self, user_id, scan_type, test=False):
        """Return the directory holding one user's scans of one type."""
        root = self.test_dir if test else self.train_dir
        return os.path.join(root, str(user_id), str(scan_type))

    @staticmethod
    def _natural_key(fname):
        """Sort key that orders embedded numbers numerically (2 before 10)."""
        # re.split with a capturing group alternates text / digit-run tokens,
        # so odd positions are always the digit runs.
        tokens = re.split(r"(\d+)", fname)
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    def get_train_labels(self):
        """Return the training labels as a DataFrame indexed by ``BraTS21ID``."""
        return pd.read_csv(self._labels_path(), index_col="BraTS21ID")

    def get_train_user_list(self):
        """Return the ``BraTS21ID`` values as strings zero-padded to 5 chars.

        The padded form is what the other methods interpolate into directory
        paths as ``user_id``.
        """
        # read_csv(squeeze=True) no longer exists in pandas 2.x; select the
        # column explicitly instead.
        ids = pd.read_csv(self._labels_path(), usecols=["BraTS21ID"])["BraTS21ID"]
        return ids.astype(str).str.zfill(5).tolist()

    def get_scan(self, user_id, scan_type, scan_id, test=False):
        """Read a single DICOM file and return what ``dicom.dcmread`` yields.

        Parameters
        ----------
        user_id : directory name of the user (see ``get_train_user_list``).
        scan_type : one of "FLAIR", "T1w", "T1wCE", "T2w".
        scan_id : value used in the ``Image-<scan_id>.dcm`` file name.
        test : read from the test directory instead of the train directory.
        """
        series_dir = self._series_dir(user_id, scan_type, test=test)
        return dicom.dcmread(os.path.join(series_dir, f"Image-{scan_id}.dcm"))

    def get_volume(self, user_id, scan_type, drop_empty=True, test=False):
        """Stack every ``.dcm`` slice of one scan series into a 3-D array.

        Parameters
        ----------
        user_id : directory name of the user (see ``get_train_user_list``).
        scan_type : one of "FLAIR", "T1w", "T1wCE", "T2w".
        drop_empty : when true, remove slices whose pixels are all zero.
        test : read from the test directory instead of the train directory.

        Returns
        -------
        numpy.ndarray
            float64 array of shape ``(rows, cols, n_slices)``. Slices are
            ordered by the number in their file name. A directory with no
            ``.dcm`` file yields an array of shape ``(512, 512, 0)``.

        Raises
        ------
        ValueError
            If the slices of the series do not all share one shape.
        """
        series_dir = self._series_dir(user_id, scan_type, test=test)

        # os.listdir returns names in arbitrary order; sort numerically so
        # Image-2 precedes Image-10 and the slice order is reproducible.
        slice_names = sorted(
            (name for name in os.listdir(series_dir) if name.endswith(".dcm")),
            key=self._natural_key,
        )
        if not slice_names:
            # Keep the shape the previous fixed-size implementation returned.
            return np.zeros((512, 512, 0))

        # Take the in-plane size from the data rather than assuming 512x512.
        slices = [
            dicom.dcmread(os.path.join(series_dir, name)).pixel_array
            for name in slice_names
        ]
        volume = np.stack(slices, axis=-1).astype(np.float64)

        if drop_empty:
            has_signal = np.any(volume != 0, axis=(0, 1))
            volume = volume[:, :, has_signal]

        return volume

In [29]:
# Build the query engine and read the user list once, not on every slider
# move, and derive the slider range from the actual number of users.
query_engine = DataQuery(DATA_DIR)
train_user_ids = query_engine.get_train_user_list()


@interact(
    # interact's (min, max, step) bounds are inclusive, so the last valid
    # index is len - 1.
    user_idx=(0, max(len(train_user_ids) - 1, 0), 1),
    scan_type=["FLAIR", "T1w", "T1wCE", "T2w"],
)
def explore_volume(user_idx=0, scan_type="FLAIR"):
    """Interactively browse the slices of one training user's scan series.

    Parameters
    ----------
    user_idx : position of the user in the training user list.
    scan_type : one of "FLAIR", "T1w", "T1wCE", "T2w".
    """
    user_id = train_user_ids[user_idx]
    volume = query_engine.get_volume(user_id, scan_type, drop_empty=True)

    n_slices = volume.shape[-1]
    if n_slices == 0:
        # A slider with max < min cannot be built; report and stop instead.
        print(f"No non-empty slices for user {user_id} / {scan_type}")
        return

    # Start at 0 so the first slice is reachable.
    @interact(idx=(0, n_slices - 1, 1))
    def display_slices(idx=0):
        plt.imshow(volume[:, :, idx])
        plt.title(f"user {user_id} | {scan_type} | slice {idx}")

interactive(children=(IntSlider(value=0, description='user_idx', max=585), Dropdown(description='scan_type', o…