## **.nii to Tensor data conversion**

### *Import Libraries*

In [None]:
import cv2
import random
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt

import torch
import utils

### *Load file and Extract the data as a numpy array*

In [None]:
# Root folder of the dataset. It must end with '/' because create_dataset builds
# file paths by plain string concatenation (data_dir + 'imagesTr/' + filename).
DATA_DIR = '/content/drive/My Drive/BrainTumourData/'

def load_case(image_nifty_file, label_nifty_file):
    """Read an image file and its label file and return both as numpy arrays.

    Args:
        image_nifty_file: path of the image file, passed to ``nib.load``.
        label_nifty_file: path of the label file, passed to ``nib.load``.

    Returns:
        Tuple ``(image, label)``; each element is a numpy array built from the
        data that ``get_fdata()`` returns for the corresponding file.
    """
    volumes = []
    # The image file is read first, then the label file.
    for nifty_path in (image_nifty_file, label_nifty_file):
        nifty_object = nib.load(nifty_path)
        volumes.append(np.array(nifty_object.get_fdata()))

    image, label = volumes
    return image, label

In [None]:
# Only for visualization

#image,label = load_case(DATA_DIR + "imagesTr/BRATS_001.nii.gz", DATA_DIR + "labelsTr/BRATS_001.nii.gz")
#image = utils.get_labeled_image(image,label)
#plt.imshow(image[:, :, 54])

### *Data Pre-processing*

In [None]:
# Sub-volume Sampling

import numpy as np
from keras.utils import to_categorical

def get_sub_volume(image, label, 
                   orig_x = 240, orig_y = 240, orig_z = 155, 
                   output_x = 160, output_y = 160, output_z = 16,
                   num_classes = 4, max_tries = 1000, background_threshold = 0.95):
    """Randomly crop a patch whose background share is below a threshold.

    A corner voxel is drawn at random, the matching region of `label` is
    one-hot encoded, and the patch is accepted when the fraction of voxels in
    class 0 is strictly below `background_threshold`. Otherwise a new corner
    is drawn, up to `max_tries` times.

    Args:
        image: array indexed as [x, y, z, channel].
        label: array indexed as [x, y, z] holding class ids.
        orig_x, orig_y, orig_z: extent of the full volume along each axis.
        output_x, output_y, output_z: extent of the patch along each axis.
        num_classes: number of classes passed to `to_categorical`.
        max_tries: maximum number of corners to try.
        background_threshold: upper bound (exclusive) on the background fraction.

    Returns:
        Tuple ``(X, y)`` for the first accepted patch, where X is the image
        patch as (num_channels, x_dim, y_dim, z_dim) and y is the one-hot label
        patch as (num_classes - 1, x_dim, y_dim, z_dim) with the background
        class removed. Returns None when no patch was accepted within
        `max_tries` attempts.
    """
    patch_voxels = output_x * output_y * output_z
    all_channels = (slice(None),)
    attempt = 0

    while attempt < max_tries:
        attempt += 1

        # Corner voxel of the candidate patch, drawn in x, y, z order.
        corner_x = np.random.randint(orig_x - output_x + 1)
        corner_y = np.random.randint(orig_y - output_y + 1)
        corner_z = np.random.randint(orig_z - output_z + 1)
        window = (slice(corner_x, corner_x + output_x),
                  slice(corner_y, corner_y + output_y),
                  slice(corner_z, corner_z + output_z))

        # One-hot labels of the candidate: (output_x, output_y, output_z, num_classes)
        one_hot = to_categorical(label[window], num_classes)

        # Share of the patch that belongs to class 0 (background).
        background_share = one_hot[:, :, :, 0].sum() / patch_voxels

        if background_share < background_threshold:
            patch = np.copy(image[window + all_channels])
            X = np.moveaxis(patch, -1, 0)                   # channels first
            y = np.moveaxis(one_hot, -1, 0)[1:, :, :, :]    # classes first, background dropped
            return X, y

    # Every attempt was rejected.
    return None

In [None]:
# Standardization

def standardize(image):
    """Standardize every 2D slice of a 4D volume to zero mean and unit std.

    Each slice ``image[c, :, :, z]`` is handled independently: its mean is
    subtracted and the result is divided by its standard deviation.

    Args:
        image: 4D numpy array indexed as [channel, x, y, z].

    Returns:
        A new float array of the same shape as `image`. Slices whose standard
        deviation is 0 (constant slices) are left as all zeros. The input is
        not modified.
    """
    standardized_image = np.zeros(image.shape)

    # iterate over channels, then over slices along the last axis
    for c in range(image.shape[0]):
        for z in range(image.shape[3]):

            image_slice = image[c, :, :, z]
            centered = image_slice - image_slice.mean()
            spread = np.std(centered)

            # A constant slice has no spread; dividing would produce NaN/inf,
            # so it keeps the zeros it was initialised with.
            if spread != 0:
                # Divide the *centered* slice: scaling the raw slice would
                # give unit variance but a non-zero mean.
                standardized_image[c, :, :, z] = centered / spread

    return standardized_image

### *Data Conversion*

In [None]:
def create_dataset(data_dir, patches=100, max_case_id=483):
    """Build lists of standardized image patches and their one-hot labels.

    Draws `patches` distinct case numbers from 1..max_case_id, loads the
    matching 'BRATS_NNN.nii.gz' image/label pair, crops one random sub-volume
    from it and standardizes the image patch. Cases for which no sub-volume
    could be produced are skipped, so the result may hold fewer than `patches`
    items.

    Args:
        data_dir: dataset root containing 'imagesTr/' and 'labelsTr/'. Paths
            are built by string concatenation, so it must end with a path
            separator.
        patches: number of cases to sample.
        max_case_id: highest case number that may be sampled (inclusive).

    Returns:
        Tuple ``(images, labels)`` of equal-length lists: the standardized
        image patches and the corresponding label patches.

    Raises:
        ValueError: from `random.sample` when `patches` exceeds `max_case_id`.
        Errors raised while loading a file are not caught and propagate.
    """
    case_ids = random.sample(range(1, max_case_id + 1), patches)
    images = []
    labels = []

    for case_id in case_ids:
        # Case numbers are zero-padded to three digits: 7 -> 'BRATS_007.nii.gz'
        filename = 'BRATS_{:03d}.nii.gz'.format(case_id)

        image, label = load_case(data_dir + 'imagesTr/' + filename,
                                 data_dir + 'labelsTr/' + filename)

        try:
            sub_volume = get_sub_volume(image, label)
            if sub_volume is None:
                # get_sub_volume gives up with None when every sampled patch
                # was rejected.
                print('Skipping {}: no acceptable sub-volume found'.format(filename))
                continue
            X, y = sub_volume
            X = standardize(X)
        except Exception as error:
            # Best effort: one bad case must not abort the whole run, but the
            # reason is reported instead of being silently dropped.
            # KeyboardInterrupt/SystemExit are not caught here.
            print('Skipping {}: {}: {}'.format(filename, type(error).__name__, error))
            continue

        images.append(X)
        labels.append(y)

    return images, labels

### *Save converted data*

In [None]:
# Seed both random generators so every run draws the same cases and the same crops:
# create_dataset samples case numbers with `random`, get_sub_volume samples
# patch corners with `np.random`.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

images,labels = create_dataset(DATA_DIR, patches=200)

In [None]:
# Sanity check on the Python lists returned by create_dataset: number of patches
# kept and the shape of the first image/label pair (raises IndexError if no patch was kept).
print('Length of data : ', len(images))
print('Shape of each image : ', images[0].shape)
print('Shape of each label : ', labels[0].shape)

Length of data :  173
Shape of each image :  (4, 160, 160, 16)
Shape of each label :  (3, 160, 160, 16)


In [None]:
# Convert the lists of numpy arrays to float32 tensors.
# Building one contiguous float32 numpy array first avoids the very slow path
# torch.FloatTensor takes when given a Python list of ndarrays; the resulting
# dtype and shape are the same. Re-running this cell on the tensors is harmless.
images = torch.from_numpy(np.asarray(images, dtype=np.float32))
labels = torch.from_numpy(np.asarray(labels, dtype=np.float32))

In [None]:
# Same sanity check after the conversion: `images` and `labels` are now tensors,
# so len() is the size of the first dimension and the shapes print as torch.Size.
print('Length of data : ', len(images))
print('Shape of each image : ', images[0].shape)
print('Shape of each label : ', labels[0].shape)

Length of data :  173
Shape of each image :  torch.Size([4, 160, 160, 16])
Shape of each label :  torch.Size([3, 160, 160, 16])


In [None]:
# Write the tensors below the dataset root. The paths are derived from DATA_DIR
# so they follow it when the dataset is moved. torch.save does not create
# missing folders, so DATA_DIR + 'TensorData/' must already exist.
torch.save(images, DATA_DIR + 'TensorData/X.pt')
torch.save(labels, DATA_DIR + 'TensorData/y.pt')