In [1]:

from __future__ import print_function
import sys
import os
import logging
import six
from natsort import os_sorted
from radiomics import featureextractor, getFeatureClasses, firstorder, getTestCase, glcm, glrlm, glszm, imageoperations, shape
import radiomics
import numpy as np
import nrrd
import SimpleITK as sitk

import pandas as pd


import pydicom as pdm
import nilearn as nl
import nilearn.plotting as nlplt
import nrrd
import h5py

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.animation as anim

import imageio
from skimage.transform import resize
from skimage.util import montage


import warnings
warnings.simplefilter("ignore")



In [None]:
# Configuration cell: fill in the three paths below before running the notebook.
# The assignments are intentionally left blank in this template, so the cell
# will not run until each one is given a value.
#
# imagePath    - path where the MRI patient images are stored
# maskPath     - path where the corresponding tumor ROIs are stored
# clinicalpath - comma-separated clinical file. The lookup helpers in this
#                notebook read the columns 'ids' (patient ID), 'gleason',
#                'ucla' (Likert score), 'PSA' and 'target' (tumor target
#                index, can be ignored).

imagePath =
maskPath =
clinicalpath =


# Load the clinical table once; the helper functions below read this
# module-level `df` for every per-patient lookup.
df = pd.read_csv(clinicalpath, delimiter = ',')


# This method takes MRI image path and returns patient ID
def getidfrommr(path):
    """Extract the patient ID from an MRI file path.

    The ID is the text that follows the ``MR data`` folder, up to the first
    ``.nrrd``. Both Windows (``MR data\\``) and POSIX (``MR data/``)
    separators are accepted, so the notebook also works with paths produced
    on other platforms or written with forward slashes.

    Parameters
    ----------
    path : str
        Path to an MRI ``.nrrd`` file located under an ``MR data`` folder.

    Returns
    -------
    str
        The part of the path between the ``MR data`` folder and ``.nrrd``.

    Raises
    ------
    IndexError
        If ``path`` does not contain an ``MR data`` folder component.
    """
    # The backslash form is tried first so Windows paths resolve exactly as before.
    for marker in ('MR data\\', 'MR data/'):
        if marker in path:
            return path.split(marker)[1].split('.nrrd')[0]
    raise IndexError("'MR data' folder not found in MRI path: %r" % (path,))

# Segmentation path -> ID helper (counterpart of the MRI path helper).
def getidfromsg(path):
    """Return the text between the first ``seriesUID-`` marker and the next ``.nrrd``.

    Per the notebook's convention this value is used as the patient ID of a
    segmentation (tumor ROI) file.

    Raises ``IndexError`` when ``path`` contains no ``seriesUID-`` marker.
    """
    segments = path.split('seriesUID-')
    after_marker = segments[1]
    stem, _, _ = after_marker.partition('.nrrd')
    return stem

# This method takes patient ID and returns the Gleason score
def getgleasonfromid(id):
    """Look up the Gleason score(s) recorded for a patient.

    Reads the module-level clinical table ``df`` and selects the ``'gleason'``
    value of every row whose ``'ids'`` entry equals ``id``.

    Returns a NumPy integer array with one element per matching row; the
    array is empty when no row matches.
    """
    matching_rows = df['ids'] == id
    return df.loc[matching_rows, 'gleason'].values.astype(int)
# This method takes patient ID and returns UCLA Likert score
def getuclafromid(id):
    """Look up the UCLA Likert score(s) recorded for a patient.

    Reads the module-level clinical table ``df`` and selects the ``'ucla'``
    value of every row whose ``'ids'`` entry equals ``id``.

    Returns a NumPy integer array with one element per matching row; the
    array is empty when no row matches.
    """
    matching_rows = df['ids'] == id
    return df.loc[matching_rows, 'ucla'].values.astype(int)
# This method takes patient ID and returns PSA levels.
def getpsafromid(id):
    """Look up the PSA level(s) recorded for a patient.

    Reads the module-level clinical table ``df`` and selects the ``'PSA'``
    value of every row whose ``'ids'`` entry equals ``id``.

    Returns a NumPy array (dtype left as stored in ``df``) with one element
    per matching row; the array is empty when no row matches.
    """
    matching_rows = df['ids'] == id
    return df.loc[matching_rows, 'PSA'].values

# This method takes patient ID and returns the tumor target index
def gettargetnum(id):
    """Look up the ``'target'`` entry (or entries) recorded for a patient.

    Reads the module-level clinical table ``df`` and selects the ``'target'``
    value of every row whose ``'ids'`` entry equals ``id``.

    Unlike the other lookup helpers this returns a pandas Series cast to int
    (not a NumPy array), keeping the original row index.
    """
    matching_rows = df['ids'] == id
    return df.loc[matching_rows, 'target'].astype(int)

In [None]:
# Data organisation step (to be completed by the user).
# Use os.walk() to find the MRI files and the tumor ROI files. The patient ID
# appears in the clinical file, in each MRI filename and in each tumor ROI
# target name, so use it (via any of the helper methods defined earlier) to
# match every MRI file to its correct tumor ROI file.
#
# The three lists below are parallel: entry i of each list must describe the
# same patient.

each_MR_path = list()     # MRI file paths
each_stl_path = list()    # tumor ROI file paths
patient_idx = list()      # patient IDs

In [2]:
# Now you can perform feature extraction.
# Make sure each patient id in each_MR_path matches the one in each_stl_path before you perform this step!!!

# datapyr is the pyradiomics folder. You need to locate this on your PC!
# Set the PYRADIOMICS_DIR environment variable to override the default below
# without editing the notebook. A raw string is used so the backslashes are
# taken literally (the value is unchanged).
datapyr = os.environ.get('PYRADIOMICS_DIR', r'C:\Users\rickl\PycharmProjects\pyradiomics')

# params is the file path for the feature extraction template. Please go to this folder and select the corresponding template
# You can choose between exampleMR_3mm.yaml and exampleMR_5mm.yaml
params = os.path.join(datapyr, 'examples', 'exampleSettings', 'exampleMR_3mm.yaml')

# This instantiates the extractor based on your selected template
extractor = featureextractor.RadiomicsFeatureExtractor(params)

# This will print out all the enabled filters and features.
print('Extraction parameters:\n\t', extractor.settings)
print('Enabled filters:\n\t', extractor.enabledImagetypes)
print('Enabled features:\n\t', extractor.enabledFeatures)
print()

# Create dictionary to store features from each group of tumor grade
features = {}
benign_features = {}
low_grade_features = {}
med_grade_features = {}
high_grade_features = {}

# Gleason score -> dictionary that collects the patients with that score.
# Scores not listed here are reported and skipped (see the loop below).
grade_buckets = {
    0: benign_features,
    6: low_grade_features,
    7: med_grade_features,
    8: high_grade_features,
}

# For each patient:
#   1. load the MRI with sitk.ReadImage, then load the corresponding mask;
#   2. set the mask direction and origin to those of the MR image;
#   3. derive the patient id from the MRI path and look up Gleason/UCLA/PSA;
#   4. pick the tumor-grade dictionary from the Gleason score;
#   5. extract the features and store them with the PSA and Likert scores.
for i in range(len(each_MR_path)):

    # Known only after the path is parsed; kept for the skip messages below.
    patient_id = None

    try:
        image = sitk.ReadImage(each_MR_path[i])

        mask = sitk.ReadImage(each_stl_path[i])
        mask.SetDirection(image.GetDirection())
        mask.SetOrigin(image.GetOrigin())

        patient_id = getidfrommr(each_MR_path[i])
        gleason = getgleasonfromid(patient_id)
        ucla = getuclafromid(patient_id)
        psa = getpsafromid(patient_id)

        # An empty lookup means the patient is missing from the clinical
        # table. `.all()` on an empty array is True, so without this guard
        # such a patient would be filed as benign.
        if gleason.size == 0:
            print('Skipped', i, patient_id, '- no clinical record found')
            continue

        # The scores are mutually exclusive for a non-empty array, so at most
        # one bucket can match regardless of iteration order.
        bucket = next(
            (target for score, target in grade_buckets.items()
             if (gleason == score).all()),
            None,
        )
        if bucket is None:
            print('Skipped', i, patient_id, '- unhandled Gleason score', gleason)
            continue

        patient_features = extractor.execute(image, mask)
        patient_features['ucla'] = ucla
        patient_features['PSA'] = psa
        bucket[patient_id] = patient_features

        print('Done', i)

    except ValueError as e:
        # An ROI that is too small is an expected, recoverable condition:
        # report it and move on. Anything else is re-raised unchanged.
        if len(e.args) > 0 and 'Size of the ROI is too small' in str(e.args[0]):
            print('Skipped', i, patient_id, '- ROI too small')
        else:
            print(e)
            raise

features['Benign features'] = benign_features
features['Low risk features'] = low_grade_features
features['Med risk features'] = med_grade_features
features['High risk features'] = high_grade_features

Extraction parameters:
	 {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': True, 'normalizeScale': 100, 'removeOutliers': None, 'resampledPixelSpacing': [2, 2, 2], 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True, 'binWidth': 5, 'voxelArrayShift': 300}
Enabled filters:
	 {'Original': {}, 'LoG': {'sigma': [2.0, 3.0, 4.0, 5.0]}, 'Wavelet': {}}
Enabled features:
	 {'shape': None, 'firstorder': None, 'glcm': ['Autocorrelation', 'JointAverage', 'ClusterProminence', 'ClusterShade', 'ClusterTendency', 'Contrast', 'Correlation', 'DifferenceAverage', 'DifferenceEntropy', 'DifferenceVariance', 'JointEnergy', 'JointEntropy', 'Imc1', 'Imc2', 'Idm', 'Idmn', 'Id', 'Idn', 'InverseVariance', 'MaximumProbability', 'SumEntropy', 'SumSquares'], 'glrlm': None, 'glszm': None, 'gldm': None}



In [None]:
# Persist the extracted features so the extraction step does not have to be
# re-run. This cell writes a pickle file; a csv export is an alternative.
# feature_filename is intentionally left blank in this template - set it to
# the output path before running the cell.
# NOTE(review): only load pickle files you created yourself; unpickling an
# untrusted file can execute arbitrary code.
import pickle
feature_filename =

# Binary mode ('wb') is required by pickle; the `with` block closes the file.
with open(feature_filename,'wb') as pickle_file:
    pickle.dump(features, pickle_file)




In [None]:
# Move on to feature selection and classification


