In [None]:
# %%bash
# pip install pydicom opencv-python scikit-image
# pip install pyradiomics

In [40]:
import cv2 as cv
import numpy as np
import os
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from joblib import dump, load
import pydicom as dicom
import radiomics
from radiomics import featureextractor
import SimpleITK as sitk
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt

In [None]:
def cut_images(input_path, output_path, new_width, new_height):
    """
    Cut images into tiles of the desired size and save the tiles as PNG files.

    input_path must contain one sub-directory per class. A sub-directory with
    the same name is created under output_path for each class, and every tile
    is saved there as "{patient}_img{image_id}-{sub_id}.png", where patient is
    the part of the source file name before the first underscore.
    Tiles on the right/bottom border are smaller than requested when the image
    size is not a multiple of the tile size.

    Params:
    input_path = path to the original images
    output_path = path to save the cut images
    new_width = width of the cut images
    new_height = height of the cut images

    Return:
    None

    Raises:
    ValueError = if new_width or new_height is not positive
    OSError = if a tile cannot be written
    """
    if new_width <= 0 or new_height <= 0:
        raise ValueError("new_width and new_height must be positive")

    # Create dir if it doesn't exist
    os.makedirs(output_path, exist_ok=True)

    # Browse input path (sorted so that image ids are reproducible)
    for class_dir in sorted(os.listdir(input_path)):
        class_path = os.path.join(input_path, class_dir)

        # Only class directories are processed
        if not os.path.isdir(class_path):
            continue

        # cv.imwrite does not create missing directories, it just fails
        class_output_path = os.path.join(output_path, class_dir)
        os.makedirs(class_output_path, exist_ok=True)

        # Image id restarts at 1 for every class
        image_id = 1

        for image_file in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_file)

            # Patient id is the file name prefix before the first "_"
            patient = image_file.split("_")[0]

            image = cv.imread(image_path)

            # cv.imread returns None for files it cannot decode
            if image is None:
                continue

            sub_id = 1
            for top in range(0, image.shape[0], new_height):
                for left in range(0, image.shape[1], new_width):

                    # Cut image into subimage
                    sub_image = image[top:top + new_height, left:left + new_width]

                    output_file = os.path.join(
                        class_output_path,
                        f"{patient}_img{image_id}-{sub_id}.png",
                    )

                    # cv.imwrite reports failure through its return value
                    if not cv.imwrite(output_file, sub_image):
                        raise OSError(f"Could not write {output_file}")

                    sub_id += 1

            image_id += 1


In [53]:
def read_images(input_path):
    """
    Read the images stored in the class sub-directories of input_path
    as grayscale and record, for each one, its patient id and its class.

    The patient id is the part of the file name before the first underscore
    and the class is the name of the sub-directory holding the file.
    Files that OpenCV cannot decode are skipped, so the three returned lists
    always have the same length and never contain None.

    Params:
    input_path = path to the original images

    Return:
    images = list of all images
    patients = list with patient id for each image
    classes = list with class for each image
    """

    # Lists to save images, patients and classes
    images = []
    patients = []
    classes = []

    # Browse input path (sorted so that the output order is reproducible)
    for class_dir in sorted(os.listdir(input_path)):
        class_path = os.path.join(input_path, class_dir)

        # Only class directories are processed
        if not os.path.isdir(class_path):
            continue

        for image_file in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_file)

            image = cv.imread(image_path, cv.IMREAD_GRAYSCALE)

            # cv.imread returns None for files it cannot decode
            if image is None:
                continue

            # Append image, patient id and class to list
            images.append(image)
            patients.append(image_file.split("_")[0])
            classes.append(class_dir)

    return (images, patients, classes)

In [19]:
def divide_folds(images, patients, classes, n_folds=4, seed=None):
    """
    Divides a dataset into patient-grouped folds for cross-validation.

    Patients are shuffled and split into consecutive groups of
    len(unique patients) // n_folds patients. Each group is used once as the
    test set while all remaining patients form the training set, so images of
    one patient never appear on both sides of a fold. When the number of
    patients is not a multiple of n_folds the leftover patients form
    additional fold(s). The split is NOT stratified by class.

    Params:
    images = list of all images
    patients = list with patient id for each image
    classes = list with class for each image
    n_folds = divisor used to compute the number of patients per fold
    seed = optional seed for a reproducible shuffle; when None the global
           random module state is used

    Return:
    folds = list of tuples, each tuple is one fold:
            (train_images, test_images, train_classes, test_classes)

    Raises:
    ValueError = if n_folds is smaller than 1
    """
    if n_folds < 1:
        raise ValueError("n_folds must be at least 1")

    # Unique patients in first-seen order (deterministic, unlike set())
    unique_patients = list(dict.fromkeys(patients))
    if not unique_patients:
        return []

    # Shuffle the list of unique patients
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(unique_patients)

    # Divide patients into groups; at least one patient per group so that
    # datasets with fewer patients than n_folds do not yield a zero step
    fold_size = max(1, len(unique_patients) // n_folds)
    patient_groups = [
        unique_patients[start:start + fold_size]
        for start in range(0, len(unique_patients), fold_size)
    ]

    # List to save folds
    folds = []

    # Divide images into folds based on patients
    for group in patient_groups:
        test_patients = set(group)

        train_indices = [k for k, p in enumerate(patients) if p not in test_patients]
        test_indices = [k for k, p in enumerate(patients) if p in test_patients]

        folds.append((
            [images[k] for k in train_indices],
            [images[k] for k in test_indices],
            [classes[k] for k in train_indices],
            [classes[k] for k in test_indices],
        ))

    return folds


In [54]:
def apply_thresholds(imgs):
    """
    Binarise every image twice: once with Otsu's threshold and once with
    an adaptive Gaussian threshold. Foreground pixels are set to 1.

    Params:
    imgs = list of raw images

    Return:
    imgs_otsu = Otsu's thresholded images
    imgs_adapt = Adaptive thresholded images
    """

    def _binarise(raw):
        # cv.threshold returns (threshold value, image); only the image is kept
        _, otsu_mask = cv.threshold(raw, 100, 1, cv.THRESH_BINARY+cv.THRESH_OTSU)
        # Gaussian-weighted adaptive threshold with block size 11 and C = 2
        adapt_mask = cv.adaptiveThreshold(raw, 1, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2)
        return otsu_mask, adapt_mask

    # Single pass over the dataset, one (otsu, adaptive) pair per image
    mask_pairs = [_binarise(raw) for raw in imgs]

    imgs_otsu = [pair[0] for pair in mask_pairs]
    imgs_adapt = [pair[1] for pair in mask_pairs]

    return (imgs_otsu, imgs_adapt)

In [25]:
def update_folds(folds):
    """
    Extend every fold with the Otsu and adaptive threshold masks
    of its training images. Test images are passed through untouched.

    Params:
    folds = list of tuples (x_train, x_test, y_train, y_test)

    Returns:
    new_folds = list of tuples
                (x_train, x_train_otsu, x_train_adapt, x_test, y_train, y_test)
    """

    def _with_masks(fold):
        x_train, x_test, y_train, y_test = fold[0], fold[1], fold[2], fold[3]
        # Masks are only computed for the training split
        x_train_otsu, x_train_adapt = apply_thresholds(x_train)
        return (x_train, x_train_otsu, x_train_adapt, x_test, y_train, y_test)

    return [_with_masks(fold) for fold in folds]

In [55]:
# Pipeline cell: read the tiled images, split them into folds, add the
# threshold masks of the training images and persist the result.

# Cut images into 40x30 tiles (one-off step, kept disabled once the
# "imagens_cortadas" directory has been generated)
#cut_images("imagens_ihq_er", "imagens_cortadas", 40, 30)

# Save images, patients and classes
images, patients, classes = read_images("imagens_cortadas")

# Divide images into folds of (train_images, test_images, train_classes, test_classes)
folds = divide_folds(images, patients, classes)

# Apply Otsu's and adaptive thresholds; `folds` is rebound and each fold becomes
# (x_train, x_train_otsu, x_train_adapt, x_test, y_train, y_test)
folds = update_folds(folds)

# Save folds (dump returns the list of written file names, shown as cell output)
dump(folds, '../folds.joblib')

# Load folds
# folds = load('../folds.joblib')

['../folds.joblib']

In [2]:
# Load the folds previously saved with dump(), so the cells that build them
# do not have to be re-run
folds = load('../folds.joblib')

Extract features with PyRadiomics

In [115]:
def _to_sitk_volume(array, spacing, as_mask=False):
    """Convert an array into a one-slice SimpleITK series; masks are cast to Int32."""
    image = sitk.GetImageFromArray(array)
    image.SetSpacing(spacing)
    # JoinSeries stacks the image into a series with a single slice
    volume = sitk.JoinSeries(image)
    if as_mask:
        volume = sitk.Cast(volume, sitk.sitkInt32)
    return volume


def extract_features(imgs, otsu, adapt, extractor):
    """
    Extract features using sitk and pyradiomics

    Every image is paired with its Otsu mask and with its adaptive mask and
    both pairs are passed to extractor.execute. When extraction fails for
    either mask the image is skipped for BOTH outputs, so features_otsu[k] and
    features_adapt[k] always describe the same image.

    Params:
    imgs = raw images
    otsu = masked images with otsu thresholding
    adapt = masked images with adaptative thresholding
    extractor = pyradiomics extractor

    Returns:
    features_otsu = features for otsu mask
    features_adapt = features for adaptative mask

    Raises:
    ValueError = if imgs, otsu and adapt do not have the same length
    """
    if not (len(imgs) == len(otsu) == len(adapt)):
        raise ValueError("imgs, otsu and adapt must have the same length")

    data_spacing = (1.0, 1.0, 1.0)
    features_otsu = []
    features_adapt = []

    for idx, (img, img_otsu, img_adapt) in enumerate(zip(imgs, otsu, adapt)):

        sitk_img = _to_sitk_volume(img, data_spacing)
        sitk_otsu = _to_sitk_volume(img_otsu, data_spacing, as_mask=True)
        # The adaptive mask must be built from img_adapt, not from the Otsu mask
        sitk_adapt = _to_sitk_volume(img_adapt, data_spacing, as_mask=True)

        try:
            # Run both extractions before storing anything to keep the lists aligned
            result_otsu = extractor.execute(sitk_img, sitk_otsu)
            result_adapt = extractor.execute(sitk_img, sitk_adapt)
        except Exception as exc:
            # Best effort: report the failing image and carry on with the rest
            print(f"Feature extraction failed for image {idx}: {exc}")
            continue

        features_otsu.append(result_otsu)
        features_adapt.append(result_adapt)

    return (features_otsu, features_adapt)

    

In [91]:
# Scratch cell: manual conversion of the first mask and the first image,
# mirroring the steps performed inside extract_features.
# NOTE(review): `o` and `data` are assigned in a cell further down the
# notebook, so this cell fails on a fresh kernel run from top to bottom.

# First mask -> one-slice series, cast to Int32
sitk_otsu = sitk.GetImageFromArray(o[0])
sitk_otsu.SetSpacing((1, 1, 1))
sitk_otsu = sitk.JoinSeries(sitk_otsu)
sitk_otsu = sitk.Cast(sitk_otsu, sitk.sitkInt32)

# First raw image -> one-slice series (pixel type left untouched)
sitk_img = sitk.GetImageFromArray(data[0])
sitk_img.SetSpacing((1, 1, 1))
sitk_img = sitk.JoinSeries(sitk_img)

# Extraction call left disabled
#extractor.execute(sitk_img, sitk_otsu)

In [87]:
# Size of the converted mask; the recorded output is (40, 30, 1)
sitk_otsu.GetSize()

(40, 30, 1)

In [98]:
# Sanity check: length of the first 20 entries of element 1 of fold 0
# (presumably the Otsu masks of the training images — confirm against update_folds)
len(folds[0][1][:20])

20

In [None]:
# Number of feature sets extracted with the Otsu masks
# NOTE(review): `ft_o` is assigned in the next cell, not above this one
len(ft_o)

In [None]:
# Scratch cell: run the feature extraction on the training data of fold 0.
# Elements 0, 1 and 2 of a fold are passed as the raw images, the Otsu masks
# and the adaptive masks respectively.
# NOTE(review): `extractor` is created in a cell further down the notebook.
data = folds[0][0]
o = folds[0][1]
a = folds[0][2]
ft_o, ft_a = extract_features(data, o, a, extractor)

In [19]:
# Look up a single entry of the first feature set by its key
ft_o[0]['diagnostics_Versions_Numpy']

'1.24.3'

In [14]:
# Create feature extractor
# The parameter file can be downloaded with:
# !wget -c https://raw.githubusercontent.com/AIM-Harvard/pyradiomics/master/examples/exampleSettings/Params.yaml
params = 'Params.yaml'
# 'label': 1 matches the foreground value written by apply_thresholds (maxval = 1)
settings = {'label': 1, 'correctMask': True}
extractor = featureextractor.RadiomicsFeatureExtractor(params, additionalInfo=True, **settings)

# Extract features

# NOTE(review): these two dicts are initialised but never filled by the
# active code below
features_otsu = {}
features_adapt = {}

# For each fold
# NOTE(review): range(1) only processes the first fold
for i in range(1):
    
    fold = folds[i]
               
    # Training images and their two sets of masks
    x_train = fold[0]
    x_train_otsu = fold[1]
    x_train_adapt = fold[2]

    
    # Create dataframe to save features
#     df_train_otsu = pd.DataFrame()
#     df_train_adapt = pd.DataFrame()

    
    # Extract features
    feats_train_otsu, feats_train_adapt = extract_features(x_train, x_train_otsu, x_train_adapt, extractor)
    
    # Filter features and fix data types
    
    # under construction
    # TODO: the disabled draft below is not valid Python yet
    # ("else if" must be "elif", "NA" is undefined)
    
#     features_otsu_filtered = []
#     features_adapt_filtered = []
#     features_names = []
#     names = list(features_otsu.keys())
    
#     for j in range(len(features_otsu)):
#         fo = features_otsu[j]
#         fa = features_adapt[j]
#         name = names[j]
        
#         if type(fo) == np.ndarray:
#             fo = float(fo)
#         else if type(fo) == dict:
#             fo = NA       
#         else if type(fo) == tuple:
#             indexes = []
#             for e in range(len(fo)):
#                 indexes.append(e)
#                 fo[e] = float(fo[e])
   
    
#     df_train_otsu.columns = names
#     df_test_adapt.columns = names
#     df_train_otsu.columns = names
#     df_test_adapt.columns = names

#     fold_id = i+1
#     features_dict[fold_id] = (df_train_otsu, df_test_otsu, df_train_adapt, df_test_adapt, y_train, y_test)
    
    

RuntimeError: Exception thrown in SimpleITK new_Image: /tmp/SimpleITK/Code/Common/src/sitkImageExplicit.cxx:121:
sitk::ERROR: Unsupported number of dimensions specified by size: [ 40 ]!
The maximum supported Image dimension is 5.

In [None]:
# Save extracted features, one file per feature set
# (previously all four dumps wrote to the same path and overwrote each other)
dump(feats_train_otsu, '../feats_train_otsu.joblib')
dump(feats_train_adapt, '../feats_train_adapt.joblib')
# NOTE(review): feats_test_otsu / feats_test_adapt are not assigned by any
# cell visible above; extract the test-split features before running these lines
dump(feats_test_otsu, '../feats_test_otsu.joblib')
dump(feats_test_adapt, '../feats_test_adapt.joblib')

Testing how to retrieve the features:

In [8]:
# folds[1] = fold at index 1, [0] = raw images, [1] = second element
test_img = folds[1][0][1]
# folds[1] = fold at index 1, [1] = images with Otsu threshold, [1] = second element
otsu = folds[1][1][1]

In [9]:
# Convert the selected image and its Otsu mask into one-slice SimpleITK series
data_spacing=[1,1,1]

# Raw image
sitk_img = sitk.GetImageFromArray(test_img)
sitk_img.SetSpacing((float(data_spacing[0]), float(data_spacing[1]), float(data_spacing[2]) ))
sitk_img = sitk.JoinSeries(sitk_img)

# Mask: same conversion, then cast to Int32
sitk_mask = sitk.GetImageFromArray(otsu)
sitk_mask.SetSpacing((float(data_spacing[0]), float(data_spacing[1]), float(data_spacing[2]) ))
sitk_mask = sitk.JoinSeries(sitk_mask)
sitk_mask = sitk.Cast(sitk_mask, sitk.sitkInt32)

In [11]:
# Extract the features of the selected image restricted to its Otsu mask
features = extractor.execute(sitk_img, sitk_mask)

In [45]:
# Number of entries returned by the extractor; the recorded output is 122
len(features)

122

In [48]:
# Check that the last feature value can be converted to a plain float
float(list(features.values())[-1])

0.021795298681438015

In [51]:
# Print, for every feature value, whether it is a plain string
# (in the recorded output only some of the leading entries are strings)
for i in list(features.values()):
    print(type(i) == str)

True
True
True
True
True
False
False
True
True
False
False
False
False
False
True
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [12]:
# Print every feature name together with its value.
# Iterating .items() yields the (name, value) pairs directly; the previous
# enumerate() call produced a positional index that was never used.
for name, value in features.items():
    print(name, value)

diagnostics_Versions_PyRadiomics v3.1.0
diagnostics_Versions_Numpy 1.24.3
diagnostics_Versions_SimpleITK 2.3.0
diagnostics_Versions_PyWavelet 1.3.0
diagnostics_Versions_Python 3.9.13
diagnostics_Configuration_Settings {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 1, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 255, 'additionalInfo': True, 'binWidth': 25, 'weightingNorm': None}
diagnostics_Configuration_EnabledImageTypes {'Original': {}}
diagnostics_Image-original_Hash ae37fbfe4969d0864fb25c3fecab05ec1787adac
diagnostics_Image-original_Dimensionality 3D
diagnostics_Image-original_Spacing (1.0, 1.0, 1.0)
diagnostics_Image-original_Size (40, 30, 1)
diagnostics_Image-original_Mean 182.48333333333332
diagnostics_Image-original_Minimum 74.0
diagnostics_Image-original_Maximum 255.0
d