In [1]:
import sys
sys.path.append('/host/d/Github')
import os
import numpy as np
import pandas as pd
import nibabel as nb
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
import Osteosarcoma.functions_collection as ff
import Osteosarcoma.Build_lists.Build_list as Build_list

from __future__ import annotations

import os  # needed navigate the system to get the input data

import radiomics
from radiomics import (
    featureextractor,  # This module is used for interaction with pyradiomics
)

### initiate extractor

In [2]:
# Create the pyradiomics feature extractor from the YAML parameter file.
paramPath = '/host/d/Github/Osteosarcoma/radiomics_settings/MR_setting_1.yaml'
extractor = featureextractor.RadiomicsFeatureExtractor(paramPath)

# Echo the effective configuration (settings, image filters, feature classes)
# so the captured output documents what this run actually used.
for heading, configured in (
    ('Extraction parameters:\n\t', extractor.settings),
    ('Enabled filters:\n\t', extractor.enabledImagetypes),
    ('Enabled features:\n\t', extractor.enabledFeatures),
):
    print(heading, configured)

Extraction parameters:
	 {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': True, 'normalizeScale': 100, 'removeOutliers': None, 'resampledPixelSpacing': [1, 1, 1], 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True, 'binWidth': 10, 'voxelArrayShift': 300, 'geometryTolerance': 0.0001}
Enabled filters:
	 {'Original': {}, 'LoG': {'sigma': [2.0, 4.0, 6.0]}, 'Wavelet': {}}
Enabled features:
	 {'shape': None, 'firstorder': None, 'glcm': ['Autocorrelation', 'JointAverage', 'ClusterProminence', 'ClusterShade', 'ClusterTendency', 'Contrast', 'Correlation', 'DifferenceAverage', 'DifferenceEntropy', 'DifferenceVariance', 'JointEnergy', 'JointEntropy', 'Imc1', 'Imc2', 'Idm', 'Idmn', 'Id', 'Idn', 'InverseVariance', 'MaximumProbability', 'SumEntropy', 'SumSquares'], 'glrlm': None, 'glszm': None, 'gldm': None, 'ngtdm': None}


### define patient list

In [9]:
# Read the patient spreadsheet and unpack the parallel per-case lists it yields.
patient_list_file = os.path.join('/host/d/Data/Habitats/Jishuitan/Patient_lists', 'labels_with_image_info_seg_reader2.xlsx')
build = Build_list.Build(patient_list_file)
(
    batch_list,
    patient_index_list,
    label_list,
    image_path_list,
    mask_path_list,
) = build.__build__()
print(f'Number of cases to process: {len(image_path_list)}')

Number of cases to process: 30


### extract features

In [10]:
# Run the extractor on every case and collect one record (dict) per patient.
rows = []
for i, cid in enumerate(patient_index_list):
    # The image and mask lists are indexed in parallel with the patient list.
    img_p, msk_p = image_path_list[i], mask_path_list[i]
    print('i', i, ' image path:', img_p, 'mask path:', msk_p)

    result = extractor.execute(img_p, msk_p)

    # Drop the "diagnostics_" entries so only radiomics features remain,
    # then tag the record with the case identifier and its source files.
    feats = dict(
        (k, v) for k, v in result.items() if not k.startswith("diagnostics_")
    )
    feats.update({
        "Patient_index": cid,
        "Image_filepath": img_p,
        "Mask_filepath": msk_p,
    })

    rows.append(feats)

i 0  image path: /host/d/Data/Habitats/Jishuitan/original_data/1/img.nii.gz mask path: /host/d/Data/Habitats/Jishuitan/original_data/1/label_reader2.nii.gz
i 1  image path: /host/d/Data/Habitats/Jishuitan/original_data/5/img.nii.gz mask path: /host/d/Data/Habitats/Jishuitan/original_data/5/label_reader2.nii.gz
i 2  image path: /host/d/Data/Habitats/Jishuitan/original_data/7/img.nii.gz mask path: /host/d/Data/Habitats/Jishuitan/original_data/7/label_reader2.nii.gz
i 3  image path: /host/d/Data/Habitats/Jishuitan/original_data/8/img.nii.gz mask path: /host/d/Data/Habitats/Jishuitan/original_data/8/label_reader2.nii.gz
i 4  image path: /host/d/Data/Habitats/Jishuitan/original_data/11/img.nii.gz mask path: /host/d/Data/Habitats/Jishuitan/original_data/11/label_reader2.nii.gz
i 5  image path: /host/d/Data/Habitats/Jishuitan/original_data/15/img.nii.gz mask path: /host/d/Data/Habitats/Jishuitan/original_data/15/label_reader2.nii.gz
i 6  image path: /host/d/Data/Habitats/Jishuitan/original_da

In [13]:
# Assemble the per-case records into a single table (one row per case).
df = pd.DataFrame(rows)

# Move the identifier/path columns to the front; every other column keeps
# its existing relative order.
id_cols = ("Patient_index", "Image_filepath", "Mask_filepath")
front_cols = [name for name in id_cols if name in df.columns]
other_cols = [name for name in df.columns if name not in front_cols]
df = df.loc[:, front_cols + other_cols]


# df.to_excel('/host/d/projects/Habitats/radiomics/radiomics_measurements.xlsx', index=False)

In [None]:
# Every column that is not an identifier/path column is treated as a feature.
non_feature_cols = ["Patient_index", "Image_filepath", "Mask_filepath"]
feature_names = list(filter(lambda col: col not in non_feature_cols, df.columns))

# Number the features from 1, following their column order in df.
feature_index = [position for position, _ in enumerate(feature_names, start=1)]

# Lookup table pairing each feature's 1-based index with its name.
feature_table = pd.DataFrame(
    {"feature_index": feature_index, "feature_name": feature_names}
)

# feature_table.to_excel('/host/d/projects/Habitats/radiomics/radiomics_features_list.xlsx', index=False)

### normalize features

#### normalize for reader 1

In [None]:
# Min-max scale every radiomics feature to [0, 1]; the identifier/path columns
# are passed through unchanged and the result is written to Excel.
#
# NOTE(review): this cell scales whatever `df` is currently in the kernel. The
# heading says reader 1, while the patient list loaded earlier in this notebook
# is the reader 2 spreadsheet -- confirm `df` holds the reader 1 measurements
# before running, or reload them first:
# df = pd.read_excel('/host/d/projects/Habitats/radiomics/radiomics_measurements.xlsx')
from sklearn.preprocessing import MinMaxScaler

non_feature_cols = ["Patient_index", "Image_filepath", "Mask_filepath"]
feature_cols = [c for c in df.columns if c not in non_feature_cols]
df_features = df[feature_cols]

# The fitted scaler is kept in `scaler` so its data_min_/data_max_ can be
# stored and reused later.
scaler = MinMaxScaler()

# Carry over df's own index: pd.concat(axis=1) aligns on index labels, so a
# fresh 0..n-1 index on the scaled frame would misalign (and NaN-fill) rows
# whenever df has been filtered, sorted, or otherwise re-indexed.
df_features_scaled = pd.DataFrame(
    scaler.fit_transform(df_features),
    columns=feature_cols,
    index=df_features.index,
)
df_scaled = pd.concat([df[non_feature_cols], df_features_scaled], axis=1)
df_scaled.to_excel('/host/d/projects/Habitats/radiomics/radiomics_measurements_normalized.xlsx', index=False)

In [None]:
# Persist each feature's fitted minimum and maximum next to its name so the
# same scaling can later be applied to other measurements.
#
# NOTE(review): this cell reads and writes the feature list under
# '/host/d/projects/Habitats/results/', whereas the reader 2 normalization
# cell reads '/host/d/projects/Habitats/radiomics/radiomics_features_list.xlsx'
# -- confirm which location is intended.
feature_min = scaler.data_min_
feature_max = scaler.data_max_

# data_min_/data_max_ follow the column order the scaler was fitted on
# (feature_cols). Key them by name so they are attached to the matching row
# even if the spreadsheet's row order differs from that column order.
min_by_name = dict(zip(feature_cols, feature_min))
max_by_name = dict(zip(feature_cols, feature_max))

feature_table = pd.read_excel('/host/d/projects/Habitats/results/radiomics_features_list.xlsx')

# Fail loudly instead of writing ranges against the wrong (or missing) features.
table_names = set(feature_table['feature_name'])
if table_names != set(min_by_name):
    unmatched = sorted(table_names.symmetric_difference(min_by_name), key=str)
    raise ValueError(
        f"Feature list and fitted scaler disagree on {len(unmatched)} feature name(s), "
        f"e.g. {unmatched[:5]}"
    )

feature_table['feature_min'] = feature_table['feature_name'].map(min_by_name)
feature_table['feature_max'] = feature_table['feature_name'].map(max_by_name)
feature_table.to_excel('/host/d/projects/Habitats/results/radiomics_features_list.xlsx', index=False)

#### normalize for reader 2, reusing data_min and data_max from the reader 1 normalization

In [15]:
# Scale the reader 2 features with the per-feature min/max saved from the
# reader 1 normalization, so both readers share the same mapping. Values that
# fall outside the reader 1 range will land outside [0, 1].
non_feature_cols = ["Patient_index", "Image_filepath", "Mask_filepath"]
scale_df = pd.read_excel('/host/d/projects/Habitats/radiomics/radiomics_features_list.xlsx')

# Index the stored ranges by feature name for direct lookup; if a name were
# duplicated in the spreadsheet, its first row is the one used.
scale_lookup = scale_df.drop_duplicates('feature_name').set_index('feature_name')

df_reader2 = df.copy()
feature_names = [c for c in df_reader2.columns if c not in non_feature_cols]

# Report every feature lacking a stored range up front, rather than failing
# on the first one with an opaque indexing error.
missing = [f for f in feature_names if f not in scale_lookup.index]
if missing:
    raise KeyError(
        f"No reader 1 min/max stored for {len(missing)} feature(s), e.g. {missing[:5]}"
    )

for feature in feature_names:
    f_min = scale_lookup.at[feature, 'feature_min']
    f_max = scale_lookup.at[feature, 'feature_max']
    f_range = f_max - f_min
    # A feature that was constant for reader 1 has zero range. Treat it as
    # unit scale (as MinMaxScaler does) instead of dividing by zero, which
    # would fill the column with NaN/inf.
    if f_range == 0:
        f_range = 1.0
    df_reader2[feature] = (df_reader2[feature] - f_min) / f_range
df_reader2.to_excel('/host/d/projects/Habitats/radiomics/radiomics_measurements_normalized_reader2.xlsx', index=False)

