## This file converts NIST's AM data into nnUNet's format. Place it in the same folder as the raw NIST data; the code looks for the images under `./data`.

In [39]:
import os
import numpy as np
import nibabel as nib
from nibabel.testing import data_path
import cv2
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.colors import LinearSegmentedColormap
from IPython.display import clear_output
from skimage.transform import resize
import sys
import glob
import json

In [7]:
def create_specimen_dict(specimen='S1S2', data_root="./data", expected_slice_counts=(900, 749)):
    """Load the raw and label image stacks of one specimen as 3D uint8 arrays.

    Walks ``data_root`` and handles every directory whose path contains
    ``specimen`` but not ``"h5_files"``. The ``.tif`` files of such a
    directory are read in sorted filename order and stacked along axis 0.
    A directory whose path contains ``"Med3D"`` supplies the raw volume;
    otherwise a directory whose path contains ``"Bernsen"`` supplies the
    label volume. Matching is done by substring on the full walked path,
    so it includes ``data_root`` itself.

    Args:
        specimen: substring identifying the specimen in directory paths.
        data_root: directory tree to search (relative paths resolve against
            the current working directory).
        expected_slice_counts: allowed numbers of ``.tif`` slices in one
            directory, or ``None`` to disable the check.

    Returns:
        dict with ``'label'`` (uint8, Bernsen pixel 0 -> 1 and 255 -> 0) and,
        when a Med3D directory was found, ``'raw'`` (uint8, pixel values as
        read). Both arrays have shape (slices, height, width).

    Raises:
        FileNotFoundError: no label directory with ``.tif`` files was found.
        OSError: an image file could not be decoded.
        ValueError: unexpected slice count, a Bernsen slice whose maximum is
            not 255 -> 1 after conversion, or a label volume without any
            foreground voxel.
    """
    def load_volume(root, tif_names, binarize):
        # Read every slice of one directory into a freshly allocated volume
        # (a fresh array per directory keeps raw and label independent).
        volume = None
        for index, name in enumerate(tif_names):
            path = os.path.join(os.path.abspath(root), name)
            matrix = cv2.imread(path, cv2.IMREAD_UNCHANGED)
            if matrix is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise OSError("could not read image: %s" % path)
            if binarize:
                # Bernsen output is 0/255; map it to 0/1.
                matrix[matrix == 255] = 1
                if matrix.max() != 1:
                    raise ValueError(
                        "label slice is not 0/255 with at least one 255 pixel: %s" % path)
            if volume is None:
                # Size the volume from the directory actually being read.
                volume = np.zeros((len(tif_names),) + tuple(matrix.shape[:2]), dtype=np.uint8)
            volume[index, :, :] = matrix
        return volume

    d = {}
    # key = 'raw' or 'label'
    # value = np 3d array (0-255 or binary)
    for root, _dirs, files in os.walk(data_root):
        if specimen not in root or "h5_files" in root:
            continue
        is_raw = "Med3D" in root
        is_bernsen = "Bernsen" in root
        if not (is_raw or is_bernsen):
            continue  # nothing from this directory would be stored

        tif_names = sorted(name for name in files if name.endswith(".tif"))
        if not tif_names:
            continue  # e.g. a parent directory that only holds sub-folders
        if expected_slice_counts is not None and len(tif_names) not in expected_slice_counts:
            raise ValueError(
                "%s holds %d .tif slices, expected one of %s"
                % (root, len(tif_names), tuple(expected_slice_counts)))

        volume = load_volume(root, tif_names, binarize=is_bernsen)
        if is_raw:  # raw
            d['raw'] = volume
        else:  # label
            d['label'] = 1 - volume  # 0=>1 = black; 0 = white

    if 'label' not in d:
        raise FileNotFoundError(
            "no Bernsen label directory with .tif files found for specimen %r under %r"
            % (specimen, data_root))
    if d['label'].max() != 1:
        raise ValueError("label volume of specimen %r contains no foreground voxel" % specimen)

    return d

# Save NIFTI

In [36]:
def normalize(A):
    """Min-max scale ``A`` to [0, 1] across the entire array.

    Args:
        A: array-like of numbers; the minimum and maximum are taken over all
            elements, not per slice.

    Returns:
        float32 array of the same shape. A constant input (max == min) maps
        to all zeros instead of dividing by zero.

    Raises:
        ValueError: if ``A`` is empty (NumPy cannot reduce an empty array).
    """
    values = np.asarray(A)
    if not np.issubdtype(values.dtype, np.floating):
        # Integer arithmetic could wrap around when computing the range
        # (e.g. int8 spanning -128..127), so widen before subtracting.
        values = values.astype(np.float64)
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return np.zeros(values.shape, dtype=np.float32)
    # Stored as float32 to save memory.
    return ((values - lowest) / span).astype(np.float32)

# Loop over all specimens, build the 3D arrays of each (label + raw) and save
# them as NIfTI volumes: training specimens go to imagesTr/labelsTr, testing
# specimens to imagesTs/labelsTs. Image files carry the "_0000" suffix.
training_specs = ['S1S3','S1S4','S1S5'] #
testing_specs = ['S1S2']
output_path = '.' # change this according to need
for suffix, specs in (('Tr', training_specs), ('Ts', testing_specs)):
    images_dir = os.path.join(output_path, 'images' + suffix)
    labels_dir = os.path.join(output_path, 'labels' + suffix)
    # nib.save does not create missing folders, so make sure they exist.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    for spec in specs:
        d = create_specimen_dict(spec)
        # Identity affine: no voxel spacing/orientation information is attached.
        raw_nii = nib.Nifti1Image(d['raw'], affine=np.eye(4))
        label_nii = nib.Nifti1Image(d['label'], affine=np.eye(4))
        nib.save(raw_nii, os.path.join(images_dir, spec + '_0000.nii.gz'))
        nib.save(label_nii, os.path.join(labels_dir, spec + '.nii.gz'))

# Save JSON

In [42]:
# Build the dataset.json description of the task and write it to output_path
# (output_path must already be defined by the NIfTI export step).
task_name = 'AM'
training_specimen_names = ['S1S3','S1S4','S1S5']
test_specimen_names = ['S1S2']

json_dict = {}
json_dict['name'] = task_name
json_dict['description'] = "NIST_AM for nnUNet"
json_dict['tensorImageSize'] = "3D"
json_dict['reference'] = ""
json_dict['licence'] = ""
json_dict['release'] = "0.0"
json_dict['modality'] = {
    "0": "XCT",
}
json_dict['labels'] = {
    "0": "background",
    "1": "defect",
}

json_dict['numTraining'] = len(training_specimen_names)
json_dict['numTest'] = len(test_specimen_names)
# Entries name each case without the "_0000" suffix used in the image file names.
json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i.split("/")[-1], "label": "./labelsTr/%s.nii.gz" % i.split("/")[-1]} for i in
                         training_specimen_names]
# Test volumes are written to imagesTs, so the test entries must point there.
json_dict['test'] = ["./imagesTs/%s.nii.gz" % i.split("/")[-1] for i in test_specimen_names]

print(json_dict)
with open(os.path.join(output_path, 'dataset.json'), 'w') as f:
    json.dump(json_dict, f)

{'name': 'AM', 'description': 'NIST_AM for nnUNet', 'tensorImageSize': '3D', 'reference': '', 'licence': '', 'release': '0.0', 'modality': {'0': 'XCT'}, 'labels': ({'0': 'background', '1': 'defect'},), 'numTraining': 3, 'numTest': 1, 'training': [{'image': './images/S1S3_0000.nii.gz', 'label': './labels/S1S3.nii.gz'}, {'image': './images/S1S4_0000.nii.gz', 'label': './labels/S1S4.nii.gz'}, {'image': './images/S1S5_0000.nii.gz', 'label': './labels/S1S5.nii.gz'}], 'test': ['./images/S1S2.nii.gz']}
