In [17]:
import os
import sys
import glob

import torch
import numpy as np
import pydicom as dicom

from skimage.draw import polygon
import matplotlib.pyplot as plt

%matplotlib inline

In [18]:
def read_structure(structure):
    contours = []
    for i, ri in enumerate(structure.ROIContourSequence):
        contour = {}
        #return ri
        contour['color'] = ri.ROIDisplayColor
        contour['number'] = ri.ReferencedROINumber
        #contour['name'] = ri.ROIName
        #assert contour['number'] == ri.ROINumber
        contour['contours'] = [s.ContourData for s in ri.ContourSequence]
        contours.append(contour)
    return contours

In [19]:
def get_mask(contours, slices):
    z = np.around(np.array([float(s.ImagePositionPatient[2]) for s in slices]), 1)
    pos_r = slices[0].ImagePositionPatient[1]
    spacing_r = slices[0].PixelSpacing[1]
    pos_c = slices[0].ImagePositionPatient[0]
    spacing_c = slices[0].PixelSpacing[0]

    label = np.zeros_like(image, dtype=np.uint8)
    for con in contours:
        num = int(con['number'])
        for c in con['contours']:
            nodes = np.array(c).reshape((-1, 3))
            assert np.amax(np.abs(np.diff(nodes[:, 2]))) == 0
            #z_index = z.index(nodes[0, 2])
            try:
                z_index = np.where(z == float(np.around(nodes[0, 2], 1)))[0] # fix in later comments -JH
            except:
                print(z)
                print(nodes[0,2])
                raise
            r = (nodes[:, 1] - pos_r) / spacing_r
            c = (nodes[:, 0] - pos_c) / spacing_c
            rr, cc = polygon(r, c)
            label[rr, cc, z_index] = num

    colors = tuple(np.array([con['color'] for con in contours]) / 255.0)
    return label, colors

In [20]:
train_data_path = '/mnt/USB/AAPM17CTSegChallenge/LCTSC/DOI' #"./DOI" # point to our data -JH
preprocessing_imgdir = "/home/ygx/data/aapm17/preprocessing/imgs"
preprocessing_labeldir = "/home/ygx/data/aapm17/preprocessing/labels"

In [21]:
train_patients = [os.path.join(train_data_path, name)
    for name in os.listdir(train_data_path) if os.path.isdir(os.path.join(train_data_path, name))]

print(f'First Patient: {train_patients[0]}')

First Patient: /mnt/USB/AAPM17CTSegChallenge/LCTSC/DOI/LCTSC-Train-S1-010


In [36]:
for i, patient in enumerate(train_patients):
    print(f"Patient {i}: {patient}")
    image = None
    slices = None
    contours = None
    for subdir, dirs, files in os.walk(patient):
        dcms = glob.glob(os.path.join(subdir, "*.dcm"))
        if len(dcms) == 1:
            structure = dicom.read_file(os.path.join(subdir, files[0]))
            contours = read_structure(structure)
        elif len(dcms) > 1:
            slices = [dicom.read_file(dcm) for dcm in dcms]
            slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
            image = np.stack([s.pixel_array for s in slices], axis=-1)
    if image is not None:
        torch.save(torch.Tensor(image.astype(np.float32)), f"{preprocessing_imgdir}/image_{i}.pth")
    if contours is not None:
        label, colors = get_mask(contours, slices)
        print(f'label: {label.shape}')
        print(f'color: {type(colors)}, len: {len(colors)}, First index: {colors[0]}')
        label_int = label.astype(np.uint8)
        print(f'Integer label: {label_int.shape}')
        torch.save(torch.Tensor(label_int),   f"{preprocessing_labeldir}/label_{i}.pth")
    break

Patient 0: /mnt/USB/AAPM17CTSegChallenge/LCTSC/DOI/LCTSC-Train-S1-010
label: (512, 512, 122)
color: <class 'tuple'>, len: 5, First index: [0.85490196 0.64705882 0.1254902 ]
Integer label: (512, 512, 122)


In [40]:
x = np.array([0., 1.5, .1])

In [41]:
x

array([0. , 1.5, 0.1])

In [42]:
x.astype(np.uint)

array([0, 1, 0], dtype=uint64)