In [7]:
from PIL import Image  #Python image processor package
from tqdm import tqdm  #Package to show a progress bar in loops
import multiprocessing #Spawn worker processes to execute tasks in parallel
import os              #Util to use OS functions, here it's used to manipulate paths and load files
import pydicom         #Util to manipulate DICOM files (Digital Images in Medicine)
import json            #Manipulate and Read JSON

In [8]:
#Load the project settings from the JSON settings file; later cells read the
#raw/clean data directory paths from this dict.
#JSON text is UTF-8 by specification, so the encoding is pinned here instead of
#relying on the platform's locale default.
with open("erniechiew-github-DenseNet-121/SETTINGS.json", encoding="utf-8") as settings_file:
    SETTINGS = json.load(settings_file)

In [9]:
def listdir_nohidden(path):
    """Lazily yield the entries of ``path`` whose names do not start with a dot.

    Args:
        path: Directory to list (passed straight to ``os.listdir``).

    Yields:
        Entry names (not full paths) in the order ``os.listdir`` returns them,
        skipping dot-prefixed names.
    """
    # Still a generator function: the directory is only read once iteration starts.
    yield from (entry for entry in os.listdir(path) if not entry.startswith('.'))

In [10]:
#Function to convert a single image from DICOM to PNG.
#SRC_PATH / DST_PATH are the module-level default directories; other cells
#reassign them before each conversion run.
SRC_PATH = ""
DST_PATH  = ""
def convert(patientId, src_path=None, dst_path=None):
    """Convert one DICOM file to a PNG file.

    Reads ``<src_path>/<patientId>.dcm`` with ``pydicom.dcmread``, turns its
    ``pixel_array`` into a PIL image and saves it as
    ``<dst_path>/<patientId>.png``.

    Args:
        patientId: File name without extension, used for both the input and
            the output file (the notebook derives these from the directory
            listing of the source folder).
        src_path: Directory containing the source ``.dcm`` file. When ``None``
            (the default) the current value of the module-level ``SRC_PATH``
            is used.
        dst_path: Directory that receives the ``.png`` file. When ``None``
            (the default) the current value of the module-level ``DST_PATH``
            is used.

    Returns:
        None. The result is the PNG file written to disk.
    """
    # Resolve the fallbacks at call time, not at definition time: the globals
    # are reassigned after this function is defined, and callers that pass
    # only ``patientId`` rely on picking up their current values.
    if src_path is None:
        src_path = SRC_PATH
    if dst_path is None:
        dst_path = DST_PATH

    dicom_file = os.path.join(src_path, patientId + '.dcm')  #DICOM input for this patient ID
    png_file = os.path.join(dst_path, patientId + '.png')    #PNG output for this patient ID

    pixels = pydicom.dcmread(dicom_file).pixel_array
    Image.fromarray(pixels).save(png_file)

In [5]:
#########################################################
###############  CONVERTING TEST FILES  #################
#########################################################

#Point the module-level SRC_PATH / DST_PATH at the TEST directories from SETTINGS.
#convert() reads these globals, so they must be set before the conversion cell runs.
SRC_PATH = SETTINGS["TEST_RAW_DATA_DIR"]
DST_PATH = SETTINGS["TEST_CLEAN_DATA_DIR"]

print(SRC_PATH)
print(DST_PATH)

#Fail fast, before any worker starts: both locations must be existing directories.
#isdir() also rejects dangling symlinks and plain files, which lexists() would accept.
assert os.path.isdir(SRC_PATH), "Source directory not found: {}".format(SRC_PATH)
assert os.path.isdir(DST_PATH), "Destination directory not found: {}".format(DST_PATH)

rsna-pneumonia-detection-challenge/stage_2_test_images
rsna-pneumonia-detection-challenge/stage_2_test_images_png


In [6]:
#Collect the patient ID (file name without extension) of every DICOM file in the TEST folder.
#Only *.dcm entries are kept, so a stray non-DICOM file cannot abort the conversion run
#(convert() always opens "<patient ID>.dcm").
patient_ids = [
    stem
    for stem, ext in map(os.path.splitext, listdir_nohidden(SRC_PATH))
    if ext.lower() == '.dcm'
]
len(patient_ids) #Number of TEST images

3000

In [7]:
#Create a pool of worker processes (one per CPU reported by cpu_count()) and
#convert the images in parallel, showing a progress bar.
with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
    #tqdm is used as a context manager so the bar is closed even if a worker raises
    with tqdm(
        total=len(patient_ids),
        leave=False,
        desc="Converting",
    ) as pbar:
        #Results are consumed as they complete; convert() returns nothing useful,
        #so each result only advances the progress bar
        for _ in p.imap_unordered(convert, patient_ids):
            pbar.update(1)

                                                               

In [11]:
#########################################################
##############  CONVERTING TRAIN FILES  #################
#########################################################

#Point the module-level SRC_PATH / DST_PATH at the TRAIN directories from SETTINGS.
#convert() reads these globals, so they must be set before the conversion cell runs.
SRC_PATH = SETTINGS["TRAIN_RAW_DATA_DIR"]
DST_PATH = SETTINGS["TRAIN_CLEAN_DATA_DIR"]

print(SRC_PATH)
print(DST_PATH)

#Fail fast, before any worker starts: both locations must be existing directories.
#isdir() also rejects dangling symlinks and plain files, which lexists() would accept.
assert os.path.isdir(SRC_PATH), "Source directory not found: {}".format(SRC_PATH)
assert os.path.isdir(DST_PATH), "Destination directory not found: {}".format(DST_PATH)

rsna-pneumonia-detection-challenge/stage_2_train_images
rsna-pneumonia-detection-challenge/stage_2_train_images_png


In [12]:
#Collect the patient ID (file name without extension) of every DICOM file in the TRAIN folder.
#Hidden entries (e.g. macOS .DS_Store) are already skipped by listdir_nohidden();
#only *.dcm entries are kept, so a stray non-DICOM file cannot abort the conversion run
#(convert() always opens "<patient ID>.dcm").
patient_ids = [
    stem
    for stem, ext in map(os.path.splitext, listdir_nohidden(SRC_PATH))
    if ext.lower() == '.dcm'
]

len(patient_ids) #Number of TRAIN images

26684

In [13]:
#Convert the images in parallel worker processes (one per CPU reported by
#cpu_count()), showing a progress bar.
with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
    #tqdm is used as a context manager so the bar is closed even if a worker raises
    with tqdm(
        total=len(patient_ids),
        leave=False,
        desc="Converting",
    ) as pbar:
        #Results are consumed as they complete; convert() returns nothing useful,
        #so each result only advances the progress bar
        for _ in p.imap_unordered(convert, patient_ids):
            pbar.update(1)

                                                                 