In [None]:
import os
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
# Side length (in pixels) passed to resize(); it is also embedded in the names of
# the output directories and of the metadata CSV written by this notebook.
SIZE = 640
# Root data directory; the 'train' and 'test' sub-directories are read from here
# and the resized outputs are written next to them.
IMGS_PATH = './data'

In [None]:
def read_xray(path, voi_lut=True, fix_monochrome=True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When True, apply the VOI LUT stored in the file (if the device
            provided one) to map raw values to a "human-friendly" view.
        fix_monochrome: When True, invert images whose PhotometricInterpretation
            is "MONOCHROME1" so they do not look inverted.

    Returns:
        numpy.ndarray of dtype uint8 with the same shape as the pixel data,
        rescaled so the minimum maps to 0 and the maximum to 255. An image whose
        pixels are all equal is returned as all zeros.
    """
    # dcmread is the supported reader; read_file is a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to
    # "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    # Shift so the darkest pixel is 0, then scale to the 0..255 range.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by zero would produce NaNs that cast to
        # undefined uint8 values, so return a well-defined black image instead.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    data = (data * 255).astype(np.uint8)
    return data

In [None]:
def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Convert a pixel array to a PIL image scaled to a `size` x `size` box.

    Args:
        array: Pixel array accepted by ``Image.fromarray``.
        size: Side length, in pixels, of the target square.
        keep_ratio: If True, shrink the image to fit inside the square while
            preserving its aspect ratio (via ``Image.thumbnail``); otherwise
            stretch it to exactly ``size`` x ``size``.
        resample: Resampling filter handed to PIL.

    Returns:
        The resulting ``PIL.Image.Image``.
    """
    box = (size, size)
    picture = Image.fromarray(array)
    if not keep_ratio:
        # Exact square output; the aspect ratio may be distorted.
        return picture.resize(box, resample)
    # thumbnail() modifies the image in place and returns None.
    picture.thumbnail(box, resample)
    return picture

In [None]:
# Convert every DICOM file of each split to a resized PNG, and record the
# original (pre-resize) dimensions of the training images.
image_id = []
dim0 = []
dim1 = []
for split in ['train', 'test']:
    load_path = f'{IMGS_PATH}/{split}/'
    save_path = f'{IMGS_PATH}/{split}_{SIZE}/'
    os.makedirs(save_path, exist_ok=True)
    # Only pick up DICOM files (skips stray entries such as hidden files or
    # sub-directories) and sort them, because os.listdir order is arbitrary and
    # the metadata rows should be reproducible across runs.
    dicom_files = sorted(
        name for name in os.listdir(load_path)
        if os.path.splitext(name)[1].lower() == '.dicom'
    )
    for file in tqdm(dicom_files, desc=split):
        # Strip only the extension; str.replace would also rewrite any other
        # occurrence of 'dicom' inside the file name.
        stem = os.path.splitext(file)[0]
        xray = read_xray(os.path.join(load_path, file))
        im = resize(xray, size=SIZE)  # keep_ratio=True to have original aspect ratio
        im.save(os.path.join(save_path, stem + '.png'))
        if split == 'train':
            image_id.append(stem)
            dim0.append(xray.shape[0])
            dim1.append(xray.shape[1])

In [None]:
# Gather the per-image lists into one table (one row per image id, with the two
# recorded array dimensions) and write it to a CSV without the index column.
meta_columns = {
    'image_id': image_id,
    'dim0': dim0,
    'dim1': dim1,
}
df = pd.DataFrame(meta_columns)
df.to_csv(f'{IMGS_PATH}/train_meta_{SIZE}.csv', index=False)