Published on July 26, 2023. By Marília Prata, mpwolke.

![](https://osmose-it.s3.amazonaws.com/wVl7WRLcQ8201DX8Dt6ZXgJoQ1iZSKGB/_.jpg)
https://www.osmosis.org/learn/Abdominal_trauma:_Clinical_practice

"Blunt abdominal trauma is a leading cause of morbidity and mortality among all age groups. Identification of serious intra-abdominal pathology is often challenging; many injuries may not manifest during the initial assessment and treatment period."

"Computed tomography is the standard for detecting solid organ injuries. CT scans provide excellent imaging of the pancreas, duodenum, and genitourinary system."

"CT scanning often provides the most detailed images of traumatic pathology and may assist in determination of operative intervention. Unlike DPL or FAST, CT can determine the source of hemorrhage."

https://emedicine.medscape.com/article/1980980-overview?form=fpf

#Competition Citation

@misc{rsna-2023-abdominal-trauma-detection,

    author = {Adam Flanders, Chris Carr, Errol Colak, HCL-Jevster, Hui Ming Lin, JeffRudie, John Mongan,
    Luciano Prevedello, Maggie, Martin Görner, Maryam Vazirabad, Michelle Riopel, Robyn Ball},
    
    title = {RSNA 2023 Abdominal Trauma Detection},
    
    publisher = {Kaggle},
    
    year = {2023},
    
    url = {https://kaggle.com/competitions/rsna-2023-abdominal-trauma-detection}
}

In [None]:
%%capture

!pip install -U dicomsdl -q 

In [None]:
import dicomsdl as dicoml
import cv2

import glob
import time
import numpy as np
import random

from matplotlib import pyplot as plt

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

# Side length (pixels) used when resizing exported images; also read by
# draw_circle() and group_processor() further down.
FILE_SIZE = 512

# take images patients directory
# NOTE(review): glob is called without recursive=True, so "**" behaves like
# "*"; each entry is therefore a second-level directory
# (train_images/<dir>/<dir>/) -- presumably one per scan series, confirm.
train_group = glob.glob("/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/**/*/")
print(f'Patients for processing: {len(train_group)}')

#Ray

"Ray is an open-source unified framework for scaling AI and Python applications. It provides the compute layer for parallel processing so that you don’t need to be a distributed systems expert."

https://docs.ray.io/en/latest/index.html#

"Ray is a unified framework for scaling AI and Python applications. Ray consists of a core distributed runtime and a toolkit of libraries (Ray AIR) for simplifying ML compute."
https://github.com/ray-project/ray

![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSOcwPqfGuXSjmFuCl0idrSggDTE5zLe_nS9PRMAjXu92QquFdbiYWzUn2e8YcG0QthBIk&usqp=CAU)twitter

In [None]:
import ray
# Start Ray for this notebook session; log_to_driver=False is passed so that
# worker output is not forwarded to the notebook.
ray.init(log_to_driver=False)

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

def process_faster(f, size=512, save_folder=None, dicom_process = True, extension="png", group_id = 0):
    """Convert one DICOM file to a resized image file using dicomsdl.

    Args:
        f: Path to the ``.dcm`` file. The name of its parent directory is
            used as the prefix of the output file name.
        size: Width and height (pixels) of the written image.
        save_folder: Directory to write into. ``None`` or ``''`` means the
            current working directory.
        dicom_process: Unused; kept for signature compatibility.
        extension: Output file extension (without the dot).
        group_id: Unused; kept for signature compatibility.

    Returns:
        The path of the image file that was written.
    """
    # Local import: ``os`` is not imported before this point in the notebook.
    import os

    patient = os.path.basename(os.path.dirname(f))
    image_name = os.path.splitext(os.path.basename(f))[0]

    image = np.array(dicoml.open(f).toPilImage())
    img = cv2.resize(image, (size, size))

    # Previously the folder was formatted straight into the name, so the
    # default ``None`` produced files called "None<patient>_<image>.png".
    file_name = os.path.join(save_folder or '', f"{patient}_{image_name}.{extension}")
    cv2.imwrite(file_name, img)
    return file_name

# sample function - draw circle in center of the image
def draw_circle(img_files):
    """Draw a small circle at the centre of every image, in place.

    Args:
        img_files: Iterable of image file paths. Each readable file is
            overwritten with the annotated image; unreadable files are
            skipped.
    """
    for f_img in img_files:
        img = cv2.imread(f_img)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None;
            # skip it instead of crashing inside cv2.circle.
            continue
        # Use the image's own size so the circle is centred even when the
        # file was not resized to FILE_SIZE.
        height, width = img.shape[:2]
        cv2.circle(img, (width // 2, height // 2), 20, (255, 0, 0), 3)
        cv2.imwrite(f_img, img)


@ray.remote
class MessageActor(object):
    """Ray actor holding a list of messages that can be drained by a reader."""

    def __init__(self):
        # Messages received since the last drain.
        self.messages = list()

    def add_message(self, message):
        """Store ``message`` at the end of the pending list."""
        self.messages.append(message)

    def get_and_clear_messages(self):
        """Return all pending messages and reset the pending list to empty."""
        drained, self.messages = self.messages, []
        return drained

@ray.remote
def group_processor(group_id):
    """Task 1: convert every DICOM file found under one ``group_id`` directory.

    Args:
        group_id: Name of the directory (one level below a top-level
            ``train_images`` sub-directory) whose ``*.dcm`` files are
            converted.

    When all files are done, a ``"p-<group_id>"`` message is sent to the
    module-level ``message_actor``.
    """
    pattern = f'/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/*/{group_id}/*.dcm'

    # Images are written to the current working directory (save_folder='').
    for f in glob.glob(pattern):
        process_faster(f, size = FILE_SIZE, save_folder = '', dicom_process = False)

    # Tell the driver that every file of this group has been processed.
    message_actor.add_message.remote(f"p-{group_id}")
    
@ray.remote
def process_group(group_id):
    """Task 2: post-process the PNG files exported for one ``group_id``.

    This is a sample step: it counts the exported files and draws a small
    circle on each of them. When done, a ``"d-<group_id>,<count>"`` message
    is sent to the module-level ``message_actor``.

    Args:
        group_id: Directory name that was passed to ``group_processor``.
    """
    # process_faster() writes "<group_id>_<image>.png" into the working
    # directory, so match that name prefix. The previous pattern
    # "<group_id>/*/_*.png" looked for a directory that is never created
    # and therefore always matched nothing.
    files = glob.glob(f'{group_id}_*.png')
    count = len(files)
    draw_circle(files)
    # send message to queue - task finished
    message_actor.add_message.remote(f"d-{group_id},{count}")
    
# Actor handle referenced by group_processor() and process_group() to report
# completion, and polled by the driver loop.
message_actor = MessageActor.remote()

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

LIMIT = 25  # maximum number of directories from train_group to process
all_files = 0
start_time = time.time()

# Launch task1 (DICOM -> PNG) in parallel for each selected directory.
launched = 0
for g in train_group[:LIMIT]:
    group_id = g.split('/')[-2]
    group_processor.remote(group_id)
    launched += 1

# Count outstanding tasks explicitly. The previous version subtracted
# len(new_messages) from LIMIT once per task1 message, so the counter could
# jump past zero (never terminating), and it stopped without waiting for the
# task2 results that all_files is built from.
pending_task1 = launched  # group_processor tasks not yet reported
pending_task2 = 0         # process_group tasks started but not yet reported

# NOTE: a remote task that raises never sends its message; this loop has no
# timeout for that case.
while pending_task1 or pending_task2:
    new_messages = ray.get(message_actor.get_and_clear_messages.remote())

    if not new_messages:
        # Nothing new yet: back off briefly instead of busy-polling the actor.
        time.sleep(0.1)
        continue

    for message in new_messages:
        # Messages look like "p-<group_id>" or "d-<group_id>,<count>".
        kind, _, payload = message.partition('-')

        if kind == 'p':
            # task1 finished: all files of this group are converted, so the
            # second processing step can start.
            print(f'Finished task1 for patient {payload}')
            process_group.remote(payload)
            pending_task1 -= 1
            pending_task2 += 1

        elif kind == 'd':
            # task2 finished: accumulate the number of files it handled.
            done_id, _, file_count = payload.rpartition(',')
            all_files += int(file_count)
            print(f"Finished task2 for patient {done_id} number of files {file_count}")
            pending_task2 -= 1

print(f"End of processing {all_files} files - time: {time.time()-start_time}")

ray.shutdown()

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

# Show up to n randomly chosen exported PNG files side by side.
n = 4
out_files = glob.glob('./*.png')

# random.sample raises ValueError when asked for more items than exist.
n_show = min(n, len(out_files))
if n_show:
    # squeeze=False keeps axs two-dimensional even when only one image exists.
    fig, axs = plt.subplots(1, n_show, figsize=(25, 25), squeeze=False)
    for ax, im_file in zip(axs[0], random.sample(out_files, n_show)):
        im = cv2.imread(im_file)
        # OpenCV loads images as BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    plt.show()
else:
    print('No PNG files found to display')

#Fast dicom by Remek Kinas

@kaggleqrdl found it and discussed here: https://www.kaggle.com/competitions/rsna-breast-cancer-detection/discussion/369684#2057282

https://www.kaggle.com/code/remekkinas/fast-dicom-processing-1-6-2x-faster

In [None]:
%%capture

!pip install -U dicomsdl -q 

try:
    import pylibjpeg
except:
   !pip install /kaggle/input/rsna-2022-whl/{pylibjpeg-1.4.0-py3-none-any.whl,python_gdcm-3.0.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl}

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

import dicomsdl as dicoml
import cv2
import pydicom

from joblib import Parallel, delayed
import glob
import time
import numpy as np
import os
from matplotlib import pyplot as plt

In [None]:
#https://www.kaggle.com/code/remekkinas/fast-dicom-processing-1-6-2x-faster

# Output folders for the two conversion variants that are timed below:
# one for the pydicom path and one for the dicomsdl path of process().
# The trailing slash matters: process() builds file names by string
# concatenation with these values.
image_dir_pydicom = '/kaggle/working/png_file_py/'
image_dir_dicomsdl = '/kaggle/working/png_file_dic/'

# exist_ok=True lets the cell be re-run without failing on existing folders.
os.makedirs(image_dir_pydicom, exist_ok=True)
os.makedirs(image_dir_dicomsdl, exist_ok=True)

In [None]:
# Collect every .dcm file two directory levels below train_images
# ("**" acts like "*" here because recursive=True is not passed).
train_images = glob.glob("/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/**/*/*.dcm")
# Bare expression: the notebook displays the number of files found.
len(train_images)

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/fast-dicom-processing-1-6-2x-faster

def process(f, size=512, save_folder=None, dicom_process = True, extension="png"):
    """Convert one DICOM file to a resized 8-bit image file.

    Args:
        f: Path to the ``.dcm`` file. The name of its parent directory is
            used as the prefix of the output file name.
        size: Width and height (pixels) of the written image.
        save_folder: Directory to write into. ``None`` or ``''`` means the
            current working directory.
        dicom_process: If true, read with pydicom and min-max normalise the
            pixel data; otherwise let dicomsdl produce the image.
        extension: Output file extension (without the dot).

    Returns:
        The path of the image file that was written.
    """
    patient = os.path.basename(os.path.dirname(f))
    image_name = os.path.splitext(os.path.basename(f))[0]

    if dicom_process:
        dicom = pydicom.dcmread(f)
        # Work in float so that subtracting the minimum cannot overflow the
        # stored integer type.
        img = dicom.pixel_array.astype(np.float64)

        lowest, highest = img.min(), img.max()
        if highest > lowest:
            img = (img - lowest) / (highest - lowest)
        else:
            # Constant image: avoid dividing by zero (which produced NaNs).
            img = np.zeros_like(img)

        # MONOCHROME1 stores inverted intensities; flip to the usual polarity.
        if dicom.PhotometricInterpretation == "MONOCHROME1":
            img = 1 - img

        image = (img * 255).astype(np.uint8)
    else:
        image = np.array(dicoml.open(f).toPilImage())

    img = cv2.resize(image, (size, size))

    # Previously the folder was formatted straight into the name, so the
    # default ``None`` produced files called "None<patient>_<image>.png".
    file_name = os.path.join(save_folder or '', f"{patient}_{image_name}.{extension}")

    cv2.imwrite(file_name, img)
    return file_name

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/fast-dicom-processing-1-6-2x-faster

# Time the pydicom-based conversion of the first 500 files on 4 workers.
start_time = time.time()

pydicom_jobs = (
    delayed(process)(f, size=512, save_folder=image_dir_pydicom, dicom_process=True)
    for f in train_images[:500]
)
Parallel(n_jobs=4)(pydicom_jobs)

elapsed = time.time() - start_time
print(elapsed)

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/fast-dicom-processing-1-6-2x-faster

# Print size/shape of the first four pydicom exports and display them.
out_files = glob.glob(f'{image_dir_pydicom}*.png')
# The enumerate() index was never used, so iterate over the paths directly.
for i in out_files[:4]:
    im = cv2.imread(i)
    print(f'file {i} size in bytes: {os.path.getsize(i)}, shape: {im.shape}')
    plt.imshow(im)
    plt.show()

#dicomsdl - speed test


In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

# Time the dicomsdl-based conversion of the first 500 files on 4 workers.
start_time = time.time()

dicomsdl_jobs = (
    delayed(process)(f, size=512, save_folder=image_dir_dicomsdl, dicom_process=False)
    for f in train_images[:500]
)
Parallel(n_jobs=4)(dicomsdl_jobs)

elapsed = time.time() - start_time
print(elapsed)

In [None]:
#Remek Kinas https://www.kaggle.com/code/remekkinas/fast-dicom-processing-1-6-2x-faster

# Print size/shape of the first four dicomsdl exports and display them.
out_files = glob.glob(f'{image_dir_dicomsdl}*.png')
for png_path in out_files[:4]:
    im = cv2.imread(png_path)
    size_in_bytes = os.path.getsize(png_path)
    print(f'file {png_path} size in bytes: {size_in_bytes}, shape: {im.shape}')
    plt.imshow(im)
    plt.show()

#We can see there is improvement in speed but further investigation in image quality is needed.

In fact, I can't see anything. I'm just copying Kinas's code and hurrying to change Mom's diaper.

#Another DICOM processing notebook 

In [None]:
import numpy as np
import pydicom
import matplotlib.pyplot as plt
import cv2
from pydicom.pixel_data_handlers import apply_voi_lut

#David Roberts code!

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-pad-to-square

# This function is for extracting pixels from DICOM files.
#
def get_pixels(dcm_file):
    """Read a DICOM file and return its pixels as a uint8 array.

    The pixel data is passed through pydicom's ``apply_voi_lut``, shifted (or
    inverted for MONOCHROME1) so that its minimum is 0, scaled by its maximum
    when that is non-zero, multiplied by 255 and cast to ``np.uint8``.
    """
    dataset = pydicom.dcmread(dcm_file)
    arr = apply_voi_lut(dataset.pixel_array, dataset)

    # MONOCHROME1 is inverted by subtracting from the maximum; any other
    # interpretation is only shifted so that it starts at zero.
    inverted = dataset.PhotometricInterpretation == "MONOCHROME1"
    arr = (np.amax(arr) - arr) if inverted else (arr - np.min(arr))

    peak = np.max(arr)
    if peak != 0:
        arr = arr / peak

    return (arr * 255).astype(np.uint8)

#Grab a random image

You'll need to install GDCM and pylibjpeg to open some of the JPEG-compressed abdominal images in this dataset.

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-pad-to-square

# Open an image and get pixels 
pixels = get_pixels("/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/10005/18667/100.dcm")

# Pad the original pixels to make them square
pixels_padded = pad_pixels(pixels)

I wasn't able to pad the pixels, so I'll leave the cell above for next time.

#Windowing

In [None]:
import numpy as np
import pydicom
import matplotlib.pyplot as plt
import cv2
from pydicom.pixel_data_handlers import apply_windowing

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing

# This function uses pydicom's apply_windowing() function to apply the default window width and level specified in the DICOM tags

def get_pixels_with_windowing(dcm_file):
    """Read a DICOM file, apply its default window, and return uint8 pixels.

    The stored values are first converted with the dataset's modality
    LUT / rescale slope and intercept, then windowed with pydicom's
    ``apply_windowing`` using the window centre/width from the DICOM tags.
    The result is shifted (or inverted for MONOCHROME1) to start at 0,
    scaled by its maximum when that is non-zero, multiplied by 255 and cast
    to ``np.uint8``.
    """
    # Local import: only apply_windowing is imported at notebook level.
    from pydicom.pixel_data_handlers import apply_modality_lut

    im = pydicom.dcmread(dcm_file)

    data = im.pixel_array

    # pydicom requires the modality LUT / rescale to be applied before any
    # windowing: window centre and width are expressed in rescaled units
    # (Hounsfield units for CT), not in raw stored values.
    data = apply_modality_lut(data, im)

    # Apply the default window width and level from the DICOM tags.
    data = apply_windowing(data, im)

    if im.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    else:
        data = data - np.min(data)

    if np.max(data) != 0:
        data = data / np.max(data)
    data = (data * 255).astype(np.uint8)

    return data

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing

# Open an image and get the pixels twice .. once without windowing and once with it.
#
# Same source file read through both helpers so the two results can be
# compared side by side in the next cell.
file = "/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/10005/18667/100.dcm"
pixels = get_pixels(file)
pixels_with_windowing = get_pixels_with_windowing(file)

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing

# Plot the images
# Two panels sharing the y axis: get_pixels() output on the left,
# get_pixels_with_windowing() output on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, sharex=False, sharey=True, figsize=(14, 10))
ax = axes.ravel()
panels = (
    ('Standard normalization', pixels),
    ('With windowing', pixels_with_windowing),
)
for panel_axis, (panel_title, panel_image) in zip(ax, panels):
    panel_axis.set_title(panel_title)
    panel_axis.imshow(panel_image, cmap='gray')

#Notice the image on the right looks less "washed out" and the contrast between soft and dense tissue is greater

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing

# Open another image and take a look
#
# Second example file, again read through both helpers; this overwrites the
# `pixels` / `pixels_with_windowing` values from the first example.
file = "/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images/10132/4816/174.dcm"
pixels = get_pixels(file)
pixels_with_windowing = get_pixels_with_windowing(file)

In [None]:
#David Roberts https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing

# Plot the images
# Two panels sharing the y axis: get_pixels() output on the left,
# get_pixels_with_windowing() output on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, sharex=False, sharey=True, figsize=(14, 10))
ax = axes.ravel()
panels = (
    ('Standard normalization', pixels),
    ('With windowing', pixels_with_windowing),
)
for panel_axis, (panel_title, panel_image) in zip(ax, panels):
    panel_axis.set_title(panel_title)
    panel_axis.imshow(panel_image, cmap='gray')

#Windowing conclusion

Applying windowing to DICOM images provides much better contrast and width of images.

This technique should be applied to JPG/PNG exports.

If the pydicom function apply_voi_lut() is used, it will also apply the default WW/WL values when a VOI LUT does not exist, which is routinely the case in mammography.

Applying windowing allows for greater range when manually adjusting brightness/contrast later.

https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing

#Acknowledgements:

Remek Kinas https://www.kaggle.com/code/remekkinas/ray-parallel-processing-dicom-files-and/notebook

David Roberts https://www.kaggle.com/code/davidbroberts/mammography-apply-windowing