In [None]:
try: 
    import dicomsdl
except:
    !pip install /kaggle/input/rsna-2023-abdominal-trauma/dicomsdl-0.109.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
    
import os 
import sys 
sys.path.append('/kaggle/input/rsna-2023-abdominal-trauma')
sys.path.append('/kaggle/input/rsna-2023-abdominal-trauma-weight-00')


import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
from glob import glob
import dicomsdl

import torch
import torch.nn as nn
import torch.nn.functional as F

import cv2
from timeit import default_timer as timer
    
from kaggle_helper import * 
from kaggle_metric import * 
    
print('IMPORT OK!!!!')

In [None]:
# Global inference configuration shared by the pre-processing helpers and the
# main loop further down in this notebook:
#   max_size        - in-plane size every volume is rescaled to (do_scale_to_size)
#   slice_scan_size - (length, height, width) fed to the slice-predictor network
#   liver_scan_size - (length, height, width) fed to the liver/spleen/kidney network
#   device          - 'cuda' moves the networks and volumes to the GPU; 'cpu' keeps them on the CPU
cfg = dotdict(
    max_size=256,
    slice_scan_size=(96,160,160),
    liver_scan_size=(96,256,256),

    device='cuda',#'cuda' #cpu
)


# 'local'  : run on a subset of a validation fold of the training set and score it
# 'submit' : run on the (hidden) test set and only write submission.csv
mode = 'local' #submit #local

if mode =='local':
    image_dir  = '/kaggle/input/rsna-2023-abdominal-trauma-detection/train_images'
    #series_df = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train_series_meta.csv')
    fold_df = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-weight-00/valid_df.fold2.csv')
    # only the first 20 rows of the fold are kept; drop this line to run the whole fold
    fold_df = fold_df[:20]
    # (patient_id, series_id) pairs the main loop iterates over
    valid_id  = list(zip(fold_df.patient_id, fold_df.series_id))

    # ground-truth rows restricted to the patients selected above
    train_df = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/train.csv')
    valid_df = train_df[train_df.patient_id.isin(fold_df.patient_id)].reset_index(drop=True)

elif mode =='submit':
    image_dir = '/kaggle/input/rsna-2023-abdominal-trauma-detection/test_images'
    series_df = pd.read_csv('/kaggle/input/rsna-2023-abdominal-trauma-detection/test_series_meta.csv')
    # sample_submission.csv fixes which patients (and in which order) must be written
    valid_df  = pd.read_csv( '/kaggle/input/rsna-2023-abdominal-trauma-detection/sample_submission.csv')
    valid_id  = list(zip(series_df.patient_id, series_df.series_id))

else:
    # Fail here with a clear message instead of a NameError on `valid_df` below.
    raise ValueError(f"unknown mode {mode!r}: expected 'local' or 'submit'")


patient_id = valid_df.patient_id.unique()

print('len(patient_id)',len(patient_id))
print('patient_id',patient_id[:10])
print('')

print('len(valid_id)',len(valid_id))
print('valid_id',valid_id[:10])
print('')

#---


print('MODE SETTING OK!!!!')

In [None]:
# Build the two networks and restore their trained weights.

from slice_model import Net as SliceNet
from liver_model import Net as LiverNet

slice_checkpoint=\
    '/kaggle/input/rsna-2023-abdominal-trauma-weight-00/00000448.pth'
liver_checkpoint=\
    '/kaggle/input/rsna-2023-abdominal-trauma-weight-00/00004313.pth'


def _load_for_inference(net, checkpoint_path):
    """Restore `net` from the 'state_dict' entry of a checkpoint and return it in eval mode.

    The checkpoint tensors are kept on the CPU while loading. Loading is
    non-strict, and the result of `load_state_dict` is printed so the
    outcome of the load is visible in the cell output.
    """
    state_dict = torch.load(
        checkpoint_path,
        map_location=lambda storage, loc: storage,
    )['state_dict']
    print(net.load_state_dict(state_dict, strict=False))
    return net.eval()


# stage 1 network (slice predictor)
slice_net = _load_for_inference(SliceNet(cfg=cfg), slice_checkpoint)

# stage 2 network (liver / spleen / kidney)
liver_net = _load_for_inference(LiverNet(cfg=cfg), liver_checkpoint)

print('MODEL OK!!!!')

In [None]:
#dataset

def do_pad_to_square(image):
    """Zero-pad a (length, height, width) tensor so that height == width.

    The shorter of the two in-plane axes is padded on both sides; when the
    difference is odd the extra row/column goes to the trailing side.
    An already-square input is returned unchanged.
    """
    _, height, width = image.shape
    if width == height:
        return image

    missing = abs(width - height)
    before = missing // 2
    after = missing - before

    # F.pad lists the padding from the last axis backwards: (w_left, w_right, h_top, h_bottom)
    if width > height:
        padding = [0, 0, before, after]
    else:
        padding = [before, after, 0, 0]
    return F.pad(image, padding, mode='constant', value=0)

def do_scale_to_size(image, spacing, max_size):
    """Resample a square (length, size, size) volume to an in-plane size of `max_size`.

    spacing is (dz, dy, dx). When a resize is needed, the slice count is
    first rescaled by dz / dy * 0.5 (i.e. towards a 2:1:1 spacing ratio) and
    then all three axes are scaled by max_size / size with trilinear
    interpolation. If the in-plane size already equals `max_size`, the
    input is returned untouched (no resampling along the slice axis either).
    """
    dz, dy, dx = spacing
    length, _, side = image.shape
    if max_size == side:
        return image

    factor = max_size / side
    length = int(dz / dy * length * 0.5)  # slice count at the 2:1:1 spacing ratio
    target_size = (int(factor * length), int(factor * side), int(factor * side))

    # F.interpolate expects (batch, channel, D, H, W)
    resized = F.interpolate(
        image[None, None],
        size=target_size,
        mode='trilinear',
        align_corners=False,
    )
    return resized[0, 0]


def dicomsdl_to_numpy_image(ds, index=0):
    """Copy frame `index` of a dicomsdl dataset into a new (Rows, Cols) numpy array.

    The array dtype is the one reported by `ds.getPixelDataInfo()`.

    Raises:
        RuntimeError: if the dataset has more than one sample per pixel.
    """
    pixel_info = ds.getPixelDataInfo()
    if pixel_info['SamplesPerPixel'] != 1:
        raise RuntimeError('SamplesPerPixel != 1')  # only single-plane images are handled

    frame = np.empty(
        [pixel_info['Rows'], pixel_info['Cols']],
        dtype=pixel_info['dtype'],
    )
    ds.copyFrameData(index, frame)  # fills `frame` in place
    return frame

def load_dicomsdl_dir(dcm_dir, slice_range=None):
    """Load one DICOM series directory into a windowed uint8 volume.

    Args:
        dcm_dir: directory holding files named `<integer>.dcm`.
        slice_range: optional (first, last_exclusive) pair of file numbers to
            load; when None the range runs from the smallest to the largest
            file number found in the directory.

    Returns:
        (image, (dz, dy, dx)): `image` is the slices stacked along axis 0 and
        the tuple is the spacing derived from the first and last slice.
        In the single-file special case `image` is the raw frame as float32
        repeated 16 times (no rescale / windowing) and the spacing is (1, 1, 1).

    NOTE(review): an empty directory with slice_range=None fails with an
    IndexError on `dcm_file[0]`; the caller visible in this notebook wraps
    the call in a try/except.
    """
    # files ordered by the integer in their name (not lexicographically)
    dcm_file = sorted(glob(f'{dcm_dir}/*.dcm'), key=lambda x: int(x.split('/')[-1].split('.')[0]))
     
    # A series with a single file cannot give a slice spacing, so a fake
    # 16-slice volume is produced by repeating that one frame.
    if len(dcm_file)==1:
        dcm = dicomsdl.open(dcm_file[0])
        pixel_array = dicomsdl_to_numpy_image(dcm) 
        pixel_array = pixel_array.astype(np.float32)
        image = np.stack([pixel_array]*16)
        dz,dy,dx = 1,1,1
        return image, (dz,dy,dx)
    
    
    #------------------------------------
    if slice_range is None: 
        # default: every file number from the first to the last one (end is exclusive)
        slice_min = int(dcm_file[0].split('/')[-1].split('.')[0])
        slice_max = int(dcm_file[-1].split('/')[-1].split('.')[0])+1
        slice_range=(slice_min, slice_max)

    slice_min, slice_max = slice_range
    sz0, szN = None, None

    image = []
    # NOTE(review): every number in the range is opened, so the file
    # numbering is assumed to be contiguous - confirm for the target data.
    for s in range(slice_min, slice_max):
        f = f'{dcm_dir}/{s}.dcm'

        #dcm = pydicom.read_file(f)
        #m = dcm.pixel_array
        #m = standardize_pixel_array(dcm)

        dcm = dicomsdl.open(f)
        pixel_array = dicomsdl_to_numpy_image(dcm)
        if dcm.PixelRepresentation == 1:
            # shift left then right by the number of unused high bits;
            # presumably this sign-extends values stored in fewer bits than allocated
            bit_shift = dcm.BitsAllocated - dcm.BitsStored
            dtype = pixel_array.dtype
            pixel_array = (pixel_array << bit_shift).astype(dtype) >> bit_shift

        # apply the rescale slope/intercept, then map the window stored in the
        # file ([xmin, xmax]) to uint8 through dicomsdl.util.convert_to_uint8
        pixel_array = pixel_array.astype(np.float32)
        pixel_array = dcm.RescaleSlope * pixel_array + dcm.RescaleIntercept
        xmin = dcm.WindowCenter-0.5-(dcm.WindowWidth-1)* 0.5
        xmax = dcm.WindowCenter-0.5+(dcm.WindowWidth-1)* 0.5
        norm = np.empty_like(pixel_array, dtype=np.uint8)
        dicomsdl.util.convert_to_uint8(pixel_array, norm, xmin, xmax)

        # MONOCHROME1 images are inverted so every slice uses the same polarity
        if dcm.PhotometricInterpretation == 'MONOCHROME1':
            norm = 255 - norm
        image.append(norm)

    if 1: #check inversion
        # compare the z position of the first and last slice; reverse the stack
        # when z increases with the file number so all series share one direction
        dcm0 = dicomsdl.open(f'{dcm_dir}/{slice_min}.dcm')
        dcmN = dicomsdl.open(f'{dcm_dir}/{slice_max-1}.dcm')
        sx0, sy0, sz0 = dcm0.ImagePositionPatient
        sxN, syN, szN = dcmN.ImagePositionPatient
        if szN > sz0:
            image=image[::-1]

        # NOTE(review): DICOM PixelSpacing is (row spacing, column spacing), so
        # this unpacking order looks swapped; harmless when pixels are square - confirm.
        dx, dy = dcm0.PixelSpacing
        # mean distance between consecutive slices
        # NOTE(review): the divisor is 0 when slice_range holds a single slice.
        dz = np.abs((szN - sz0) / (slice_max - slice_min-1))

    image = np.stack(image)
    return image, (dz,dy,dx)



def pre_process_slice_predictor(image):
    """Resize a square (length, size, size) volume to the slice-predictor input shape.

    The volume is scaled isotropically so its in-plane size matches
    cfg.slice_scan_size, then zero-padded at the end or truncated along the
    slice axis to the configured length.
    """
    length, _, side = image.shape
    max_length, _, target_side = cfg.slice_scan_size

    # slice count after applying the in-plane scale factor to the slice axis
    scaled_length = int(target_side / side * length)
    volume = F.interpolate(
        image[None, None],
        size=[scaled_length, target_side, target_side],
        mode='trilinear',
    )[0, 0]

    # pad or crop to the fixed length expected by the network
    if scaled_length < max_length:
        volume = F.pad(volume, [0, 0, 0, 0, 0, max_length - scaled_length], mode='constant', value=0)
    elif scaled_length > max_length:
        volume = volume[:max_length]
    return volume

def post_process_slice_predictor(x, image, slice_prob):
    """Map per-slice probabilities from network resolution back onto `image`.

    Args:
        x: the (l, s, s) volume that was fed to the slice predictor.
        image: the (L, S, S) volume that `x` was derived from.
        slice_prob: per-slice probabilities for `x`; a leading axis is added
            before the 1-D linear interpolation, so this is assumed to be
            (1, l) - TODO confirm against slice_net.infer.

    Returns:
        A 1-D tensor with exactly L entries, one per slice of `image`.
    """
    l,s,s = x.shape
    L,S,S = image.shape

    # undo the in-plane scale factor along the slice axis
    l1 = int(S / s * l)
    p = F.interpolate(
            slice_prob.unsqueeze(0),
            size=[l1],
            mode='linear'
        ).squeeze(0).squeeze(0)

    # unpad or uncrop to the original length L
    if L > l1:
        # `x` was cropped during pre-processing: slices beyond it get probability 0
        p = F.pad(p, [0, L - l1], mode='constant', value=0)
    if L < l1:
        # `x` was zero-padded during pre-processing: drop the entries that
        # correspond to padding so the result lines up with `image`
        p = p[:L]
    return p

#---

def pre_process_liver_predictor(image, slice_predict):
    """Crop `image` to the predicted slice range and resize it to cfg.liver_scan_size.

    Args:
        image: (length, height, width) volume.
        slice_predict: 1-D per-slice mask; entries > 0 mark slices to keep.

    Returns:
        The cropped sub-volume resampled with trilinear interpolation to
        cfg.liver_scan_size.
    """
    z = torch.where(slice_predict > 0)[0]
    if len(z)==0:
        # nothing predicted: fall back to the first 96 slices
        z0, z1 = 0, 96
    else:
        # NOTE(review): z1 is used as an exclusive bound below, so the last
        # predicted slice is left out. Kept as is because changing it would
        # alter the network input - confirm against the training-time crop.
        z0, z1 = z.min().item(), z.max().item()

    # Guarantee a non-empty crop: a single predicted slice (z0 == z1) or a
    # start index beyond the volume would otherwise give an empty tensor and
    # make F.interpolate fail.
    depth = image.shape[0]
    z0 = max(0, min(z0, depth - 1))
    z1 = max(z1, z0 + 1)

    sub_image = image[z0:z1]

    L,S,S = cfg.liver_scan_size
    sub_image = F.interpolate(
        sub_image.unsqueeze(0).unsqueeze(0),
        size=[L,S,S],
        mode='trilinear'
    ).squeeze(0).squeeze(0)
    return sub_image

print('DATASET SETTING OK!!!!')

In [None]:
#### START HERE !!!! ##################
# Fallback prediction used for series that are skipped or fail to process:
# every organ / finding is reported as healthy.
dummy = dict(
    liver_healthy=1,
    liver_low=0,
    liver_high=0,
    spleen_healthy=1,
    spleen_low=0,
    spleen_high=0,
    kidney_healthy=1,
    kidney_low=0,
    kidney_high=0,
    bowel_healthy=1,
    bowel_injury=0,
    extravasation_healthy=1,
    extravasation_injury=0,
)

def np_min_max_norm(x):
    """Rescale an array to roughly [0, 1) using its own minimum and maximum.

    0.001 is added to the value range so a constant array does not divide by zero.
    """
    lowest = x.min()
    value_range = x.max() - lowest + 0.001
    return (x - lowest) / value_range

def norm_to_one(x):
    """Return a new list with the values of `x` divided by their sum."""
    total = sum(x)
    normalised = []
    for value in x:
        normalised.append(value / total)
    return normalised


# Main inference pass.
#   1. optionally move both networks to the GPU
#   2. for every (patient_id, series_id): load the series, predict the slice
#      range (stage 1), then liver / spleen / kidney probabilities (stage 2)
#   3. average the per-series predictions per patient and write submission.csv
# A series that fails to process gets the `dummy` prediction; the failure is
# reported instead of being swallowed silently.
if cfg.device=='cuda':
    slice_net = slice_net.cuda()
    liver_net = liver_net.cuda()

if 1:
    submit_df_data = []  # one dict per series
    start_timer = timer()
    for t,(patient_id, series_id) in enumerate(valid_id):
        # https://www.kaggle.com/competitions/rsna-2023-abdominal-trauma-detection/discussion/435815
        # Single corrupt image in the hidden test set
        # test_images/3124/5842/514.dcm

        if series_id in [5842]:
            submit_df_data.append({
                'patient_id': patient_id,
                'series_id' : series_id,
            } | dummy)
            continue

        #---------------------------------------------------------------------------
        # single-line progress display ('\r' rewrites the same line)
        print('\r', f'{t}/{len(valid_id)} : {patient_id},{series_id}', time_to_str(timer() - start_timer, 'min'), end='', flush=True)
        try:
            dcm_dir = f'{image_dir}/{patient_id}/{series_id}'
            image, (dz, dy, dx) = load_dicomsdl_dir(dcm_dir, slice_range=None) #byte
            image = torch.from_numpy(image).float()
            image = do_pad_to_square(image)
            image = do_scale_to_size(image, (dz, dy, dx), max_size=cfg.max_size) #torch.Size([193, 256, 256])

            if cfg.device=='cuda':
                image = image.cuda()
            #print('image.shape', image.shape)

            with torch.cuda.amp.autocast(enabled=True):
                with torch.no_grad():
                    #stage.1 : predict slice
                    x = pre_process_slice_predictor(image)
                    slice_prob = slice_net.infer(x.unsqueeze(0))
                    slice_prob = post_process_slice_predictor(x, image, slice_prob)
                    slice_predict = (slice_prob>0.5).byte()


                    #stage.2 : predict for liver, spleen, kidney
                    x = pre_process_liver_predictor(image, slice_predict)
                    liver_prob, spleen_prob, kidney_prob = liver_net.infer(x.unsqueeze(0))
                    # first batch element of each output as a plain python list
                    liver_prob, spleen_prob, kidney_prob = \
                        liver_prob [0].data.cpu().numpy().tolist(), \
                        spleen_prob[0].data.cpu().numpy().tolist(), \
                        kidney_prob[0].data.cpu().numpy().tolist()


                    #stage.3 : predict for bowel, active_extravasation
                    #
                    # we train different model because the ground truth are given differently,
                    # one at series(volume) level, another at instance(image) level
                    # further, active_extravasation uses multiple series
                    #
                    # this is not shown here, so we just put in default values ....

                    bowel_prob=norm_to_one([0.979663, 0.020337*6])
                    extravasation_prob=norm_to_one([0.936447, 0.063553*6])

            #=============================================
            submit_df_data.append({
                'patient_id': patient_id,
                'series_id' : series_id,

                'liver_healthy' : liver_prob[0],
                'liver_low'     : liver_prob[1],
                'liver_high'    : liver_prob[2],
                'spleen_healthy': spleen_prob[0],
                'spleen_low'    : spleen_prob[1],
                'spleen_high'   : spleen_prob[2],
                'kidney_healthy': kidney_prob[0],
                'kidney_low'    : kidney_prob[1],
                'kidney_high'   : kidney_prob[2],

                'bowel_healthy'        : bowel_prob[0],
                'bowel_injury'         : bowel_prob[1],
                'extravasation_healthy': extravasation_prob[0],
                'extravasation_injury' : extravasation_prob[1],

                #'any_injury' : 0, # auto generated

            })
        except Exception as e:
            # Best effort: keep the run going with the dummy prediction, but say
            # so. `Exception` (not a bare except) lets KeyboardInterrupt stop the loop.
            print(f'\nWARNING: {patient_id},{series_id} failed with {type(e).__name__}: {e} -- using dummy prediction')
            submit_df_data.append({
                'patient_id': patient_id,
                'series_id' : series_id,
            } | dummy)
            # skip the debug display below: its inputs are missing or stale after a failure
            continue

        if t<3: #debug and show results
            L, S, S = image.shape

            # mean-intensity projections along each axis, normalised for display
            v = image.float().data.cpu().numpy() / 255
            v0_mean = np.clip(v.mean(0), 0, 1)
            v1_mean = np.clip(v.mean(1), 0, 1)
            v2_mean = np.clip(v.mean(2), 0, 1)
            v0_mean = np_min_max_norm(v0_mean)
            v1_mean = np_min_max_norm(v1_mean)
            v2_mean = np_min_max_norm(v2_mean)

            # top row: axis-0 projection (+ blank tile); bottom row: the two side projections
            overlay1 = cv2.cvtColor(np.hstack([v0_mean, np.zeros((S, S), dtype=np.float32)]), cv2.COLOR_GRAY2RGB)
            overlay2 = cv2.cvtColor(np.hstack([v1_mean, v2_mean]), cv2.COLOR_GRAY2RGB)
            overlay1 = (overlay1 * 255).astype(np.uint8)
            overlay2 = (overlay2 * 255).astype(np.uint8)

            #---
            # green lines mark the first / last slice selected by stage 1
            p = slice_predict.data.cpu().numpy()
            z = np.where(p>0)[0]
            if len(z)==0:
                z0, z1 = 0,1
            else:
                z0, z1 = z.min(), z.max()

            cv2.line(overlay2, (0, z0  ), (S*2, z0  ), (0, 255, 0), 1)
            cv2.line(overlay2, (0, z1-1), (S*2, z1-1), (0, 255, 0), 1)
            #---
            overlay = np.vstack([overlay1,overlay2])
            cv2.line(overlay, (0, S-1), (S*2, S-1), (255, 255, 255), 1)
            cv2.line(overlay, (S-1, 0), (S-1, S + L), (255, 255, 255), 1)

            print('')
            print('liver_prob: ', liver_prob)
            print('spleen_prob:', spleen_prob)
            print('kidney_prob:', kidney_prob)
            print('image.shape',  image.shape)
            print('')
            #image_show_norm('overlay', overlay, resize=1)
            #cv2.waitKey(1)
            plt.imshow(overlay)
            plt.show()

    #-------------------------------------------------------------------------------------------
    print('')
    # column order of the submission file (patient_id is added back from the index)
    submit_col=[
        #'patient_id',
        'bowel_healthy','bowel_injury','extravasation_healthy','extravasation_injury','kidney_healthy','kidney_low','kidney_high','liver_healthy','liver_low','liver_high','spleen_healthy','spleen_low','spleen_high'
    ]
    submit_df = pd.DataFrame(submit_df_data)
    #submit_df.to_csv('submit_df.ungroup.csv',index=False)

    # a patient can have several series: average their predictions
    gb = submit_df.groupby('patient_id').mean()
    gb = gb.drop('series_id', axis=1)

    submit_df = gb
    #submit_df = submit_df.set_index('patient_id')
    submit_df = submit_df[submit_col]
    # reorder rows to follow valid_df
    submit_df = submit_df.loc[valid_df.patient_id]
    submit_df = submit_df.reset_index(drop=False)
    submit_df.to_csv('submission.csv',index=False)

    print('submit_df')
    print('\t', submit_df.shape)
    print('\t', submit_df.patient_id.values.tolist()[:5])
    print('')
    print('valid_df')
    print('\t', valid_df.shape)
    print('\t', valid_df.patient_id.values.tolist()[:5])
    print('')
    assert(all(valid_df.patient_id==submit_df.patient_id))
    #seems that kaggler evaluation server did not check the order of patient_id

    for i in range(3):
        print(submit_df.iloc[i])
        print('-----')

print('SUBMISSION OK !!!!')

In [None]:
# Local validation only: score the file written above against the ground truth.
if mode=='local':
    # re-read the file from disk so that exactly what would be submitted is scored
    submit_df = pd.read_csv('submission.csv')
    truth_df = add_weight_to_truth_df(valid_df)
    lb_score = do_lb_score(
        truth_df,
        submit_df,
        row_id_column_name='patient_id',
    )
    print('lb_score', lb_score)
    
'''

920/921 : 9813,24149  1 hr 19 minn
submit_df
	 (613, 14)
	 [10026, 10065, 10228, 10557, 10683]

valid_df
	 (613, 15)
	 [10026, 10065, 10228, 10557, 10683]


lb_score 0.44936877206751086
'''