In [1]:
import pydicom as py
import os
import tensorflow as tf
import tensorflow_hub as hub
import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2

import eval_utils_nih as eu

In [2]:
dicom_dir = '../input/nih-test-dicom'

dicom_file_paths = glob.glob('{}/*.dcm'.format(dicom_dir))

In [3]:
from keras.models import model_from_json

model_dir = '../input/nih-model-training'

model_json_file = open(os.path.join(model_dir, 'model-1.json'), 'r')  
model_weights_file = os.path.join(model_dir, 'model-1.h5')

model_json = model_json_file.read()
model_json_file.close()

loaded_model = model_from_json(model_json)
loaded_model.load_weights(model_weights_file)

loaded_model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

loaded_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Model)                (None, 7, 7, 512)         20024384  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dropout (Dropout)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 2156)              54091884  
_________________________________________________________________
dropout_1 (Dropout)          (None, 2156)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1280)              2760960   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1280)              0

In [4]:
key_fields = [
    'Modality', 'PatientAge', 'PatientID', 
    'PatientPosition', 'PatientSex', 
    'PixelData', 'StudyDescription', 'BodyPartExamined',
]

classes = ['NORMAL', 'PNEUMONIA']

ERROR_OUTPUT = (None, None)

In [5]:
def check_dicom(filepath):
    
    splitted = filepath.split('/')
    
    filename = splitted[len(splitted)-1]
    
    print('Load file {} ...'.format(filename))
    
    try:
        
        dcm_data = py.dcmread(filepath)
    
    except:
        
        print('File {} is not a valid DICOM file'.format(filename))
        
        return ERROR_OUTPUT
    
    else:
        
        dcm_keys = dcm_data.dir()

        contains_all_fields =  all(item in dcm_keys for item in key_fields)

        if not contains_all_fields:

            print('File {} does not contain all required fields'.format(filename))

            return ERROR_OUTPUT

        info_dict = {field: dcm_data[field].value for field in key_fields}

        if info_dict['Modality'] != 'DX':

            print('The modality of File {} is not Digital Radiography(DX) - {}'.format(filename, info_dict['Modality']))

            return ERROR_OUTPUT

        if info_dict['PatientPosition'] != 'AP' and info_dict['PatientPosition'] != 'PA':

            print('The view position of File {} is not valid - {}'.format(filename, info_dict['PatientPosition']))
            
            return ERROR_OUTPUT

        if info_dict['BodyPartExamined'] != 'CHEST':

            print('The body part examined of File {} is not CHEST - {}'.format(filename, info_dict['BodyPartExamined']))

            return ERROR_OUTPUT
        
        study_description = info_dict['StudyDescription']

        return study_description, dcm_data.pixel_array

In [6]:
def preprocess_image(np_img, img_size):
    
    np_img = cv2.cvtColor(np_img, cv2.COLOR_BGR2RGB)
    
    np_img = np_img / 255 
    
    np_img = np_img - 0.98 # intensity threshold to try to separate the background of the image
    
    image_tensor = tf.image.convert_image_dtype(np_img, dtype=tf.float32)
    
    np_image = tf.image.resize(image_tensor, size=img_size).numpy()
    
    image = tf.expand_dims(np_image, 0)
    
    return image

In [7]:
def predict_image(model, image, thresh):
    
    pred = model.predict(image)

    return classes[int(pred > thresh)]

In [8]:
THRESH = 0.5
IMG_SIZE = [224, 224]

for filepath in dicom_file_paths:
    
    study_description, np_img = check_dicom(filepath)
    
    if np_img is None:
        continue
        
    processed_image_tensor = preprocess_image(np_img, IMG_SIZE)
    
    prediction = predict_image(loaded_model, processed_image_tensor, THRESH)
    
    print('Study Description: {}, Prediction: {}'.format(study_description, prediction))

Load file test5.dcm ...
The modality of File test5.dcm is not Digital Radiography(DX) - CT
Load file test4.dcm ...
The body part examined of File test4.dcm is not CHEST - RIBCAGE
Load file test1.dcm ...
Study Description: No Finding, Prediction: NORMAL
Load file test2.dcm ...
Study Description: Cardiomegaly, Prediction: NORMAL
Load file test6.dcm ...
The view position of File test6.dcm is not valid - XX
Load file test3.dcm ...
Study Description: Effusion, Prediction: PNEUMONIA
