In [1]:
import numpy as np
import pandas as pd
import pydicom
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image
import seaborn as sns
import os
from tqdm import tqdm
from keras.layers import Dense, GlobalAveragePooling2D, Conv2D, BatchNormalization, MaxPooling2D, Input
from keras.models import Model

Using TensorFlow backend.


In [2]:
# Load the training-label table from its pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load files you trust.
labels_file = './stage_1_train_labels_ext'
dataset = pd.read_pickle(labels_file)

# Print the column dtypes, non-null counts and memory usage of the table.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28989 entries, 0 to 28988
Data columns (total 9 columns):
patientId    28989 non-null object
x            8964 non-null float64
y            8964 non-null float64
width        8964 non-null float64
height       8964 non-null float64
Target       28989 non-null int64
age          28989 non-null int64
vpe          28989 non-null object
sex          28989 non-null object
dtypes: float64(4), int64(2), object(3)
memory usage: 2.2+ MB


In [3]:
# Read the DICOM image for every row of `dataset`, downsample it to `compression`
# and rescale the pixel values by 1/255.
# One image is loaded per row (in row order) so that `xrays[i]` lines up with
# `dataset.Target` at the same position.
xrays = []
compression = (128, 128)  # target size handed to PIL's resize
path = './stage_1_train_images/'
for patientId in tqdm(dataset.patientId):
    # `dcmread` is the supported reader; `read_file` is a deprecated alias of it.
    pds = pydicom.dcmread(os.path.join(path, patientId + '.dcm'))
    img = Image.fromarray(pds.pixel_array)
    # LANCZOS is the filter formerly exposed as ANTIALIAS (removed in Pillow 10).
    img = img.resize(compression, Image.LANCZOS)
    # Convert to float32 before dividing: integer / 255 would promote to float64
    # and double the memory needed to hold the whole image set.
    # NOTE(review): dividing by 255 assumes 8-bit pixel data -- confirm.
    xray = np.asarray(img, dtype=np.float32) / 255
    xrays.append(xray)
print('Loaded ', len(xrays), ' images')

100%|████████████████████████████████████████████████████████████████████████████| 28989/28989 [10:31<00:00, 45.93it/s]


Loaded  28989  images


In [19]:
# Stack the per-image arrays into a single array and add a trailing axis of
# size 1, giving shape (n_images, compression[0], compression[1], 1).
xrays = np.array(xrays).reshape((-1, *compression, 1))
xrays.shape

(28989, 128, 128, 1)

In [41]:
# Minimal CNN with a single sigmoid output:
# conv -> max-pool -> batch-norm -> global average pool -> dense(1, sigmoid).
inputs = Input(shape=(*compression, 1))

# Single convolutional stage.
cnn = Conv2D(filters=64, kernel_size=(4, 4), activation='relu')(inputs)
cnn = MaxPooling2D(pool_size=(3, 3))(cnn)
cnn = BatchNormalization()(cnn)

# Collapse the spatial dimensions to one value per filter.
cnn = GlobalAveragePooling2D()(cnn)

# One sigmoid unit as the model output.
dense = Dense(units=1, activation='sigmoid')(cnn)

model = Model(inputs=inputs, outputs=dense)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_16 (InputLayer)        (None, 128, 128, 1)       0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 125, 125, 64)      1088      
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 41, 41, 64)        0         
_________________________________________________________________
batch_normalization_9 (Batch (None, 41, 41, 64)        256       
_________________________________________________________________
global_average_pooling2d_8 ( (None, 64)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 65        
Total params: 1,409
Trainable params: 1,281
Non-trainable params: 128
_________________________________________________________________


In [None]:
# Fit the model on all loaded images for one epoch; the returned object is kept
# in `training_history` for later inspection.
training_history = model.fit(
    x=xrays,
    y=dataset.Target,
    batch_size=16,
    epochs=1,
)

Epoch 1/1
