# 객담도말 결핵진단 딥러닝 모델

CNN 기반의 객담도말 결핵진단 딥러닝 모델 소스코드입니다.

In [1]:
# A ``from __future__`` import must be the first statement of a module, so it
# has to come before every other import (placing ``import os`` ahead of it is
# a SyntaxError when this cell is executed as a script).
from __future__ import print_function

import os

import numpy as np

# Seed NumPy's global RNG before the Keras imports, as in the original order.
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils

Using Theano backend.


딥러닝 모델관련 환경설정입니다.

In [2]:
BATCH_SIZE = 128 # size of the unit (batch) processed at a time within one epoch
NUM_CLASSES = 2 # number of classes
NUM_EPOCHS = 1 # number of epochs
NUM_FILTERS = 32 # number of convolution filters
NUM_POOL = 2 # size of the pooling region used for max pooling
NUM_CONV = 3 # convolution kernel size

데이터셋 관련 환경설정입니다.

In [8]:
# Image geometry: channels, rows and columns of every sample.
IMG_CHANNELS, IMG_ROWS, IMG_COLS = 1, 64, 64

# Number of samples expected in each raw dataset file.
TRAIN_DATA_COUNT = 447648
TEST_DATA_COUNT = 15873

# Raw image / label files for the training set.
train_img_filename = './datasets/train_image_64x64_gray_447648.bin'
train_label_filename = './datasets/train_label_64x64_gray_447648.bin'

# Raw image / label files for the test set.
test_img_filename = './datasets/test_image_64x64_gray_15873.bin'
test_label_filename = './datasets/test_label_64x64_gray_15873.bin'

# One quarter of the training samples is set aside for validation.
VALIDATION_DATA_COUNT = TRAIN_DATA_COUNT // 4

In [9]:
# Destination passed to model.save_weights() once training is done.
MODEL_SAVE_FILE_PATH = './seq_model_cnn.h5'
# Text file that receives the predicted class of every test sample.
PREDICT_FILE_PATH = './predict.txt'

img 자료 로딩 함수 입니다.

In [10]:
def load_img(filename, count, channel, row, col):
    """Load raw 8-bit images from a binary file as a scaled float32 array.

    The file is read as a flat run of unsigned bytes and reshaped to
    ``(count, channel, row, col)``.

    Args:
        filename: Path of the raw image file.
        count: Number of images to read.
        channel: Channels per image.
        row: Rows per image.
        col: Columns per image.

    Returns:
        A float32 ``numpy.ndarray`` of shape ``(count, channel, row, col)``
        whose values are the stored bytes divided by 255.

    Raises:
        ValueError: If the file holds fewer than
            ``count * channel * row * col`` bytes.
    """
    expected_size = count * channel * row * col

    print('Loading data from', filename)

    print('file size : ', os.path.getsize(filename))
    print('calc size : ', expected_size)

    # The context manager closes the handle even when reading fails.
    with open(filename, 'rb') as fp:
        buf = fp.read(expected_size)

    # Fail with an explicit message instead of an opaque reshape error.
    if len(buf) != expected_size:
        raise ValueError(
            'expected %d bytes in %s but read only %d'
            % (expected_size, filename, len(buf)))

    data = np.frombuffer(buf, dtype=np.uint8)
    data = data.reshape(count, channel, row, col)

    print('loaded shape : ', data.shape)

    # astype() returns a fresh array, so the in-place division below never
    # touches the buffer-backed view created by frombuffer().
    data = data.astype('float32')
    data /= 255

    return data

label 자료 로딩함수입니다.

In [11]:
def load_label(filename, count, classes):
    """Load per-sample class labels and return them one-hot encoded.

    The file is opened in text mode and one label is taken per character.

    Args:
        filename: Path of the label file.
        count: Number of labels to read.
        classes: Number of distinct classes; used for the histogram bins and
            for the width of the one-hot encoding.

    Returns:
        The result of ``np_utils.to_categorical(labels, classes)``.

    Raises:
        ValueError: If the file holds fewer than ``count`` characters.
    """
    print('Loading labels from ', filename)

    print('file size : ', os.path.getsize(filename))
    print('calc size : ', count)

    # The context manager closes the handle even when reading fails.
    with open(filename, 'r') as fp:
        buf = fp.read(count)

    # Catch a truncated file here rather than as a length mismatch later on.
    if len(buf) != count:
        raise ValueError(
            'expected %d labels in %s but read only %d'
            % (count, filename, len(buf)))

    # Each one-character string is converted to uint8 by NumPy.
    # NOTE(review): presumably the file stores digit characters such as
    # '0'/'1' (the recorded histogram shows only classes 0 and 1) - confirm.
    data = np.asarray(list(buf), dtype=np.uint8, order='C')

    print('loaded shape : ', data.shape)

    # Bin edges follow the ``classes`` argument so the histogram always
    # matches the encoding width requested by the caller.
    label_hist = np.histogram(data, bins=range(classes + 1))
    print(label_hist)

    # convert class vectors to binary class matrices
    data = np_utils.to_categorical(data, classes)

    return data

In [12]:
# Load the train and test images and labels from their raw files.
# No shuffling happens here: samples keep the order in which they are stored.
train_img = load_img(train_img_filename, TRAIN_DATA_COUNT, IMG_CHANNELS, IMG_ROWS, IMG_COLS)
test_img = load_img(test_img_filename, TEST_DATA_COUNT, IMG_CHANNELS, IMG_ROWS, IMG_COLS)
#validation_img = load_img(validation_img_filename, VALIDATION_DATA_COUNT, IMG_CHANNELS, IMG_ROWS, IMG_COLS)

train_label = load_label(train_label_filename, TRAIN_DATA_COUNT, NUM_CLASSES)
test_label = load_label(test_label_filename, TEST_DATA_COUNT, NUM_CLASSES)
#validation_label = load_label(validation_label_filename, VALIDATION_DATA_COUNT, NUM_CLASSES)

Loading data from ./datasets/train_image_64x64_gray_447648.bin
file size :  1833566208
calc size :  1833566208
loaded shape :  (447648, 1, 64, 64)
Loading data from ./datasets/test_image_64x64_gray_15873.bin
file size :  65015808
calc size :  65015808
loaded shape :  (15873, 1, 64, 64)
Loading labels from  ./datasets/train_label_64x64_gray_447648.bin
file size :  447648
calc size :  447648
loaded shape :  (447648,)
(array([354528,  93120]), array([0, 1, 2]))
Loading labels from  ./datasets/test_label_64x64_gray_15873.bin
file size :  15873
calc size :  15873
loaded shape :  (15873,)
(array([15520,   353]), array([0, 1, 2]))


훈련셋의 일부로부터 검증셋을 생성합니다.

In [14]:
# Carve the validation set out of the front of the training data and keep the
# remainder for training.
# NOTE: train_img / train_label are rebound to the remainder, so running this
# cell again removes another VALIDATION_DATA_COUNT samples from them.
held_out = slice(None, VALIDATION_DATA_COUNT)
remainder = slice(VALIDATION_DATA_COUNT, None)

validation_img, validation_label = train_img[held_out], train_label[held_out]
train_img, train_label = train_img[remainder], train_label[remainder]

print('train count : ' + str(len(train_img)))
print('validation count : ' + str(len(validation_img)))

train count : 223824
validation count : 111912


딥러닝 모델을 구축합니다.

In [15]:
model = Sequential()

# Layers in forward order: two convolution + ReLU stages, max pooling with
# dropout, then a dense hidden layer and a softmax output over NUM_CLASSES.
layer_stack = [
    # Feature extractor.
    Convolution2D(NUM_FILTERS, NUM_CONV, NUM_CONV,
                  border_mode='valid',
                  input_shape=(IMG_CHANNELS, IMG_ROWS, IMG_COLS)),
    Activation('relu'),
    Convolution2D(NUM_FILTERS, NUM_CONV, NUM_CONV),
    Activation('relu'),
    MaxPooling2D(pool_size=(NUM_POOL, NUM_POOL)),
    Dropout(0.25),
    # Classifier head.
    Flatten(),
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES),
    Activation('softmax'),
]

for layer in layer_stack:
    model.add(layer)

# np_utils.visualize_util.plot(model, to_file='model.png')

딥러닝 모델을 컴파일하고 학습시킵니다.

In [17]:
# Configure training: categorical cross-entropy loss, the Adadelta optimizer,
# and accuracy reported as an additional metric.
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

# Train for NUM_EPOCHS epochs in batches of BATCH_SIZE, evaluating on the
# held-out validation arrays that are passed as validation_data.
model.fit(train_img, 
          train_label, 
          batch_size=BATCH_SIZE, 
          nb_epoch=NUM_EPOCHS,
          verbose=1, 
          validation_data=(validation_img, validation_label))

Train on 223824 samples, validate on 111912 samples
Epoch 1/1


<keras.callbacks.History at 0x126456710>

딥러닝 모델 테스트를 수행합니다.

In [11]:
# Evaluate on the test set; score[0] is printed as the test score (loss) and
# score[1] as the test accuracy.
# NOTE(review): the recorded test label histogram is 15520 vs 353, so always
# predicting the majority class would already reach about 0.978 accuracy;
# consider reporting per-class metrics as well.
score = model.evaluate(test_img, test_label, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.0604064624439
Test accuracy: 0.97668997669


In [None]:
# Predict a class for every test image and write the results to
# PREDICT_FILE_PATH as integers ('%d' format).
classes = model.predict_classes(test_img, batch_size=32)
np.savetxt(PREDICT_FILE_PATH, classes, fmt='%d')



In [None]:
# Print the layer-by-layer summary, then store the model weights at
# MODEL_SAVE_FILE_PATH.
model.summary()
model.save_weights(MODEL_SAVE_FILE_PATH)

____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
convolution2d_1 (Convolution2D)    (None, 32, 62, 62)  320         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
activation_1 (Activation)          (None, 32, 62, 62)  0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)    (None, 32, 60, 60)  9248        activation_1[0][0]               
____________________________________________________________________________________________________
activation_2 (Activation)          (None, 32, 60, 60)  0           convolution2d_2[0][0]            
___________________________________________________________________________________________