# 객담도말 결핵진단 딥러닝 모델 - VGG-like convnet

VGG-like convnet 기반의 객담도말 결핵진단 딥러닝 모델 소스코드입니다. 5-fold cross validation 기법이 적용되었습니다.

In [None]:
# imports

import os

from __future__ import print_function
import numpy as np

np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils

from sklearn.cross_validation import KFold

In [None]:
# Configuration

# Base name shared by every artifact this notebook writes.
MODEL_NAME = 'tb_vgglike'

# Training hyper-parameters.
BATCH_SIZE = 128    # size of the unit (batch) processed at a time within one epoch
NUM_CLASSES = 2     # number of classes
NUM_EPOCHS = 12     # number of epochs
NUM_CV_K_FOLD = 5   # cross validation k-fold number

# Image geometry handed to the image loader and to the first layer's input_shape.
IMG_CHANNELS, IMG_ROWS, IMG_COLS = 1, 64, 64

# Number of samples read from the train / test dataset files.
TRAIN_DATA_COUNT = 447648
TEST_DATA_COUNT = 15873

# Dataset input files.
TRAIN_IMAGE_FILE_PATH = './dataset/train_image_64x64_gray_447648.bin'
TRAIN_LABEL_FILE_PATH = './dataset/train_label_64x64_gray_447648.bin'
TEST_IMAGE_FILE_PATH = './dataset/test_image_64x64_gray_15873.bin'
TEST_LABEL_FILE_PATH = './dataset/test_label_64x64_gray_15873.bin'

# Output files, all derived from MODEL_NAME.
TEST_RESULT_FILE_PATH = './output/%s_train_result.txt' % MODEL_NAME
MODEL_ARCHITECUTRE_FILE_PATH = './save/%s_model_architecture.json' % MODEL_NAME
MODEL_WEIGHTS_FILE_PATH = './save/%s_model_weights.h5' % MODEL_NAME

In [None]:
# Inspect the given path and create its folder if it does not exist.

def check_make_dir(path):
    """Create the parent directory of `path` when it is missing.

    Args:
        path: a file path; only its directory component is created.

    Returns:
        None.

    Raises:
        OSError: if the directory cannot be created and does not exist
            afterwards.
    """
    dirname = os.path.dirname(path)

    # A bare file name has no directory component; os.makedirs('') would
    # raise, and there is nothing to create anyway.
    if not dirname:
        return

    try:
        os.makedirs(dirname)
    except OSError:
        # The directory already existing (including being created by someone
        # else between a check and the call) is fine; anything else is a
        # genuine failure.
        if not os.path.exists(dirname):
            raise

In [None]:
# Load image data from a file.

def load_image(filename, count, channel, row, col):
    """Read raw 8-bit image data from `filename` and scale it to [0, 1].

    The file is read as a flat byte stream of count * channel * row * col
    bytes and reshaped to (count, channel, row, col).

    Args:
        filename: path of the raw image file.
        count: number of images to read.
        channel: number of channels per image.
        row: number of rows per image.
        col: number of columns per image.

    Returns:
        A float32 numpy array of shape (count, channel, row, col) holding the
        byte values divided by 255.

    Raises:
        ValueError: if the file holds fewer bytes than requested.
    """
    print('Loading data from', filename)

    expected_size = count * channel * row * col

    print('file size : ', os.path.getsize(filename))
    print('calc size : ', expected_size)

    # The context manager closes the handle even if the read fails.
    with open(filename, 'rb') as fp:
        buf = fp.read(expected_size)

    # Fail with an explicit message instead of an obscure reshape error when
    # the file is shorter than the requested amount of data.
    if len(buf) != expected_size:
        raise ValueError('%s holds %d bytes but %d were requested'
                         % (filename, len(buf), expected_size))

    data = np.frombuffer(buf, dtype=np.uint8)
    data = data.reshape(count, channel, row, col)

    print('loaded shape : ', data.shape)

    # astype() makes a writable float copy, so the in-place division is safe
    # even though frombuffer() returns a read-only view.
    data = data.astype('float32')
    data /= 255

    return data

# Load label data from a file.

def load_label(filename, count, classes):
    """Read `count` labels from `filename` and one-hot encode them.

    Args:
        filename: path of the label file.
        count: number of labels (characters) to read.
        classes: number of classes; used for the printed histogram bins and
            passed to np_utils.to_categorical.

    Returns:
        The result of np_utils.to_categorical(labels, classes).
    """
    print('Loading labels from ', filename)

    print('file size : ', os.path.getsize(filename))
    print('calc size : ', count)

    # NOTE(review): the file is opened in text mode and each character read is
    # converted to uint8, as in the original code. Presumably the file stores
    # one label character per sample -- confirm the on-disk encoding before
    # switching to binary mode.
    with open(filename, 'r') as fp:
        buf = fp.read(count)

    data = np.asarray(list(buf), dtype=np.uint8, order='C')

    print('loaded shape : ', data.shape)

    # Bin by the `classes` argument rather than a module-level constant so the
    # histogram always matches the encoding requested by the caller.
    label_hist = np.histogram(data, bins=range(classes + 1))
    print(label_hist)

    # convert class vectors to binary class matrices
    data = np_utils.to_categorical(data, classes)

    return data

In [None]:
# Load the image tensors first (train, then test), then the matching labels.
train_image = load_image(filename=TRAIN_IMAGE_FILE_PATH,
                         count=TRAIN_DATA_COUNT,
                         channel=IMG_CHANNELS,
                         row=IMG_ROWS,
                         col=IMG_COLS)
test_image = load_image(filename=TEST_IMAGE_FILE_PATH,
                        count=TEST_DATA_COUNT,
                        channel=IMG_CHANNELS,
                        row=IMG_ROWS,
                        col=IMG_COLS)

train_label = load_label(filename=TRAIN_LABEL_FILE_PATH,
                         count=TRAIN_DATA_COUNT,
                         classes=NUM_CLASSES)
test_label = load_label(filename=TEST_LABEL_FILE_PATH,
                        count=TEST_DATA_COUNT,
                        classes=NUM_CLASSES)

In [None]:
# Build the deep learning model.

# NOTE(review): input_shape is given as (channels, rows, cols); presumably the
# Keras backend is configured for channels-first ordering -- confirm.
model = Sequential([
    # Block 1: two 3x3 convolutions with 32 filters, 2x2 max pooling, dropout.
    Convolution2D(32, 3, 3, border_mode='valid',
                  input_shape=(IMG_CHANNELS, IMG_ROWS, IMG_COLS)),
    Activation('relu'),
    Convolution2D(32, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Block 2: two 3x3 convolutions with 64 filters, 2x2 max pooling, dropout.
    Convolution2D(64, 3, 3, border_mode='valid'),
    Activation('relu'),
    Convolution2D(64, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: 256-unit dense layer, dropout, then one softmax output
    # per class.
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES),
    Activation('softmax'),
])

# np_utils.visualize_util.plot(model, to_file='model.png')

In [None]:
# Compile the model and train it over the k cross-validation folds.
model.compile(loss='categorical_crossentropy',
              optimizer='adadelta',
              metrics=['accuracy'])

kf = KFold(len(train_label), n_folds=NUM_CV_K_FOLD)

# NOTE(review): the same `model` instance keeps training from fold to fold
# (its weights are not re-initialised here), so samples used for validation in
# one fold are part of the training split of the others. The per-fold
# validation numbers are therefore not independent estimates -- confirm
# whether this is intended.

# enumerate() supplies the fold number; previously the counter was never
# incremented, so every fold was reported as fold 0.
for k_fold_idx, (train_idxs, validation_idxs) in enumerate(kf):

    print('running %d flod...'%k_fold_idx)

    model.fit(train_image[train_idxs],
              train_label[train_idxs],
              batch_size=BATCH_SIZE,
              nb_epoch=NUM_EPOCHS,
              verbose=1,
              validation_data=(train_image[validation_idxs], train_label[validation_idxs]))

In [None]:
# Run the deep learning model test.

score = model.evaluate(test_image, test_label, verbose=1)

# The first two entries of the evaluation result are reported as the test
# score and the test accuracy.
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

# Predicted class index for every test sample.
classes = model.predict_classes(test_image, batch_size=32)

# Ensure the output folder exists, then write the predictions as integers.
check_make_dir(TEST_RESULT_FILE_PATH)
np.savetxt(TEST_RESULT_FILE_PATH, classes, fmt='%d')

In [None]:
# Save the model training result to files.

print('Save model...')

# Weights go to an HDF5 file; any existing file is overwritten.
check_make_dir(MODEL_WEIGHTS_FILE_PATH)
model.save_weights(MODEL_WEIGHTS_FILE_PATH, overwrite=True)
json_string = model.to_json()

# The architecture is stored separately as JSON. The context manager closes
# the file even if the write fails.
check_make_dir(MODEL_ARCHITECUTRE_FILE_PATH)
with open(MODEL_ARCHITECUTRE_FILE_PATH, 'w') as fp_js:
    fp_js.write(json_string)

print('Done.')