#1. 데이터 불러오기

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem; the paths used by the
# following cells live under this mount point.
drive.mount(mountpoint='/content/drive')

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

##1.1 Train/Val/Test

In [None]:
from keras.preprocessing import image

In [None]:
# Path of one sample image from the NORMAL training folder, used by the
# following cells to inspect loading and augmentation.
img_path = (
    '/content/drive/MyDrive/aibootcamp/section4/chest_xray'
    '/train/NORMAL/NORMAL2-IM-1442-0001.jpeg'
)

In [None]:
# Load the sample image resized to 64x64 and convert it to an array.
img = image.load_img(img_path, target_size=(64, 64))
print(img)
img_arr = image.img_to_array(img)
# Show the largest pixel value and the shape of the converted array.
print(img_arr.max())
print(img_arr.shape)

In [None]:
# Same inspection as for the 64x64 version, but resized to 128x128 so the
# two resolutions can be compared.
img2 = image.load_img(img_path, target_size=(128, 128))
print(img2)
img_arr2 = image.img_to_array(img2)
# Show the largest pixel value and the shape of the converted array.
print(img_arr2.max())
print(img_arr2.shape)

In [None]:
# Image size fed to the network.
img_width = 64
img_height = 64

# One batch size shared by every generator below (previously the literal 32
# was repeated in each call).
batch_size = 32

# Preprocessing 1: normalise pixel values with rescale ([0, 255] -> [0, 1]).
# Preprocessing 2: augment the training images with ImageDataGenerator.
#   - shear_range: patients may be positioned at slightly different angles.
#   - zoom_range: body sizes differ, so vary the relative scale.
#   - no horizontal flip: chest X-rays are normally not mirrored.
#   - validation_split: 20% of the train directory is held out for validation.
train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   validation_split=0.2)

# Validation images must not be augmented, otherwise the validation metrics
# are measured on distorted data. This generator only rescales; it uses the
# same validation_split so that subset='validation' selects the same held-out
# 20% of files that subset='training' excludes.
val_datagen = ImageDataGenerator(rescale=1./255,
                                 validation_split=0.2)

# Test images are only rescaled.
test_datagen = ImageDataGenerator(rescale=1./255)

# Options common to the train / validation / test flows.
# Preprocessing 3: load the images as grayscale instead of RGB.
# NOTE(review): Keras documents target_size as (height, width). The
# (img_width, img_height) order is kept because the model's input_shape uses
# the same order; it makes no difference while both values are 64.
flow_options = dict(target_size=(img_width, img_height),
                    batch_size=batch_size,
                    class_mode='binary',
                    color_mode='grayscale')

# Build the training set (augmented, 80% of the train directory).
train_path = '/content/drive/MyDrive/aibootcamp/section4/chest_xray/train'
training_set = train_datagen.flow_from_directory(train_path,
                                                 subset='training',
                                                 **flow_options)

# Build the validation set (not augmented, remaining 20%).
validation_set = val_datagen.flow_from_directory(train_path,
                                                 subset='validation',
                                                 **flow_options)

# Build the test set.
test_path = '/content/drive/MyDrive/aibootcamp/section4/chest_xray/test'
test_set = test_datagen.flow_from_directory(test_path,
                                            **flow_options)

In [None]:
# Prepend a batch axis of length 1 so the single image array can be passed
# to ImageDataGenerator.flow, which iterates over batches.
x = img_arr[np.newaxis, ...]

In [None]:
# Preview 20 augmented variants of the sample image in a 5x4 grid.
# matplotlib is imported here because this cell runs before the later cell
# that imports it; without this import `plt` is undefined at this point.
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(10, 10))
axs = []
# flow() keeps yielding batches indefinitely, so the loop is bounded to 20
# iterations. With batch_size=1 each batch holds one augmented copy of x.
for idx, batch in zip(range(20), train_datagen.flow(x, batch_size=1)):
    axs.append(fig.add_subplot(5, 4, idx + 1))
    axs[idx].imshow(image.array_to_img(batch[0]))
fig.tight_layout()
plt.show()

In [None]:
# Count how many test images carry each class label and print the counts.
num2 = test_set.classes
uni2, count2 = np.unique(num2, return_counts=True)
print(count2)

In [None]:
# Distinct class labels in the training subset and the number of images per
# label; `count` is reused below to derive the class weights.
num = training_set.classes
uni, count = np.unique(num, return_counts=True)
print(uni, count, sep='\n')

In [None]:
# Preprocessing 4: compensate for class imbalance with per-class loss weights.
# count[0] / count[1] are treated as the NORMAL / PNEUMONIA image counts
# (assumes label 0 = NORMAL - verify against training_set.class_indices).
normal_num, pneumonia_num = count[0], count[1]
total = normal_num + pneumonia_num
# Weight = (1 / class_count) * total / 2, rounded to two decimals, so the
# class with fewer images receives the larger weight.
w0, w1 = (round((1/n)*(total)/2.0, 2) for n in (normal_num, pneumonia_num))
class_weight = {0: w0, 1: w1}
print('Normal weight: ',w0)
print('Pneumonia weight: ',w1)

In [None]:
# Data visualisation: reload the train directory as tf.data datasets and show
# a grid of sample images titled with their class names.
import matplotlib.pyplot as plt

# `batch_size` was used here without ever being defined. Take it from the
# training generator so both pipelines use the same batch size.
batch_size = training_set.batch_size

# Training subset (80%); the fixed seed keeps the split reproducible.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_path,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
# Validation subset (20%); same seed and split fraction as the training subset.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_path,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

class_names = train_ds.class_names
print(class_names)

plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    # Show at most 25 images, and never index past the end of the batch.
    for i in range(min(25, len(images))):
        ax = plt.subplot(5, 5, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        # Convert the label tensor to a plain int before indexing the list.
        plt.title(class_names[int(labels[i])])
        plt.axis("off")
plt.show()

#2. CNN Model

##2.1 모델 구축

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense

In [None]:
# Small CNN for binary classification of single-channel images:
# two (convolution -> max-pooling) stages, then a dense classifier head.
# NOTE(review): Keras expects input_shape as (height, width, channels); the
# (img_width, img_height, 1) order matches the generators' target_size and is
# harmless while both values are equal.
model1 = Sequential([
    # Step 1 - Convolution
    Conv2D(32, (3, 3), input_shape=(img_width, img_height, 1), activation='relu'),
    # Step 2 - Pooling
    MaxPooling2D(pool_size=(2, 2)),
    # Second convolution + pooling stage
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Step 3 - Flattening
    Flatten(),
    # Step 4 - Full connection; a single sigmoid unit gives the binary output
    Dense(units=128, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

##2.2 모델 컴파일

In [None]:
# Metrics tracked during training and evaluation. Precision and recall are
# given explicit names so their history keys are 'precision' and 'recall'.
METRICS = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

In [None]:
# Compile the CNN: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and the metrics listed in METRICS.
model1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=METRICS,
)

##2.3 Early Stopping 

In [None]:
from keras.callbacks import EarlyStopping
# Stop training when validation accuracy has not improved for 5 epochs.
# The monitored key must match the logged metric name: the model is compiled
# with the metric 'accuracy', so the validation key is 'val_accuracy'.
# The previous 'val_acc' matched nothing, so early stopping never triggered.
es = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=5)

##2.4 모델 학습

In [None]:
# Train the CNN for up to 20 epochs with early stopping and class weights.
# Model.fit accepts the generators directly; fit_generator is deprecated and
# absent from current Keras releases.
# Steps are derived from each iterator's own batch_size, because no
# module-level `batch_size` is defined before this cell.
history = model1.fit(training_set,
                     steps_per_epoch=training_set.samples // training_set.batch_size,
                     epochs=20,
                     callbacks=[es],
                     validation_data=validation_set,
                     validation_steps=validation_set.samples // validation_set.batch_size,
                     class_weight=class_weight)

##2.5 하이퍼 파라미터 튜닝 과정 이미지

In [None]:
# Plot the training and validation curves for recall, accuracy and loss,
# one panel per metric, side by side.
fig, ax = plt.subplots(1, 3, figsize=(20, 3))
ax = ax.ravel()

for panel, met in zip(ax, ['recall', 'accuracy', 'loss']):
    train_curve = history.history[met]
    val_curve = history.history['val_' + met]
    panel.plot(train_curve)
    panel.plot(val_curve)
    panel.set_title('Model {}'.format(met))
    panel.set_xlabel('epochs')
    panel.set_ylabel(met)
    panel.legend(['train', 'val'])

##2.6 모델 평가

In [None]:
# Evaluate the trained model on the held-out test set. As the last expression
# of the cell, the returned loss/metric values are displayed by the notebook.
model1.evaluate(test_set,verbose=2)

In [None]:
# Save model2 to 'pneu_model2.h5'.
# NOTE(review): `model2` is not defined anywhere in the visible notebook (only
# `model1` is built) - confirm it is created elsewhere, otherwise this cell
# raises NameError.
model2.save('pneu_model2.h5')

In [None]:
# Save the trained CNN to 'pneu_model1.h5'.
# The notebook never defines `classifier`; the model built and trained in the
# cells above is `model1`, which also matches the filename.
model1.save('pneu_model1.h5')