## 1. 일반적인 Tensorflow.keras CNN 모델링
- ``cifar10`` 데이터로 실습 테스트


In [1]:
import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

def load_datasets():
    (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
    return train_images, train_labels, test_images, test_labels
    
def scaled_pixels(images, labels, scaling=False):
    if scaling:
        images = np.array(images/255.0, dtype=np.float32)
    else:
        images = np.array(images, dtype=np.float32)
    labels = np.array(labels, dtype=np.float32)
    return images, labels

def transform_ohe(labels):
    ohe_labels = to_categorical(labels)
    return ohe_labels

def train_valid_split(train_images, train_ohe_labels):
    tr_images, val_images, tr_ohe_labels, val_ohe_labels = train_test_split(train_images, train_ohe_labels,
                                                                           test_size=0.15, random_state=42)
    return tr_images, val_images, tr_ohe_labels, val_ohe_labels

def preprocess_data():
    train_images, train_labels, test_images, test_labels = load_datasets()
    train_images, train_labels = scaled_pixels(train_images, train_labels, scaling=False)
    test_images, test_labels = scaled_pixels(test_images, test_labels, scaling=False)
    train_ohe_labels = transform_ohe(train_labels)
    test_ohe_labels = transform_ohe(test_labels)
    tr_images, val_images, tr_ohe_labels, val_ohe_labels = train_valid_split(train_images, train_ohe_labels)
    print('Train:', tr_images.shape, tr_ohe_labels.shape)
    print('Valid:', val_images.shape, val_ohe_labels.shape)
    print('Test:', test_images.shape, test_ohe_labels.shape)
    return tr_images, val_images, tr_ohe_labels, val_ohe_labels, test_images, test_ohe_labels

tr_images, val_images, tr_ohe_labels, val_ohe_labels, test_images, test_ohe_labels = preprocess_data()

2021-10-12 03:48:54.406165: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Train: (42500, 32, 32, 3) (42500, 10)
Valid: (7500, 32, 32, 3) (7500, 10)
Test: (10000, 32, 32, 3) (10000, 10)


In [2]:
# 내가 직접 CNN 모델 만들어 모델링
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

def create_model(input_size, verbose=False):
    input_tensor = Input(shape=(input_size, input_size, 3))
    x = Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')(input_tensor)
    x = Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')(x)
    
    x = Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(x)
    x = Conv2D(filters=64, kernel_size=3, padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=2)(x)
    
    x = Conv2D(filters=128, kernel_size=3, padding='valid', activation='relu')(x)
    x = Conv2D(filters=128, kernel_size=3, padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=2)(x)
    # Classfier Layer
    x = Flatten()(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(units=512, activation='relu')(x)
    x = Dropout(rate=0.4)(x)
    x = Dense(units=64, activation='relu')(x)
    x = Dropout(rate=0.3)(x)
    output = Dense(units=10, activation='softmax')(x)
    
    model = Model(inputs=input_tensor, outputs=output)
    if verbose:
        model.summary()
    
    return model

INPUT_SIZE = tr_images.shape[1]
BATCH_SIZE = 64

# 모델 정의
model = create_model(input_size=INPUT_SIZE, verbose=False)
# 모델 compile
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
# callbacks
mc_call = ModelCheckpoint(filepath='/kaggle/working/models/weights.{epoch:02d}-{val_loss:.02f}.hdf5', monitor='val_loss', mode='min',
                         save_best_only=True, save_weights_only=True, period=5, verbose=0)
es_call = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=1)
lr_call = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.2, patience=4, verbose=1)
# 모델 학습
train_hist = model.fit(x=tr_images, y=tr_ohe_labels, batch_size=BATCH_SIZE, epochs=20, validation_data=(val_images, val_ohe_labels),
                      callbacks=[mc_call, es_call, lr_call])
# 모델 평가
test_hist = model.evaluate(x=test_images, y=test_ohe_labels, batch_size=32)

2021-10-12 03:49:03.282832: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-12 03:49:03.286171: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-10-12 03:49:03.333205: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-12 03:49:03.333919: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0
coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s
2021-10-12 03:49:03.333994: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-10-12 03:49:03.361411: I tensorflow/stream_executor/platform/def

Epoch 1/20


2021-10-12 03:49:07.052867: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2021-10-12 03:49:07.809140: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2021-10-12 03:49:07.834020: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


OSError: Unable to create file (unable to open file: name = '/kaggle/working/models/weights.05-1.11.hdf5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

In [None]:
# 테스트 데이터 하나로 predict 하기 -> 한 개 예측해도 배치 사이즈 포함해서 4차원 shape으로 정의해주어야 함
single_img = np.reshape(test_images[0], (1, INPUT_SIZE, INPUT_SIZE, 3))
print('single_img shape:', single_img.shape)

# 위 모델에서 y값이 원-핫 인코딩 상태이기 때문에 -> 예측 결과도 원-핫 인코딩 형태로 나옴 -> argmax 해주어야 함
prediction = model.predict(x=single_img, verbose=1)
print('prediction 결과:', prediction.shape)
final_prediction = np.argmax(prediction, axis=1)
print('final_prediction 결과 클래스:', final_prediction, final_prediction[0])

# 모델 메모리에서 초기화
import tensorflow as tf
tf.keras.backend.clear_session()

## 2. Tensorflow.keras - ImageDataGenerator 파이프라인 활용
- ``flow()`` -> 이미지를 Numpy array 형태로 받아올 수 있을 때
- ``flow_from_directory()`` -> 이미지, 레이블을 디렉토리 경로 형태로 받아올 수 있을 때
- ``flow_from_dataframe()`` -> 이미지, 레이블을 디렉토리 경로로 받아올 수 있는데, 이 메타 데이터들을 pandas.dataframe에 저장할 수 있을 때

### 2-1. ``flow()``
- ``cifar10`` 데이터를 활용

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ImageGenerator 데이터 유형에 맞게 정의 -> 적용할 증강 기법 정의
tr_gen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rescale=1/255.)
val_gen = ImageDataGenerator(rescale=1/255.)
test_gen = ImageDataGenerator(rescale=1/255.)

# flow 메소드로 Numpy Array Iterator 만들기 -> 여기서 X, y, 배치 사이즈, 셔플 유무 결정
tr_gen_flow = tr_gen.flow(x=tr_images, y=tr_ohe_labels, batch_size=BATCH_SIZE, shuffle=True)
val_gen_flow = val_gen.flow(x=val_images, y=val_ohe_labels, batch_size=BATCH_SIZE, shuffle=False)
test_gen_flow = test_gen.flow(x=test_images, y=test_ohe_labels, batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의
model = create_model(input_size=INPUT_SIZE, verbose=False)
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
# callbacks
es_call = EarlyStopping(monitor='val_loss', mode='min', patience=4, verbose=1)
lr_call = ReduceLROnPlateau(monitor='val_loss', mode='min', patience=3, verbose=1)
# 모델 학습
train_hist = model.fit(tr_gen_flow, epochs=20, validation_data=val_gen_flow, callbacks=[es_call, lr_call])
# 모델 평가
test_hist = model.evaluate(test_gen_flow)

### 2-2. ``flow_from_directory()``
- 캐글 오픈 데이터 <a href='https://www.kaggle.com/tongpython/cat-and-dog'>cat-and-dog</a> 데이터 활용
- 인자로 넣어주는 디렉토리 경로의 바로 하위 디렉토리를 레이블, 그 밑의 이미지를 그 레이블에 해당하는 이미지로 추측하여 파이프라인 생성
    - 단, 레이블과 이미지 매칭안시켜주면 난장판 발생..
    - 또, Train/Valid 구분 안되어 있으면 이를 인위적으로 구분해주어야 하는데 매우 귀찮은 전처리 상황 발생..

In [None]:
# 모델은 전이학습 모델 사용 mobilenetv2
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, GlobalAveragePooling2D
from tensorflow.keras.models import Model


def create_mobilenet(input_size, verbose=False):
    input_tensor = Input(shape=(input_size, input_size, 3))
    base_model = MobileNetV2(input_tensor=input_tensor, include_top=False, weights='imagenet')
    base_output = base_model.output
    # Classfier layer
    x = GlobalAveragePooling2D()(base_output)
    x = Dense(units=256, activation='relu')(x)
    output = Dense(units=1, activation='sigmoid')(x)
    
    model = Model(inputs=input_tensor, outputs=output)
    if verbose:
        model.summary()
    
    return model

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

INPUT_SIZE = 224
BATCH_SIZE = 64

# ImageGenerator 데이터 유형에 맞게 정의 -> 적용할 증강 기법 정의
tr_gen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rescale=1/255.)
test_gen = ImageDataGenerator(rescale=1/255.)

# Numpy Array Iterator 생성
directory = '/kaggle/input/cat-and-dog/'
tr_gen_flow = tr_gen.flow_from_directory(directory=directory, target_size=(INPUT_SIZE, INPUT_SIZE),
                                        class_mode='binary', batch_size=BATCH_SIZE, shuffle=True)
test_gen_flow = test_gen.flow_from_directory(directory=directory, target_size=(INPUT_SIZE, INPUT_SIZE),
                                             class_mode='binary', batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의
model = create_mobilenet(input_size=INPUT_SIZE, verbose=False)
model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
# callbacks
mc_call = ModelCheckpoint(filepath='/kaggle/working/models/weights.{epoch:02d}-{loss:.02f}.hdf5',
                         monitor='loss', mode='min', verbose=1, period=1)
es_call = EarlyStopping(monitor='loss', mode='min', patience=4, verbose=1)
lr_call = ReduceLROnPlateau(monitor='loss', mode='min', patience=3, verbose=1)
# 모델 학습
train_hist = model.fit(tr_gen_flow, epochs=2, callbacks=[es_call, lr_call, mc_call])
# 모델 평가
test_hist = model.evaluate(test_gen_flow)

In [None]:
# 저장한 모델 중 최적의 파라미터 모델 가중치(hdf5 파일)로드해서 test 데이터로 재평가
# 1.위에서 저장할때 사용한 모델 아키텍처 동일하게 설계
# 2.load_weights로 가중치 로드 후 컴파일
# 3.test 데이터에 평가 및 예측

optimal_model = create_mobilenet(INPUT_SIZE, False)
optimal_model.load_weights(filepath='/kaggle/working/models/weights.02-0.48.hdf5')
optimal_model.compile(Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

test_gen = ImageDataGenerator(rescale=1/255.)
test_gen_flow = test_gen.flow_from_directory(directory=directory, target_size=(INPUT_SIZE, INPUT_SIZE),
                                             class_mode='binary', batch_size=BATCH_SIZE, shuffle=False)

test_hist_new = optimal_model.evaluate(test_gen_flow)

### 2-3. ``flow_from_dataframe()``
- label 값들은 수치형 인코딩된 상태가 아닌 문자열의 클래스 상태이어야 함

In [None]:
import os
import pandas as pd

paths = []
types = []
labels = []

for dirname, _, filenames in os.walk('/kaggle/input/cat-and-dog'):
    for filename in filenames:
        if '.jpg' in filename:
            path = os.path.join(dirname, filename)
            paths.append(path)
            if '/training_set/' in path:
                types.append('train')
            elif '/test_set/' in path:
                types.append('test')
            else:
                pass
            
            if 'dogs' in path:
                labels.append('dog')
            elif 'cats' in path:
                labels.append('cat')
            else:
                pass

data_df = pd.DataFrame({'path': paths, 'type': types, 'label': labels})
data_df.shape

In [None]:
from sklearn.model_selection import train_test_split

# label 값을 LabelEncoding 하기
data_df['label_enc'] = pd.factorize(data_df['label'])[0]
# Train, Test 분할
train_df = data_df[data_df['type'] == 'train']
test_df = data_df[data_df['type'] == 'test']
# Train, Valid 분할
tr_df, val_df = train_test_split(train_df, stratify=train_df['label'], test_size=0.15, random_state=42)
print('Train:', tr_df.shape)
print('Valid:', val_df.shape)
print('Test:', test_df.shape)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 데이터 유형에 맞게 ImageDataGenerator 정의
tr_gen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rescale=1/255.)
val_gen = ImageDataGenerator(rescale=1/255.)
test_gen = ImageDataGenerator(rescale=1/255.)

# Numpy Array Iterator
tr_gen_flow = tr_gen.flow_from_dataframe(dataframe=tr_df, x_col='path', y_col='label',
                                        target_size=(INPUT_SIZE, INPUT_SIZE), class_mode='binary',
                                        batch_size=BATCH_SIZE, shuffle=True)
val_gen_flow = val_gen.flow_from_dataframe(dataframe=val_df, x_col='path', y_col='label',
                                          target_size=(INPUT_SIZE, INPUT_SIZE), class_modoe='binary',
                                          batch_size=BATCH_SIZE, shuffle=False)
test_gen_flow = test_gen.flow_from_dataframe(dataframe=test_df, x_col='path', y_col='label',
                                            target_size=(INPUT_SIZE, INPUT_SIZE), class_mode='binary',
                                            batch_size=BATCH_SIZE, shuffle=False)

# 모델 정의
model = create_mobilenet(INPUT_SIZE, verbose=False)
# 모델 컴파일
model.compile(Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
# callbacks
es_call = EarlyStopping(monitor='loss', mode='min', patience=4, verbose=1)
lr_call = ReduceLROnPlateau(monitor='loss', mode='min', patience=3, verbose=1)
# 모델 학습
train_hist = model.fit(tr_gen_flow, epochs=2, validation_data=val_gen_flow, callbacks=[es_call, lr_call])
# 모델 평가
test_hist = model.evaluate(test_gen_flow)

## Tensorflow Sequence & Albumentation 결합
- ``tensorflow.keras.utils.Sequenct`` 클래스를 상속받아서 제네레이터 커스텀해서 정의
- ``__len__`` 매직 메서드 새로 정의 : 한 에포크 당 배치 사이즈가 도는 steps 계산하는 것으로 변경
- ``__getitem__`` 매직 메서드(리스트 인덱싱 매직메서드) 새로 정의 : 정의한 배치 사이즈만큼 데이터 소스에서 이미지, 레이블 가져와 -> 이미지 가공 처리 -> 배치 사이즈만큼의 이미지, 레이블 반환
- ``on_epoch_end``: 한 에포크 마다 데이터 셔플할지 결정

In [None]:
from tensorflow.keras.utils import Sequence
import sklearn
import cv2
import numpy as np

INPUT_SIZE = 224

class CustomDS(Sequence):
    def __init__(self, images, labels, batch_size, shuffle=False, augmentor=None, pre_func=None):
        """
        images: 이미지 파일이 존재하는 디렉토리 경로 or Numpy array 형태의 이미지 -> 지금은 디렉토리 경로라고 가정
        labels: 수치형으로 변환된 레이블
        augmentor: Albumentation과 같은 써드파티 이미지 증강 모듈
        pre_func: 이미지 픽셀값 스케일링 함수
        """
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentor = augmentor
        self.pre_func = pre_func
        
        # Keras에서는 데이터 구간이 1 ~ 10이 있다면 순차적으로 batch_size 만큼 차례대로 학습하기 떄문에 최초에만 데이터 셔플함
        if self.shuffle:
            self.on_epoch_end()
        
    def __len__(self):
        steps = int(np.ceil(self.images.shape[0] / self.batch_size))
        return steps
    
    def __getitem__(self, index):
        images_batch = self.images[index*self.batch_size:(index+1)*self.batch_size]
        if self.labels is not None:
            labels_batch = self.labels[index*self.batch_size:(index+1)*self.batch_size]
        images_batch_after = np.zeros((images_batch.shape[0], INPUT_SIZE, INPUT_SIZE, 3))
        for idx in range(images_batch.shape[0]):
            single_img = cv2.cvtColor(cv2.imread(images_batch[idx]), cv2.COLOR_BGR2RGB)
            single_img = cv2.resize(single_img, (INPUT_SIZE, INPUT_SIZE))
            if self.augmentor is not None:
                single_img = self.augmentor(image=single_img)['image']
            if self.pre_func is not None:
                single_img = self.pre_func(single_img)
            images_batch_after[idx] = single_img
        return images_batch_after, labels_batch
    
    def on_epoch_end(self):
        print('#### 셔플 수행! ####')
        self.images, self.labels = sklearn.utils.shuffle(self.images, self.labels)

In [None]:
# Augmentor, Scaling Function
import albumentations as A
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_pre_func
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

augmentor = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.OneOf([
        A.CLAHE(p=0.5),
        A.ShiftScaleRotate(p=0.5)
    ])
])

# Custom Image Generator
train_gen = CustomDS(images=tr_df['path'].values, labels=tr_df['label_enc'].values, batch_size=64,
                    shuffle=True, augmentor=augmentor, pre_func=mobilenet_pre_func)
val_gen = CustomDS(images=val_df['path'].values, labels=val_df['label_enc'].values, batch_size=64,
                  shuffle=False, augmentor=None, pre_func=mobilenet_pre_func)
test_gen = CustomDS(images=test_df['path'].values, labels=test_df['label_enc'].values, batch_size=64,
                   shuffle=False, augmentor=None, pre_func=mobilenet_pre_func)

# Model
model = create_mobilenet(input_size=INPUT_SIZE, verbose=False)
# compile
model.compile(Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
# callbacks
mc_call = ModelCheckpoint(filepath='/kaggle/working/models/weights.{epoch:02d}-{val_loss:.02f}.hdf5',
                         monitor='val_loss', mode='min', verbose=0, period=1)
lr_call = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.2, patience=5, verbose=1)
es_call = EarlyStopping(monitor='val_loss', mode='min', patience=7, verbose=1)
# fit
train_hist = model.fit(train_gen, epochs=3, validation_data=val_gen, callbacks=[mc_call, lr_call, es_call])
# evaluate
evalutation = model.evaluate(test_gen)
# Predict
# model.predict(image_array ~)

## AlexNet

## VGG

## GoogleNet(Inception)

In [None]:
# Inception Module 생성 함수
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Concatenate


def inception_module(x, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool_proj, name=None):
    """
    x: previous layer
    filters_1x1 : x에 1x1 컨볼루션 적용할 때 만들 필터개수
    filters_3x3_reduce : x에 3x3 컨볼루션 적용 전 1x1 컨볼루션 적용할 때 만들 필터 개수
    filters_3x3 : filters_3x3_reduce 이후에 적용할 원래 3x3 컨볼루션 시 만들 필터 개수
    filters_5x5_reduce : x에 5x5 컨볼루션 적용 전 1x1 컨볼루션 적용할 때 만들 필터 개수
    filters_5x5 : filters_5x5_reduce 이후에 적용할 원래 5x5 컨볼루션 시 만들 필터 개수
    filters_pool : x에 3x3 max pooling 취한 후에 1x1 컨볼루션 적용할 때 만들 필터 개수
    name : 인셉션 모듈 id
    """
    conv_1x1 = Conv2D(filters=filters_1x1, kernel_size=1, padding='same', activation='relu')(x)
    
    conv_3x3 = Conv2D(filters=filters_3x3_reduce, kernel_size=1, padding='same', activation='relu')(x)
    conv_3x3 = Conv2D(filters=filters_3x3, kernel_size=3, padding='same', activation='relu')(conv_3x3)
    
    conv_5x5 = Conv2D(filters=filters_5x5_reduce, kernel_size=1, padding='same', activation='relu')(x)
    conv_5x5 = Conv2D(filters=filters_5x5, kernel_size=5, padding='same', activation='relu')(conv_5x5)
    
    conv_pool = MaxPooling2D(pool_size=3, strides=1, padding='same')(x)
    conv_pool = Conv2D(filters=filters_pool_proj, kernel_size=1, padding='same', activation='relu')(conv_pool)
    # Concatenate -> 합칠 layer들을 리스트로 functional API 형태로 넣어주기
    module_output = Concatenate(axis=-1, name=name)([conv_1x1, conv_3x3, conv_5x5, conv_pool])
    return module_output

In [None]:
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model

def create_googlenet(in_shape=(224, 224, 3), n_classes=10):
    input_tensor = Input(shape=in_shape)
    
    x = Conv2D(64, (7, 7), padding='same', strides=(2, 2), activation='relu', name='conv_1_7x7/2')(input_tensor)
    x = MaxPooling2D((3, 3), padding='same', strides=(2, 2), name='max_pool_1_3x3/2')(x)
    x = Conv2D(64, (1, 1), padding='same', strides=(1, 1), activation='relu', name='conv_2a_3x3/1')(x)
    x = Conv2D(192, (3, 3), padding='same', strides=(1, 1), activation='relu', name='conv_2b_3x3/1')(x)
    x = MaxPooling2D((3, 3), padding='same', strides=(2, 2), name='max_pool_2_3x3/2')(x)
    
       # 첫번째 inception 모듈
    x = inception_module(x, filters_1x1=64,
                         filters_3x3_reduce=96,
                         filters_3x3=128,
                         filters_5x5_reduce=16,
                         filters_5x5=32,
                         filters_pool_proj=32,
                         name='inception_3a')
    # 두번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=128,
                         filters_3x3_reduce=128,
                         filters_3x3=192,
                         filters_5x5_reduce=32,
                         filters_5x5=96,
                         filters_pool_proj=64,
                         name='inception_3b')

    x = MaxPooling2D((3, 3), padding='same', strides=(2, 2), name='max_pool_3_3x3/2')(x)
    
    # 세번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=192,
                         filters_3x3_reduce=96,
                         filters_3x3=208,
                         filters_5x5_reduce=16,
                         filters_5x5=48,
                         filters_pool_proj=64,
                         name='inception_4a')
    # 네번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=160,
                         filters_3x3_reduce=112,
                         filters_3x3=224,
                         filters_5x5_reduce=24,
                         filters_5x5=64,
                         filters_pool_proj=64,
                         name='inception_4b')
    
    # 다섯번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=128,
                         filters_3x3_reduce=128,
                         filters_3x3=256,
                         filters_5x5_reduce=24,
                         filters_5x5=64,
                         filters_pool_proj=64,
                         name='inception_4c')
    # 여섯번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=112,
                         filters_3x3_reduce=144,
                         filters_3x3=288,
                         filters_5x5_reduce=32,
                         filters_5x5=64,
                         filters_pool_proj=64,
                         name='inception_4d')
    # 일곱번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=256,
                         filters_3x3_reduce=160,
                         filters_3x3=320,
                         filters_5x5_reduce=32,
                         filters_5x5=128,
                         filters_pool_proj=128,
                         name='inception_4e')

    x = MaxPooling2D((3, 3), padding='same', strides=(2, 2), name='max_pool_4_3x3/2')(x)
    # 여덟번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=256,
                         filters_3x3_reduce=160,
                         filters_3x3=320,
                         filters_5x5_reduce=32,
                         filters_5x5=128,
                         filters_pool_proj=128,
                         name='inception_5a')
    # 아홉번째 inception 모듈
    x = inception_module(x,
                         filters_1x1=384,
                         filters_3x3_reduce=192,
                         filters_3x3=384,
                         filters_5x5_reduce=48,
                         filters_5x5=128,
                         filters_pool_proj=128,
                         name='inception_5b')
    
    # Classifier Layer
    x = GlobalAveragePooling2D()(x)
    x = Dropout(rate=0.4)(x)
    x = Dense(units=128)(x)
    output = Dense(units=n_classes)(x)
    
    model = Model(inputs=input_tensor, outputs=output)
    return model

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

import random as python_random
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import Sequence
import cv2
import sklearn

def zero_one_scaler(image):
    return image/255.0

def get_preprocessed_ohe(images, labels, pre_func=None):
    # preprocessing 함수가 입력되면 이를 이용하여 image array를 scaling 적용.
    if pre_func is not None:
        images = pre_func(images)
    # OHE 적용    
    oh_labels = to_categorical(labels)
    return images, oh_labels

# 학습/검증/테스트 데이터 세트에 전처리 및 OHE 적용한 뒤 반환 
def get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.15, random_state=2021):
    # 학습 및 테스트 데이터 세트를  0 ~ 1사이값 float32로 변경 및 OHE 적용. 
    train_images, train_oh_labels = get_preprocessed_ohe(train_images, train_labels)
    test_images, test_oh_labels = get_preprocessed_ohe(test_images, test_labels)
    
    # 학습 데이터를 검증 데이터 세트로 다시 분리
    tr_images, val_images, tr_oh_labels, val_oh_labels = train_test_split(train_images, train_oh_labels, test_size=valid_size, random_state=random_state)
    
    return (tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels)

class CustomDS(Sequence):
    def __init__(self, images, labels, batch_size, shuffle, augmentor=None, pre_func=None):
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentor = augmentor
        self.pre_func = pre_func
        
        if self.shuffle:
            self.on_epoch_end()
            
    def __len__(self):
        steps = int(np.ceil(self.images.shape[0] / self.batch_size))
        return steps
    
    def __getitem__(self, index):
        images_batch = self.images[index*self.batch_size:(index+1)*self.batch_size]
        if self.labels is not None:
            labels_batch = self.labels[index*self.batch_size:(index+1)*self.batch_size]
        images_batch_after = np.zeros((images_batch.shape[0], INPUT_SIZE, INPUT_SIZE, 3), dtype=np.float32)
        for idx in range(images_batch.shape[0]):
            single_img = images_batch[idx]
            single_img = cv2.resize(single_img, (INPUT_SIZE, INPUT_SIZE))
            if self.augmentor is not None:
                single_img = augmentor(image=single_img)['image']
            if self.pre_func is not None:
                single_img = self.pre_func(single_img)
            images_batch_after[idx] = single_img
        return images_batch_after, labels_batch
    
    def on_epoch_end(self):
        if self.shuffle:
            self.images, self.labels = sklearn.utils.shuffle(self.images, self.labels)

In [None]:
# CIFAR10 데이터 재 로딩 및 Scaling/OHE 전처리 적용하여 학습/검증/데이터 세트 생성. 
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

(tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels) = \
    get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.2, random_state=2021)
print(tr_images.shape, tr_oh_labels.shape, val_images.shape, val_oh_labels.shape, test_images.shape, test_oh_labels.shape)

In [None]:
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preprocess
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

INPUT_SIZE = 128
BATCH_SIZE = 64

tr_ds = CustomDS(tr_images, tr_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=True, pre_func=inception_preprocess)
val_ds = CustomDS(val_images, val_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=inception_preprocess)

model = create_googlenet(in_shape=(INPUT_SIZE, INPUT_SIZE, 3), n_classes=10)
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

lr_call = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.2, patience=4, verbose=1)
es_call = EarlyStopping(monitor='val_loss', mode='min', patience=6, verbose=1)

history = model.fit(tr_ds, epochs=10, validation_data=val_ds, callbacks=[lr_call, es_call])

## ResNet
#### 1. Identity Block 생성

In [None]:
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation
from tensorflow.keras.layers import add, Add

# ResNet의 Identity Block(=Residual Block) 생성하는 함수
def identity_block(input_tensor, middle_kernel_size, filters: list, stage, block):
    """ Identity Block 생성
    input_tensor: 입력 레이어 또는 previous 레이어
    middle_kernel_size: Block 내 적용할 컨볼루션 중앙값(3 by 3 or 5 by 5)
    filters: Block 내 적용할 3번의 컨볼루션 시 out시킬 필터개수. 리스트 형태
    """
    filter1, filter2, filter3 = filters # filter1,2는 압축할 필터개수, filter3는 input_tensor와 같은 필터수
    conv_name = f'res{stage}{block}_branch' # 명시할 컨볼루션 레이어 이름
    bn_name = f'BN{stage}{block}_branch'    # 명시할 BatchNormalization 레이어 이름
    
    x = Conv2D(filters=filter1, kernel_size=(1, 1), kernel_initializer='he_normal', name=conv_name+'2a')(input_tensor)
    x = BatchNormalization(axis=-1, name=bn_name+'2a')(x)
    x = Activation('relu')(x)
    
    x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'2b')(x)
    x = BatchNormalization(axis=-1, name=bn_name+'2b')(x)
    x = Activation('relu')(x)
    
    x = Conv2D(filters=filter3, kernel_size=(1, 1), kernel_initializer='he_normal', name=conv_name+'2c')(x)
    x = BatchNormalization(axis=-1, name=bn_name+'2c')(x)
    
    # Add
    x = Add()([input_tensor, x])
    # x = add([input_tensor, x]) 도 가능`
    x = Activation('relu')(x)
    
    return x

#### 2. 각 Stage 내의 첫 번째 Identity Block에서 이전 Layer의 사이즈를 절반으로 줄이는 역할의 Block 생성
- 위 함수랑 거의 동일하되 첫번째 컨볼루션 때 stride를 적용해야 함

In [None]:
def conv_block(input_tensor, middle_kernel_size, filters, stage, block, strides=(2, 2)):
    filter1, filter2, filter3 = filters
    conv_name = f'res{stage}{block}_branch'
    bn_name = f'bn{stage}{block}_branch'
    
    # 1.첫 번째에 이전의 피처맵 사이즈를 줄이기 위해 컨볼루션에 strides 적용
    x = Conv2D(filters=filter1, kernel_size=(1, 1), strides=strides, kernel_initializer='he_normal', name=conv_name+'2a')(input_tensor)
    x = BatchNormalization(axis=-1, name=bn_name+'2a')(x)
    x = Activation('relu')(x)
    
    x = Conv2D(filters=filter2, kernel_size=middle_kernel_size, padding='same', kernel_initializer='he_normal', name=conv_name+'2b')(x)
    x = BatchNormalization(axis=-1, name=bn_name+'2b')(x)
    x = Activation('relu')(x)
    
    x = Conv2D(filters=filter3, kernel_size=(1, 1), kernel_initializer='he_normal', name=conv_name+'2c')(x)
    x = BatchNormalization(axis=-1, name=bn_name+'2c')(x)
    
    # 이전 Layer 피처 맵 사이즈를 1번에서 줄였기 때문에 Skip Connection 할때도, 이전 Layer의 사이즈를 줄여서 넘겨주어야 함!
    shortcut = Conv2D(filters=filter3, kernel_size=(1, 1), strides=strides, kernel_initializer='he_normal', name=conv_name+'shortcut_conv')(input_tensor)
    shortcut = BatchNormalization(axis=-1, name=bn_name+'shortcut_bn')(shortcut)
    
    # Add
    x = Add()([x, shortcut])
    
    x = Activation('relu')(x)
    
    return x

#### 3. ResNet의 가장 intro 컨볼루션 레이어 생성
- 입력 이미지의 Receptive Field를 넓게 해서 다양한 특징을 추출하기 위해 넓은 커널사이즈 컨볼루션 적용
- 해당 예시에서는 원본 입력 이미지 사이즈는 (224, 224, 3)으로 설정

In [None]:
from tensorflow.keras.layers import ZeroPadding2D, MaxPooling2D

def do_first_conv(input_tensor):
    # 7 by 7 컨볼루션 & strides=2 이후 224의 절반 112사이즈를 맞춰주기 위해서 제로패딩 추가
    x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(input_tensor)
    x = Conv2D(filters=64, kernel_size=(7, 7), strides=(2, 2), padding='valid', kernel_initializer='he_normal', name='conv1')(x)
    x = BatchNormalization(axis=-1, name='bn_conv1')(x)
    x = Activation('relu')(x)
    
    # 3 by 3 맥스풀링 & Stride=2 적용하기 위해 제로패딩 또 추가
    x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x)
    
    return x

#### ResNet 생성 함수(위 만든 함수들 모두 사용)

In [None]:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense

def create_resnet(in_shape=(224, 224, 3), n_classes=10):
    input_tensor = Input(shape=in_shape)
    # 초기 컨볼루션 적용
    x = do_first_conv(input_tensor)
    # 여러개의 Identity Block 생성
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1,1)) # strides=1로 설정해서 피처맵 사이즈 안줄임!
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    
    # Classifier Layer
    x = GlobalAveragePooling2D()(x)
    x = Dropout(rate=0.3)(x)
    x = Dense(units=200, activation='relu')(x)
    x = Dropout(rate=0.3)(x)
    output = Dense(units=n_classes, activation='softmax')(x)
    
    model = Model(inputs=input_tensor, outputs=output)
#     model.summary()
    return model

In [None]:
import random as python_random
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import Sequence
import cv2
import sklearn
import numpy as np

def zero_one_scaler(image):
    return image/255.0

def get_preprocessed_ohe(images, labels, pre_func=None):
    # preprocessing 함수가 입력되면 이를 이용하여 image array를 scaling 적용.
    if pre_func is not None:
        images = pre_func(images)
    # OHE 적용    
    oh_labels = to_categorical(labels)
    return images, oh_labels

# 학습/검증/테스트 데이터 세트에 전처리 및 OHE 적용한 뒤 반환 
def get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.15, random_state=2021):
    # 학습 및 테스트 데이터 세트를  0 ~ 1사이값 float32로 변경 및 OHE 적용. 
    train_images, train_oh_labels = get_preprocessed_ohe(train_images, train_labels)
    test_images, test_oh_labels = get_preprocessed_ohe(test_images, test_labels)
    
    # 학습 데이터를 검증 데이터 세트로 다시 분리
    tr_images, val_images, tr_oh_labels, val_oh_labels = train_test_split(train_images, train_oh_labels, test_size=valid_size, random_state=random_state)
    
    return (tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels )


IMAGE_SIZE = 128
BATCH_SIZE = 64

class CustomDS(Sequence):
    def __init__(self, images, labels, batch_size, shuffle=False, augmentor=None, pre_func=None):
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentor = augmentor
        self.pre_func = pre_func
        if self.shuffle:
            self.on_epoch_end()
        
    def __len__(self):
        steps = int(np.ceil(self.images.shape[0] / self.batch_size))
        return steps
    
    def __getitem__(self, index):
        images_batch = self.images[index*self.batch_size:(index+1)*self.batch_size]
        if self.labels is not None:
            labels_batch = self.labels[index*self.batch_size:(index+1)*self.batch_size]
        images_batch_after = np.zeros((images_batch.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
        for idx in range(images_batch.shape[0]):
            single_img = images_batch[idx]
            single_img = cv2.resize(single_img, (IMAGE_SIZE, IMAGE_SIZE))
            if self.augmentor is not None:
                single_img = self.augmentor(image=single_img)['image']
            if self.pre_func is not None:
                single_img = self.pre_func(single_img)
            images_batch_after[idx] = single_img
        return images_batch_after, labels_batch
    
    def on_epoch_end(self):
        self.images, self.labels = sklearn.utils.shuffle(self.images, self.labels)

In [None]:
# CIFAR10 데이터 재 로딩 및 OHE 전처리 적용하여 학습/검증/데이터 세트 생성. 
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

(tr_images, tr_oh_labels), (val_images, val_oh_labels), (test_images, test_oh_labels) = \
    get_train_valid_test_set(train_images, train_labels, test_images, test_labels, valid_size=0.2, random_state=2021)
print(tr_images.shape, tr_oh_labels.shape, val_images.shape, val_oh_labels.shape, test_images.shape, test_oh_labels.shape)

from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess

tr_ds = CustomDS(tr_images, tr_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=True, pre_func=resnet_preprocess)
val_ds = CustomDS(val_images, val_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=resnet_preprocess)
test_ds = CustomDS(test_images, test_oh_labels, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=resnet_preprocess)

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

resnet_model = create_resnet(in_shape=(128, 128, 3), n_classes=10)
resnet_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

mc_call = ModelCheckpoint(filepath='/kaggle/working/weights.{epoch:02d}-{val_loss:.02f}.hdf5', monitor='val_loss', mode='min', verbose=0)
lr_call = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.2, patience=5, verbose=1)
es_call = EarlyStopping(monitor='val_loss', mode='min', patience=6, verbose=1)

# fit
history = resnet_model.fit(tr_ds, epochs=2, validation_data=val_ds, callbacks=[mc_call, lr_call, es_call])
# evaluate
test_hist = resnet_model.evaluate(test_ds)

## Fine Tuning
- Cat and Dog 데이터로 수행

In [None]:
import pandas as pd
import numpy as np
import os

def make_catndog_df():
    paths = []
    types = []
    labels = []
    for dirname, _, filenames in os.walk('/kaggle/input/cat-and-dog'):
        for filename in filenames:
            if '.jpg' in filename:
                path = os.path.join(dirname, filename)
                paths.append(path)
                if '/training_set/' in path:
                    types.append('train')
                elif '/test_set/' in path:
                    types.append('test')
                else:
                    pass
                if 'dogs' in path:
                    labels.append('dog')
                elif 'cats' in path:
                    labels.append('cat')
                else:
                    pass
                
    data_df = pd.DataFrame({'path':paths, 'type': types, 'label': labels})
    print(data_df.shape)
    return data_df

data_df = make_catndog_df()

In [None]:
# Custom Datset 만들기
from tensorflow.keras.utils import Sequence
import cv2
import sklearn

IMAGE_SIZE = 160
BATCH_SIZE = 64

class CustomDS(Sequence):
    def __init__(self, image_paths, labels, batch_size, shuffle=False, augmentor=None, pre_func=None):
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentor = augmentor
        self.pre_func = pre_func
        
        if self.shuffle:
            self.on_epoch_end()
        
    def __len__(self):
        steps = int(np.ceil(self.image_paths.shape[0] / self.batch_size))
        return steps
    
    def __getitem__(self, index):
        image_batch = self.image_paths[index*self.batch_size:(index+1)*self.batch_size]
        if self.labels is not None:
            label_batch = self.labels[index*self.batch_size:(index+1)*self.batch_size]
        image_batch_after = np.zeros((image_batch.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
        for idx in range(image_batch.shape[0]):
            single_img = cv2.cvtColor(cv2.imread(image_batch[idx]), cv2.COLOR_BGR2RGB)
            single_img = cv2.resize(single_img, (IMAGE_SIZE, IMAGE_SIZE))
            if self.augmentor is not None:
                single_img = augmentor(image=single_img)['image']
            if self.pre_func is not None:
                single_img = self.pre_func(single_img)
            image_batch_after[idx] = single_img
        
        return image_batch_after, label_batch
    
    def on_epoch_end(self):
        if self.shuffle:
            self.image_paths, self.labels = sklearn.utils.shuffle(self.image_paths, self.labels)

In [None]:
from sklearn.model_selection import train_test_split

# 학습 데이터의 50%를 검증 데이터에 할당. 
def get_train_valid_test(data_df):
    # 학습 데이터와 테스트 데이터용 Dataframe 생성. 
    train_df = data_df[data_df['type']=='train']
    test_df = data_df[data_df['type']=='test']

    # 학습 데이터의 image path와 label을 Numpy array로 변환 및 Label encoding
    train_path = train_df['path'].values
    train_label = pd.factorize(train_df['label'])[0]
    
    test_path = test_df['path'].values
    test_label = pd.factorize(test_df['label'])[0]

    tr_path, val_path, tr_label, val_label = train_test_split(train_path, train_label, test_size=0.5, random_state=2021)
    print('학습용 path shape:', tr_path.shape, '검증용 path shape:', val_path.shape, 
      '학습용 label shape:', tr_label.shape, '검증용 label shape:', val_label.shape)
    return tr_path, tr_label, val_path, val_label, test_path, test_label

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam , RMSprop 
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception, MobileNetV2

def create_model(model_name='mobilenet', verbose=False):
    
    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    if model_name == 'vgg16':
        base_model = VGG16(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'resnet50':
        base_model = ResNet50V2(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'xception':
        base_model = Xception(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'mobilenet':
        base_model = MobileNetV2(input_tensor=input_tensor, include_top=False, weights='imagenet')
    
    bm_output = base_model.output

    x = GlobalAveragePooling2D()(bm_output)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu', name='fc1')(x)
    # 최종 output 출력을 softmax에서 sigmoid로 변환. 
    output = Dense(1, activation='sigmoid', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)
    
    if verbose:
        model.summary()
        
    return model

In [None]:
# Fine Tuning 하기 위해서 Pretrained Model 레이어 구조 보기
model = create_model('mobilenet', verbose=False)

# model에 Layer 속성값이 리스트로 들어있음
print(type(model.layers), model.layers[:10])

In [None]:
# 레이어 이름과 각 레이어의 Trainable 상태 볼 수 있음 -> 이것을 활용해서 Fine Tuning 가능함
for layer in model.layers[0:10]:
    print('Name:', layer.name, 'Trainable:', layer.trainable)

In [None]:
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_pre_func
from tensorflow.keras import layers

FIRST_EPOCHS = 10
SECOND_EPOCHS = 10

tr_path, tr_label, val_path, val_label, test_path, test_label = get_train_valid_test(data_df)

tr_ds = CustomDS(tr_path, tr_label, BATCH_SIZE, shuffle=True, augmentor=None, pre_func=mobilenet_pre_func)
val_ds = CustomDS(val_path, val_label, BATCH_SIZE, shuffle=False, augmentor=None, pre_func=mobilenet_pre_func)

# Pretrained Model 로드
model = create_model(model_name='mobilenet', verbose=False)
model.compile(Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
# Feature Extractor Layer들은 파라미터 Freeze 시키기
for layer in model.layers[:-4]:
    layer.trainable = False
# Freeze 시킨 상태에서 10번 학습(즉, Custom으로 만든 Classifier Layer들만 파라미터 학습)
first_hist = model.fit(tr_ds, epochs=FIRST_EPOCHS, validation_data=val_ds)

# 모든 Layer들 다 Freeze 풀고 파라미터 학습(단, BN 레이어는 여전히 Freeze 시키기. Keras에선 그렇게 하는 것을 권고한다고 함..)
for layer in model.layers:
    if not isinstance(layer, layers.BatchNormalization):  # layer객체가 tf.keras.layers.BatchNormalization 클래스인지 확인
        layer.trainable = True

# 다시 학습시키기 위해 모델 재 컴파일 -> 학습률 약간 낮추기
model.compile(Adam(lr=0.00001), loss='binary_crossentropy', metrics=['accuracy'])
second_hist = model.fit(tr_ds, epochs=SECOND_EPOCHS, validation_data=val_ds)

## Learning Rate Scheduler
- Epoch Step에 따라 특정한 규칙을 적용해서 학습률을 조정
- ``tf.keras.callbacks.LearningRateScheduler(scheduler_func, verbose=1)`` 형태로 사용자가 ``scheduler_func``의 Customized 함수를 만들어 넣으면 됨
- ``scheduler_func``은 인자를 ``epoch``와 ``lr``을 입력받아서 학습률을 내맘대로 조정히고 조정된 학습률을 return 하도록 작성
    - ``epoch`` 인자만을 입력받을 수 있긴한데, 그러려면 ``scheduler_func`` 함수 내부에서 최초의 lr를 정의해주어야 함!
    - ``epoch``의 인덱스는 0부터 시작하기 때문에, 최초의 epoch는 ``epoch = 0`` 임. 그러므로 함수 내부에서 작성해줄 때 유의
    - <a href='https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/LearningRateScheduler'>관련 문서</a>

In [None]:
# Exponentially decay
import numpy as np
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

def scheduler_func(epoch, lr):
    if epoch < 1:
        return lr
    else:
        return lr * np.exp(-1.0)

model = Sequential([Dense(20), Dense(5)])
model.compile(SGD(), loss='mse')
print('최초 SGD lr:', round(model.optimizer.lr.numpy(), 5))

lr_scheduler = LearningRateScheduler(scheduler_func, verbose=1)
history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5),
                   epochs=15, callbacks=[lr_scheduler])

In [None]:
# Step decay
def step_decay(epoch):
    init_lr = 0.1
    drop = 0.5
    epochs_drop = 5.0
    lr = init_lr * (drop ** np.floor(epoch/epochs_drop))
    print('epoch:', epoch, 'lr:', lr)
    return lr

model = Sequential([Dense(20), Dense(5)])
model.compile(SGD(), loss='mse')
print('최초 lr:', round(model.optimizer.lr.numpy(), 5))

lr_scheduler = LearningRateScheduler(step_decay, verbose=1)
history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), epochs=20, callbacks=[lr_scheduler])

In [None]:
# Ramp Up and Step Down Decay
def lrfn(epoch):
    LR_START = 1e-5
    LR_MAX = 1e-2
    LR_RAMPUP_EPOCHS = 3
    LR_SUSTAIN_EPOCHS = 3
    LR_STEP_DECAY = 0.75
    
    def calc_fn(epoch):
        if epoch < LR_RAMPUP_EPOCHS:
            lr = ((LR_MAX - LR_START) / LR_RAMPUP_EPOCHS) * epoch + LR_START
        elif epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
            lr = LR_MAX
        else:
            lr = LR_MAX * LR_STEP_DECAY**((epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS)//2)
        
        print('epoch:', epoch, 'lr:', lr)
        
        return lr
    
    # 반드시 내포 함수인 calc_fn(epoch)를 호출해야함. 
    return calc_fn(epoch)

import tensorflow as tf
import numpy as np


model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
model.compile(tf.keras.optimizers.SGD(), loss='mse')

lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lrfn)
history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5),
                    epochs=30, callbacks=[lr_scheduler], verbose=1)

### ``tf.keras.experimental``로 Learning Rate Scheduler 하기
- 단, ``tf.keras.experimental``는 ``tf.keras.optimizers.schedule.LearningRateSchedule``을 상속받기 때문에 모델 comile시 optimizer 설정에 넣어주어야 함!
- optimizer 설정에 넣어주기 때문에 해당 Learning Rate Scheduler 방법은 학습 시 배치 단위로 학습률을 조정함
    - ``tf.keras.callbacks.LearningRateScheduler``는 epoch 단위로 학습률을 조정했음!

In [None]:
from tensorflow.keras.experimental import CosineDecay
import matplotlib.pyplot as plt

def plot_scheduler(epochs_lst, lr_lst, title=None):
    plt.figure(figsize=(6, 4))
    plt.plot(epochs_lst, lr_lst)
    plt.xlabel('epoch')
    plt.ylabel('Learning Rate')
    plt.title(title)
    plt.show()

cos_decay = CosineDecay(initial_learning_rate=0.01, decay_steps=30, alpha=0.0, name='test')  # name 속성 넣으면 에러 발생

steps_lst = range(0, 30)
lr_lst = cos_decay(steps_lst)

plot_scheduler(steps_lst, lr_lst, title='Cosine Decay')

In [None]:
# 모델에 넣어보기
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.experimental import CosineDecay
import numpy as np

cos_decay = CosineDecay(initial_learning_rate=0.1, decay_steps=30, alpha=0.0, name='Cosine_Decay')

model = Sequential([Dense(10), Dense(5)])
model.compile(Adam(learning_rate=cos_decay), loss='mse')

model.fit(np.arange(50).reshape(5, -1), np.zeros((5)), epochs=30, verbose=1)

In [None]:
# Cosine Decay Restart
from tensorflow.keras.experimental import CosineDecayRestarts

# t_mul은 first_decay_steps를 할 때마다 몇 배 단위로 할 것인지
# m_mul은 warm restart시 마다 적용될 학습률을 그 전 학습률에 비해 얼마나 늘리거나 줄일지
cos_decay_restart = CosineDecayRestarts(initial_learning_rate=0.1, first_decay_steps=10, t_mul=2, m_mul=0.8, alpha=0.0)

steps_lst = [x for x in range(100)]
lr_lst = cos_decay_restart(steps_lst)

plot_scheduler(steps_lst, lr_lst, title='Cosine_Decay_Restart')