<a href="https://colab.research.google.com/github/yangjiwoong1/AIFFEL_quest_rs/blob/main/GoingDeeper/Quest01/ablation_study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import tensorflow_datasets as tfds
import keras
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd

In [11]:
(ds_train, ds_test), ds_info = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:]'],
    as_supervised=True,
    shuffle_files=True,
    with_info=True,
)

In [25]:
batch_size = 32
img_size = (224, 224)

def preprocess(image, label):
    image = tf.image.resize(image, img_size)       # (224, 224, 3)로 resize
    image = tf.cast(image, tf.float32) / 255.0     # 0~1 정규화
    return image, label

# 전처리
ds_train = ds_train.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
ds_train = ds_train.batch(batch_size).prefetch(tf.data.AUTOTUNE)
ds_test = ds_test.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.batch(batch_size).prefetch(tf.data.AUTOTUNE)

## 모델 함수 정의
+ plain net
+ ResNet

In [15]:
class AddResidual(keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def call(self, inputs):
        return inputs[0] + inputs[1]

def build_resnet_blocks(input_tensor, n_blocks, n_convs, kernel_sizes, n_channels, is_first_stage = False, is_plain = False):
    x = input_tensor
    for index_block in range(n_blocks):
        # projection (1x1 conv) 조건
        residual = x

        for index_conv in range(n_convs):
            # stride (2,2) 조건
            is_downsampling_layer = (not is_first_stage and index_block == 0 and index_conv == 0)

            residual = keras.layers.Conv2D(
                filters = n_channels[index_conv],
                kernel_size = kernel_sizes[index_conv],
                strides = (2,2) if is_downsampling_layer else (1,1),
                padding = 'same',
                use_bias = False
            )(residual)
            residual = keras.layers.BatchNormalization()(residual)

            # 마지막 레이어는 ReLU 적용 X
            if index_conv != n_convs - 1:
                residual = keras.layers.ReLU()(residual)

        if not is_plain:
            # 하나의 블록이 만들어지면, residual과 x를 연결
            # Downsampling이 필요한 경우 (stage3, stage4, stage5의 첫 번째 블록)
            if not is_first_stage and index_block == 0:
                x = keras.layers.Conv2D(
                    filters=n_channels[-1], # residual 경로의 출력 채널 수와 동일하게
                    kernel_size=1,
                    strides=(2,2), # Downsampling
                    padding='same',
                    use_bias=False
                )(x) # 현재 블록의 원본 입력 'x'에 적용
                x = keras.layers.BatchNormalization()(x)
            # Downsampling은 없지만 채널 수가 변경되어야 하는 경우
            # (예: ResNet-50의 stage2 첫 번째 블록, 입력 채널 x.shape[-1]과 residual 출력 채널 n_channels[-1]이 다를 때)
            elif index_block == 0 and x.shape[-1] != n_channels[-1]:
                x = keras.layers.Conv2D(
                    filters=n_channels[-1], # residual 경로의 출력 채널 수와 동일하게
                    kernel_size=1,
                    strides=(1,1), # Stride 1 (크기 유지, 채널만 변경)
                    padding='same',
                    use_bias=False
                )(x) # 현재 블록의 원본 입력 'x'에 적용
                x = keras.layers.BatchNormalization()(x)
            output = AddResidual()([residual, x])
            output = keras.layers.ReLU()(output)
            x = output

        else: # plain
            output = keras.layers.ReLU()(residual)
            x = output

    return output

def build_resnet(input_shape, is_50, is_plain = False):
    # [n_blocks, n_convs, kernel_sizes, n_channels]
    config34 = {
        'stage2': [3, 2, [3, 3], [64, 64]],
        'stage3': [4, 2, [3, 3], [128, 128]],
        'stage4': [6, 2, [3, 3], [256, 256]],
        'stage5': [3, 2, [3, 3], [512, 512]]
    }

    config50 = {
        'stage2': [3, 3, [1, 3, 1], [64, 64, 256]],
        'stage3': [4, 3, [1, 3, 1], [128, 128, 512]],
        'stage4': [6, 3, [1, 3, 1], [256, 256, 1024]],
        'stage5': [3, 3, [1, 3, 1], [512, 512, 2048]]
    }

    input_layer = keras.layers.Input(shape=input_shape)
    output = input_layer

    # common head
    output = keras.layers.Conv2D(
        filters = 64,
        kernel_size = 7,
        strides = (2, 2),
        padding = 'same',
        use_bias = True
    )(output)
    output = keras.layers.BatchNormalization()(output)
    output = keras.layers.ReLU()(output)
    output = keras.layers.MaxPooling2D(
        pool_size=(3, 3),
        strides = (2, 2),
        padding = 'same'
    )(output)

    if (not is_50): # 34
        for idx, stage_name in enumerate(['stage2', 'stage3', 'stage4', 'stage5']):
            n_blocks, n_convs, kernel_sizes, n_channels = config34[stage_name]
            is_first_stage = (idx == 0)
            output = build_resnet_blocks(output, n_blocks, n_convs, kernel_sizes, n_channels, is_first_stage, is_plain)

    if (is_50): # 50
        for idx, stage_name in enumerate(['stage2', 'stage3', 'stage4', 'stage5']):
            n_blocks, n_convs, kernel_sizes, n_channels = config50[stage_name]
            is_first_stage = (idx == 0)
            output = build_resnet_blocks(output, n_blocks, n_convs, kernel_sizes, n_channels, is_first_stage, is_plain)

    output = keras.layers.GlobalAveragePooling2D()(output)
    output = keras.layers.Dense(1, activation='sigmoid')(output)

    model = keras.Model(inputs=input_layer, outputs=output)

    return model

def build_plainnet(input_shape, is_50):
    return build_resnet(input_shape, is_50, True)

In [16]:
resnet_34 = build_resnet(input_shape = (224,224,3), is_50 = False)
resnet_34.summary()

In [17]:
resnet_34_plain = build_plainnet(input_shape = (224,224,3), is_50 = False)
resnet_34_plain.summary()

In [18]:
resnet_50 = build_resnet(input_shape = (224,224,3), is_50 = True)
resnet_50.summary()

In [19]:
resnet_50_plain = build_plainnet(input_shape = (224,224,3), is_50 = True)
resnet_50_plain.summary()

## 컴파일 및 훈련
- 초기 epoch를 10으로 했지만 너무 오래 걸려 5로 변경

In [27]:
models = [resnet_34, resnet_34_plain, resnet_50, resnet_50_plain]

for model in models:
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

In [28]:
history_resnet_34 = resnet_34.fit(
    ds_train,
    epochs=10,
    validation_data=ds_test,
    verbose=1,
)

Epoch 1/10
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 240ms/step - accuracy: 0.5960 - loss: 0.7354 - val_accuracy: 0.5802 - val_loss: 0.8319
Epoch 2/10
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 193ms/step - accuracy: 0.7004 - loss: 0.5748 - val_accuracy: 0.5469 - val_loss: 1.2353
Epoch 3/10
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 193ms/step - accuracy: 0.7671 - loss: 0.4868 - val_accuracy: 0.7356 - val_loss: 0.5819
Epoch 4/10
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 193ms/step - accuracy: 0.8106 - loss: 0.4212 - val_accuracy: 0.6346 - val_loss: 1.4038
Epoch 5/10
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 193ms/step - accuracy: 0.8454 - loss: 0.3542 - val_accuracy: 0.5305 - val_loss: 3.3369
Epoch 6/10
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 197ms/step - accuracy: 0.8761 - loss: 0.2933 - val_accuracy: 0.6752 - val_loss: 1.3914
Epoc

In [29]:
history_resnet_34_plain = resnet_34_plain.fit(
    ds_train,
    epochs=5,
    validation_data=ds_test,
    verbose=1,
)

Epoch 1/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 230ms/step - accuracy: 0.5201 - loss: 0.7242 - val_accuracy: 0.5638 - val_loss: 0.6994
Epoch 2/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 184ms/step - accuracy: 0.5360 - loss: 0.6881 - val_accuracy: 0.5344 - val_loss: 0.6905
Epoch 3/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 189ms/step - accuracy: 0.5434 - loss: 0.6863 - val_accuracy: 0.5574 - val_loss: 0.7004
Epoch 4/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 182ms/step - accuracy: 0.5654 - loss: 0.6820 - val_accuracy: 0.5641 - val_loss: 0.7143
Epoch 5/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 182ms/step - accuracy: 0.5536 - loss: 0.6839 - val_accuracy: 0.5851 - val_loss: 0.6670


In [30]:
history_resnet_50 = resnet_50.fit(
    ds_train,
    epochs=5,
    validation_data=ds_test,
    verbose=1,
)

Epoch 1/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 338ms/step - accuracy: 0.5893 - loss: 0.8053 - val_accuracy: 0.5600 - val_loss: 0.7844
Epoch 2/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 295ms/step - accuracy: 0.6781 - loss: 0.6066 - val_accuracy: 0.5699 - val_loss: 1.8896
Epoch 3/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 294ms/step - accuracy: 0.7305 - loss: 0.5375 - val_accuracy: 0.7117 - val_loss: 0.5939
Epoch 4/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 294ms/step - accuracy: 0.7719 - loss: 0.4830 - val_accuracy: 0.7582 - val_loss: 0.9227
Epoch 5/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 312ms/step - accuracy: 0.7947 - loss: 0.4432 - val_accuracy: 0.7646 - val_loss: 0.7471


In [31]:
history_resnet_50_plain = resnet_50_plain.fit(
    ds_train,
    epochs=5,
    validation_data=ds_test,
    verbose=1,
)

Epoch 1/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 268ms/step - accuracy: 0.5137 - loss: 0.7391 - val_accuracy: 0.5254 - val_loss: 0.7026
Epoch 2/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 241ms/step - accuracy: 0.5663 - loss: 0.6751 - val_accuracy: 0.5099 - val_loss: 0.6911
Epoch 3/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 245ms/step - accuracy: 0.5753 - loss: 0.6684 - val_accuracy: 0.5099 - val_loss: 0.6814
Epoch 4/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 242ms/step - accuracy: 0.5860 - loss: 0.6650 - val_accuracy: 0.5099 - val_loss: 0.6930
Epoch 5/5
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 245ms/step - accuracy: 0.5848 - loss: 0.6640 - val_accuracy: 0.5099 - val_loss: 0.6739


In [41]:
val_acc_resnet_34_plain = history_resnet_34_plain.history['val_accuracy'][-1]
val_acc_resnet_34 = history_resnet_34.history['val_accuracy'][4]  # 5번째 에폭
val_acc_resnet_50_plain = history_resnet_50_plain.history['val_accuracy'][-1]
val_acc_resnet_50 = history_resnet_50.history['val_accuracy'][-1]

# 표 출력
print(f"{'':<10} {'plain':<10} {'ResNet':<10}")
print(f"{'34 layers':<10} {val_acc_resnet_34_plain*100:.2f}     {val_acc_resnet_34*100:.2f}")
print(f"{'50 layers':<10} {val_acc_resnet_50_plain*100:.2f}     {val_acc_resnet_50*100:.2f}")

           plain      ResNet    
34 layers  58.51     53.05
50 layers  50.99     76.46


- plain net 기준으로 34개의 레이어를 쓴 모델이 50개의 레이어를 쓴 모델보다 점수가 높다. 네트워크가 너무 깊어지면 학습이 제대로 되지 않는 것을 알 수 있다.
- 50개의 레이어를 쓴 모델을 기준으로 ResNet이 plane net보다 검증 점수가 높은 걸 알 수 있다. 네트워크를 깊게 하여 잔차 연결을 이용하면 학습에 훨씬 도움이 되는 것을 알 수 있다.

## 회고

- 50개 레이어 모델에서 월등한 검증 점수가 나오는 것을 보고 깊은 네트워크가 학습에 도움이 된다는 것과 잔차 연결이 학습에 도움이 된다는 것을 잘 알았다.
- 시간이 부족해서 epoch를 크게 못했는데 epoch를 크게하여 실험해보는 것도 의미가 있을 거 같다.