##### Copyright 2020 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Data augmentation

## Overview

`tf.image` 를 사용한 image augmentation 실습

## Setup

In [None]:
!pip install -q git+https://github.com/tensorflow/docs

In [None]:
# NOTE(review): urllib is not referenced in the visible cells of this notebook.
import urllib

import tensorflow as tf
# NOTE(review): mnist is not referenced in the visible cells; MNIST is loaded
# through tensorflow_datasets (tfds) instead.
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers
# Value passed to `num_parallel_calls` / `prefetch` in the tf.data pipelines.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# tensorflow_docs.plots supplies the HistoryPlotter used for the result charts.
import tensorflow_docs as tfdocs
import tensorflow_docs.plots

import tensorflow_datasets as tfds

import PIL.Image

import matplotlib.pyplot as plt
import matplotlib as mpl
# Default (width, height) for figures created after this point.
mpl.rcParams['figure.figsize'] = (12, 5)

# NOTE(review): np is not referenced in the visible cells of this notebook.
import numpy as np

Data augmentation 결과를 확인하기 위해 [image 다운로드](https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg) (by Von.grzanka)

In [None]:
# Download the sample cat photo through Keras' file helper.
CAT_IMAGE_URL = "https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg"
image_path = tf.keras.utils.get_file(fname="cat.jpg", origin=CAT_IMAGE_URL)
# Last expression of the cell, so the notebook renders the picture inline.
PIL.Image.open(image_path)

데이터를 읽어서 Tensor format으로 변환

In [None]:
# Raw JPEG bytes of the downloaded file, as a string tensor.
image_string = tf.io.read_file(filename=image_path)
# Decode to an image tensor, forcing three (RGB) channels.
image = tf.image.decode_jpeg(contents=image_string, channels=3)

시각화 & 비교하기 위한 함수 정의


In [None]:
def visualize(original, augmented):
  """Show an image and its augmented version side by side in a new figure.

  The pyplot state machine is used deliberately: after the call the
  right-hand (augmented) panel is the current axes/image, so follow-up
  calls such as `plt.colorbar()` apply to the augmented image.

  Args:
    original: Image accepted by `plt.imshow`; drawn in the left panel.
    augmented: Image accepted by `plt.imshow`; drawn in the right panel.
  """
  plt.figure()
  panels = (('Original image', original), ('Augmented image', augmented))
  # Subplot positions are 1-based: 1 is the left panel, 2 the right one.
  for position, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(caption)
    plt.imshow(picture)

## Augment a single image

### 이미지 대칭
수직축 또는 수평축으로 이미지 대칭을 생성

In [None]:
# Mirror the image left <-> right and compare it with the source picture.
flipped = tf.image.flip_left_right(image)
visualize(original=image, augmented=flipped)

### 흑백 이미지 생성
이미지를 흑백 이미지로 만듦

In [None]:
# Collapse the RGB channels into a single grayscale channel.
grayscaled = tf.image.rgb_to_grayscale(image)
# Drop the size-1 channel axis before plotting the single-channel result.
visualize(image, tf.squeeze(grayscaled))
# The colorbar attaches to the augmented (current) image; the trailing
# semicolon keeps the Colorbar object's repr out of the cell output.
plt.colorbar();

### 이미지 채도 변경
채도 factor를 통한 이미지 채도 변경

In [None]:
# Scale the image's saturation by a factor of 3 and compare.
saturated = tf.image.adjust_saturation(image, saturation_factor=3)
visualize(original=image, augmented=saturated)

### 이미지 밝기 변경
밝기 factor를 통한 이미지 밝기 변경

In [None]:
# Brighten the image by a delta of 0.4 and compare.
bright = tf.image.adjust_brightness(image, delta=0.4)
visualize(original=image, augmented=bright)

### 이미지 회전
이미지를 반시계 방향으로 90도 회전

In [None]:
# One quarter turn (k=1 is the default number of 90-degree rotations).
rotated = tf.image.rot90(image, k=1)
visualize(original=image, augmented=rotated)

### 이미지 잘라내기
이미지 가운데 부분을 잘라내기

In [None]:
# Keep only the central region covering half of each spatial dimension.
CENTRAL_FRACTION = 0.5
cropped = tf.image.central_crop(image, central_fraction=CENTRAL_FRACTION)
visualize(original=image, augmented=cropped)

`tf.image` documentation 을 통해 사용가능한 다양한 augmentation 옵션들에 대해서 확인할 수 있음

## 데이터셋을 변형하여 모델 학습시키기

In [None]:
# Load MNIST as (image, label) pairs together with the dataset metadata.
dataset, info = tfds.load(name='mnist', with_info=True, as_supervised=True)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Size of the full training split, taken from the dataset metadata.
num_train_examples = info.splits['train'].num_examples

데이터셋을 augmentation 하기 위한 함수 정의

In [None]:
def convert(image, label):
  """Convert an image to float32 and pass the label through unchanged.

  Args:
    image: Image tensor; `tf.image.convert_image_dtype` casts it to float32
      and normalizes the values to [0, 1].
    label: Label belonging to `image`; returned as-is.

  Returns:
    A `(float32_image, label)` tuple.
  """
  normalized = tf.image.convert_image_dtype(image, dtype=tf.float32)
  return normalized, label

def augment(image, label):
  """Apply random training-time augmentation to one (image, label) pair.

  The image is converted to float32 via `convert`, padded to 34x34,
  randomly cropped back to 28x28x1 and finally given a random brightness
  shift. The label is returned unchanged.

  Args:
    image: Single-channel image tensor (the crop size assumes 28x28x1,
      i.e. MNIST-shaped input).
    label: Label belonging to `image`.

  Returns:
    An `(augmented_image, label)` tuple.
  """
  # convert() already returns a float32 image, so no further dtype
  # conversion is needed here.
  image, label = convert(image, label)
  # Grow the canvas to 34x34 (6 extra pixels per dimension for 28x28 input)
  # so the random crop below acts as a small random translation.
  image = tf.image.resize_with_crop_or_pad(image, 34, 34)
  image = tf.image.random_crop(image, size=[28, 28, 1])  # Random crop back to 28x28
  # NOTE(review): the brightness shift is not clipped here, so values may
  # leave the [0, 1] range - confirm whether clipping is desired.
  image = tf.image.random_brightness(image, max_delta=0.5)

  return image, label

In [None]:
# Size of the training subset. Kept deliberately small for this tutorial so
# that the model overfits easily.
NUM_EXAMPLES = 2048
# Mini-batch size for the training pipelines (validation uses twice this).
BATCH_SIZE = 64

augmentation이 적용된 데이터셋 생성

In [None]:
# Training pipeline WITH augmentation.
augmented_train_batches = (
    train_dataset
    # Small subset only, so the effect shows up quickly.
    .take(count=NUM_EXAMPLES)
    # Cache ahead of shuffle/map so the random steps are re-run every epoch.
    .cache()
    .shuffle(buffer_size=num_train_examples // 4)
    # Random augmentation happens here, per example.
    .map(map_func=augment, num_parallel_calls=AUTOTUNE)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

비교를 위한 augmentation이 적용되지 않은 데이터셋도 생성

In [None]:
# Baseline training pipeline WITHOUT augmentation (same subset and batching).
non_augmented_train_batches = (
    train_dataset
    # Small subset only, so the effect shows up quickly.
    .take(count=NUM_EXAMPLES)
    .cache()
    .shuffle(buffer_size=num_train_examples // 4)
    # Dtype conversion only; the pixels are otherwise untouched.
    .map(map_func=convert, num_parallel_calls=AUTOTUNE)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

Validation 데이터셋 정의 (augmentation 미적용)

In [None]:
# Validation pipeline over the test split: dtype conversion only, with no
# augmentation, batched at twice the training batch size.
validation_batches = (
    test_dataset
    .map(map_func=convert, num_parallel_calls=AUTOTUNE)
    .batch(batch_size=2 * BATCH_SIZE)
)

Keras를 이용한 모델 생성 함수 정의

In [None]:
def make_model():
  """Build and compile a fresh fully-connected classifier.

  The network flattens a 28x28x1 input, applies two 4096-unit ReLU layers
  and ends in a 10-unit layer that outputs raw logits (hence
  `from_logits=True` in the loss).

  Returns:
    A compiled `tf.keras.Sequential` model using the Adam optimizer,
    sparse categorical cross-entropy loss and the accuracy metric.
  """
  stack = [
      layers.Flatten(input_shape=(28, 28, 1)),
      layers.Dense(4096, activation='relu'),
      layers.Dense(4096, activation='relu'),
      layers.Dense(10),  # Raw logits, no softmax.
  ]
  network = tf.keras.Sequential(stack)
  network.compile(
      optimizer='adam',
      loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=['accuracy'],
  )
  return network

모델 학습 (**without** augmentation)

In [None]:
# Baseline run: a fresh model trained on the non-augmented pipeline.
model_without_aug = make_model()

# The returned History object is kept for the comparison plots.
no_aug_history = model_without_aug.fit(
    x=non_augmented_train_batches,
    epochs=50,
    validation_data=validation_batches,
)

모델 학습 (with augmentation)

In [None]:
# Augmented run: a fresh model trained on the augmented pipeline, with the
# same epoch count and validation data as the baseline run.
model_with_aug = make_model()

# The returned History object is kept for the comparison plots.
aug_history = model_with_aug.fit(
    x=augmented_train_batches,
    epochs=50,
    validation_data=validation_batches,
)

## 결론

Augmentation을 사용한 모델의 accuracy가 95% 까지 나옴
- 이는 augmentation을 하지 않은 결과보다 1% 높은 수치

In [None]:
# Plot the accuracy histories of both training runs on one chart.
plotter = tfdocs.plots.HistoryPlotter()
plotter.plot({"Augmented": aug_history, "Non-Augmented": no_aug_history}, metric = "accuracy")
plt.title("Accuracy")
# Zoom into the 0.75-1.0 band; the trailing semicolon keeps the returned
# limits tuple out of the cell output.
plt.ylim([0.75,1]);

Loss 측면에서 살펴보면, augmentation을 사용한 모델이 그렇지 않은 모델 대비 overfitting을 피한 것을 알 수 있음 (단, augmentation에 시간이 소요되기 때문에 학습 시간은 더 오래 걸림)

In [None]:
# Plot the loss histories of both training runs on one chart.
plotter = tfdocs.plots.HistoryPlotter()
plotter.plot({"Augmented": aug_history, "Non-Augmented": no_aug_history}, metric = "loss")
plt.title("Loss")
# Limit the y-axis to 0-1; the trailing semicolon keeps the returned limits
# tuple out of the cell output.
plt.ylim([0,1]);