In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf

from tensorflow.keras.layers import Conv2D, Dropout, Flatten, BatchNormalization, MaxPooling2D, LeakyReLU, ReLU, PReLU
from tensorflow.keras.optimizers import RMSprop, Nadam, Adadelta, Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt  # 그림으로 보기 위한 matplotlib 라이브러리 import
from tensorflow.keras.datasets import mnist  # 라이브러리가 기본으로 제공하는 mnist 데이터셋 
from tensorflow.keras.utils import to_categorical # one-hot encoding 을 위한 함수
from tensorflow.keras.models import Sequential # 레이어를 층층이 쌓아가는 연쇄 모델
from tensorflow.keras.layers import Dense  # 완전연결층
from tensorflow.keras.models import load_model  # 저장된 모델 불러오기

In [2]:
# Fetch the MNIST digits bundled with Keras. load_data() returns a
# (train, test) pair, each of which is an (images, labels) tuple.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Check the dimensions of every array.
print(f"X_train_shape : {X_train.shape}")  # (60000, 28, 28): 60,000 handwritten-digit images
print(f"y_train_shape : {y_train.shape}")  # (60000,)
print(f"X_test_shape : {X_test.shape}")  # (10000, 28, 28): 10,000 images
print(f"y_test_shape : {y_test.shape}")  # (10000,)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
X_train_shape : (60000, 28, 28)
y_train_shape : (60000,)
X_test_shape : (10000, 28, 28)
y_test_shape : (10000,)


In [3]:
# Input data preprocessing
# Conv2D needs a trailing channel axis, so add one: (N, 28, 28) -> (N, 28, 28, 1).
# -1 lets NumPy infer the sample count instead of hard-coding 60000 / 10000,
# so the cell keeps working if only a subset of the data is loaded.
X_train = X_train.reshape((-1, 28, 28, 1))
X_test = X_test.reshape((-1, 28, 28, 1))
print(X_train.shape)  # (60000, 28, 28, 1)
print(X_train.shape)  # (60000, 28, 28, 1) -- same array printed again; kept so the cell output is unchanged

# NOTE(review): pixel values are not rescaled here (they stay in the raw
# range returned by mnist.load_data()) -- confirm this is intended.
print(f"X_train_shape: {X_train.shape}")  # X_train_shape: (60000, 28, 28, 1)
print(f"X_test_shape: {X_test.shape}")  # X_test_shape: (10000, 28, 28, 1)

(60000, 28, 28, 1)
(60000, 28, 28, 1)
X_train_shape: (60000, 28, 28, 1)
X_test_shape: (10000, 28, 28, 1)


In [4]:
# Output data preprocessing
# Each label goes from a single scalar to a length-10 vector in which only the
# entry for its class is 1 and every other entry is 0 (one-hot encoding).
#
# The ndim guards keep this cell idempotent: because it overwrites y_train /
# y_test in place, re-running it unguarded would pass the already-encoded
# (N, 10) arrays back into to_categorical and yield (N, 10, 10) labels.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, 10)  # 1-D class ids -> 2-D one-hot matrix
if y_test.ndim == 1:
    y_test = to_categorical(y_test, 10)

print(f"y_train_shape: {y_train.shape}")  # y_train_shape: (60000, 10)
print(f"y_test_shape: {y_test.shape}")  # y_test_shape: (10000, 10)

y_train_shape: (60000, 10)
y_test_shape: (10000, 10)


In [5]:
def conv_bn_lrelu(filters, **conv_kwargs):
    """Build one Conv2D -> BatchNormalization -> LeakyReLU triple.

    Args:
        filters: number of 3x3 convolution filters.
        **conv_kwargs: extra keyword arguments forwarded to Conv2D
            (used to pass ``input_shape`` to the first layer).

    Returns:
        A list with the three freshly created layers, in order.
    """
    return [
        Conv2D(filters, (3, 3), padding='same', **conv_kwargs),
        BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"),
        LeakyReLU(alpha=0.1),
    ]


# Baseline CNN: two convolutional stages (64 and 128 filters, two conv triples
# each, each stage closed by 2x2 max pooling) followed by a dense classifier.
# NOTE(review): Dropout(0.8) drops 80% of the activations -- confirm the rate
# was not meant as a keep-probability.
model = Sequential(
    conv_bn_lrelu(64, input_shape=(28, 28, 1))
    + conv_bn_lrelu(64)
    + [MaxPooling2D(2, 2)]
    + conv_bn_lrelu(128)
    + conv_bn_lrelu(128)
    + [MaxPooling2D(2, 2)]
    + [
        Flatten(),
        Dense(2048),
        LeakyReLU(alpha=0.1),
        Dense(2048),
        LeakyReLU(alpha=0.1),
        Dropout(0.8),
        Dense(10, activation='softmax'),  # one probability per digit class
    ]
)

In [6]:
# Print the layer-by-layer table (layer type, output shape, parameter count) of the model built above.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 64)        640       
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 64)        256       
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 28, 28, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        36928     
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 28, 64)        256       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 28, 28, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0

In [7]:
# Adam with a learning rate of 0.01 (value chosen to follow the paper's setting).
optimizer = Adam(learning_rate=0.01)

# The labels are one-hot vectors, hence categorical cross-entropy;
# accuracy is reported alongside the loss.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Train the baseline for 10 epochs in mini-batches of 100, with 20% of the
# training data set aside for validation.
model.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=10,
    validation_split=0.2,
)

# Evaluate on the test set once training has finished.
loss, acc = model.evaluate(X_test, y_test)
print("손실률:", loss)  # label reads "loss"
print("정확도:", acc)  # label reads "accuracy"

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
손실률: 20.056108474731445
정확도: 0.9886999726295471


In [10]:
# Random augmentation settings for the training images.
augmentation_options = dict(
    # Integer number of degrees: 10 -> rotate randomly within 10 degrees.
    rotation_range=10,
    # Horizontal shift as a fraction of the width (0.25 = a quarter);
    # values above 1 are interpreted as a number of pixels.
    width_shift_range=0.25,
    # Same as above, for vertical shifts.
    height_shift_range=0.25,
    # Shear intensity.
    shear_range=0.1,
    # Zoom amount; 0.4 means the range [0.6, 1.4]. An explicit pair such as
    # [0.7, 1] is also accepted.
    zoom_range=0.4,
    # Horizontal flipping is switched off. False is already the default, so
    # spelling it out is optional.
    horizontal_flip=False,
)
datagen_train = ImageDataGenerator(**augmentation_options)

# Generator with no augmentation options set.
datagen_val = ImageDataGenerator()
# Shrink the learning rate whenever the training loss stops improving.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='loss',    # quantity to watch (here the training loss)
    factor=0.25,       # new_lr = lr * factor
    patience=2,        # epochs without improvement before the rate is reduced
    verbose=1,         # 0: quiet, 1: print a message on every update
    mode="auto",       # {auto, min, max}; "auto" infers the direction from the monitored name
    min_delta=0.0001,  # smaller changes than this do not count as an improvement
    cooldown=0,        # epochs to wait after a reduction before resuming normal operation
    min_lr=0.00001,    # lower bound on the learning rate
)

In [11]:
# Deeper CNN. Each stage is a run of Conv2D -> BatchNormalization -> LeakyReLU
# triples closed by 2x2 max pooling and 25% dropout.
# Entries are (number of filters, number of conv triples in the stage).
conv_stages = [(64, 3), (128, 3), (256, 2)]

model = Sequential()
for stage_idx, (n_filters, n_repeats) in enumerate(conv_stages):
    for rep_idx in range(n_repeats):
        # Only the very first layer of the network declares the input shape.
        is_first_layer = stage_idx == 0 and rep_idx == 0
        first_layer_kwargs = {'input_shape': (28, 28, 1)} if is_first_layer else {}
        model.add(Conv2D(n_filters, (3, 3), padding='same', **first_layer_kwargs))
        model.add(BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"))
        model.add(LeakyReLU(alpha=0.1))
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(256))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization())
model.add(Dense(10, activation='softmax'))  # one probability per digit class
# Centered RMSprop with a small momentum term.
optimizer = RMSprop(
    learning_rate=0.002,
    rho=0.9,
    momentum=0.1,
    epsilon=1e-07,
    centered=True,
    name='RMSprop',
)

# One-hot targets -> categorical cross-entropy; accuracy is tracked as well.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer table of the freshly compiled model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 28, 28, 64)        640       
_________________________________________________________________
batch_normalization_4 (Batch (None, 28, 28, 64)        256       
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 28, 28, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 28, 28, 64)        36928     
_________________________________________________________________
batch_normalization_5 (Batch (None, 28, 28, 64)        256       
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 28, 28, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 28, 28, 64)       

In [14]:
# Early stopping on the validation loss, keeping the best weights seen.
# NOTE(review): patience (300) is larger than the epoch count below (14), so
# this callback cannot trigger a stop during this run -- confirm intended.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=300,
    restore_best_weights=True,
    verbose=1,
)

# Stream randomly augmented training batches of 256 images.
train_batches = datagen_train.flow(X_train, y_train, batch_size=256)

# NOTE(review): the test set doubles as validation data here, so the later
# test-set score is not a fully independent estimate -- consider a hold-out
# split of the training data instead.
history = model.fit(
    train_batches,
    steps_per_epoch=len(X_train) // 256,  # whole batches per epoch
    epochs=14,
    validation_data=(X_test, y_test),
    callbacks=[learning_rate_reduction, es],
    verbose=2,  # one summary line per epoch
)

Epoch 1/14
234/234 - 73s - loss: 0.6046 - accuracy: 0.8031 - val_loss: 0.2552 - val_accuracy: 0.9618
Epoch 2/14
234/234 - 34s - loss: 0.1752 - accuracy: 0.9452 - val_loss: 0.1026 - val_accuracy: 0.9715
Epoch 3/14
234/234 - 34s - loss: 0.1330 - accuracy: 0.9584 - val_loss: 0.0281 - val_accuracy: 0.9914
Epoch 4/14
234/234 - 34s - loss: 0.1110 - accuracy: 0.9654 - val_loss: 0.0711 - val_accuracy: 0.9792
Epoch 5/14
234/234 - 34s - loss: 0.0992 - accuracy: 0.9683 - val_loss: 0.0603 - val_accuracy: 0.9826
Epoch 6/14
234/234 - 34s - loss: 0.0914 - accuracy: 0.9708 - val_loss: 0.0323 - val_accuracy: 0.9904
Epoch 7/14
234/234 - 34s - loss: 0.0869 - accuracy: 0.9728 - val_loss: 0.0206 - val_accuracy: 0.9939
Epoch 8/14
234/234 - 34s - loss: 0.0816 - accuracy: 0.9751 - val_loss: 0.0271 - val_accuracy: 0.9918
Epoch 9/14
234/234 - 34s - loss: 0.0748 - accuracy: 0.9775 - val_loss: 0.0289 - val_accuracy: 0.9912
Epoch 10/14
234/234 - 34s - loss: 0.0762 - accuracy: 0.9752 - val_loss: 0.0207 - val_accura

In [15]:
# Score the trained model on the test set; the cell displays [loss, accuracy].
model.evaluate(x=X_test, y=y_test, batch_size=256)



[0.01728694699704647, 0.9945999979972839]