## Optimization & Training(Beginner)
- tf와 layers 패키지 불러오기

In [2]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import datasets
import numpy as np

### Prepare MNIST Dataset

In [3]:
# Load MNIST as two (images, labels) pairs: a training split and a test split.
(train_x, train_y), (test_x, test_y) = datasets.mnist.load_data()
# Display the shapes of the training images and labels as the cell output.
train_x.shape, train_y.shape

((60000, 28, 28), (60000,))

### Build Model

In [4]:
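# Alternative (disabled): a deeper CNN written with the Keras Functional API.
# The Sequential model defined further down is the one that is compiled and trained.
# inputs = layers.Input((28, 28, 1))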

# net = layers.Conv2D(32, (3, 3), padding='SAME')(inputs)
# net = layers.Activation('relu')(net)
# net = layers.Conv2D(32, (3, 3), padding='SAME')(net)
# net = layers.Activation('relu')(net)
# net = layers.MaxPooling2D(pool_size=(2, 2))(net)
# net = layers.Dropout(0.25)(net)

# net = layers.Conv2D(64, (3, 3), padding='SAME')(net)
# net = layers.Activation('relu')(net)
# net = layers.Conv2D(64, (3, 3), padding='SAME')(net)
# net = layers.Activation('relu')(net)
# net = layers.MaxPooling2D(pool_size=(2, 2))(net)
# net = layers.Dropout(0.25)(net)

# net = layers.Flatten()(net)
# net = layers.Dense(512)(net)
# net = layers.Activation('relu')(net)
# net = layers.Dropout(0.5)(net)
# net = layers.Dense(10)(net)  # num_classes
# net = layers.Activation('softmax')(net)

# model = tf.keras.Model(inputs=inputs, outputs=net, name='Basic_CNN')


# Small CNN classifier for 28x28 single-channel images, assembled layer by layer.
model = tf.keras.models.Sequential()

# Stage 1: 3x3 convolution (32 filters) -> 2x2 max-pooling -> dropout.
model.add(layers.Conv2D(32, (3, 3), strides=(1, 1), padding='SAME',
                        activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPool2D(pool_size=(2, 2), padding='same'))
model.add(layers.Dropout(0.25))

# Stage 2: 3x3 convolution (64 filters) -> 2x2 max-pooling -> dropout.
model.add(layers.Conv2D(64, (3, 3), strides=(1, 1), padding='SAME',
                        activation='relu'))
model.add(layers.MaxPool2D(pool_size=(2, 2), padding='same'))
model.add(layers.Dropout(0.25))

# Classifier head: flatten -> 128-unit ReLU layer -> dropout -> 10-way softmax.
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(10, activation='softmax'))

### Optimization
모델을 학습하기 전 설정
- Loss Function
- Optimization
- Metrics

### Loss Function
Loss Function 방법 확인

#### Categorical vs Binary
- 'binary_crossentropy'
- 'categorical_crossentropy', 'sparse_categorical_crossentropy'

#### sparse_categorical_crossentropy vs categorical_crossentropy
- one-hot encoding binary: tf.keras.losses.binary_crossentropy
- one-hot encoding multiple: tf.keras.losses.categorical_crossentropy
- one-hot encoding 아닐경우: tf.keras.losses.sparse_categorical_crossentropy


If your targets are one-hot encoded, use categorical_crossentropy
[1,0,0]
[0,1,0]
[0,0,1]

If your targets are integers (class indices), use sparse_categorical_crossentropy.
0
1
2

In [5]:
# The labels are used as integer class ids (train_y is a 1-D array, not one-hot),
# so the sparse variant of categorical cross-entropy is selected.
loss_fun = 'sparse_categorical_crossentropy'

### Metrics
- 모델을 평가하는 방법
- accuracy를 보통 많이 사용함 : 전체 샘플 중 정답을 맞춘 비율

accuracy를 이름으로 넣는 방법
- metrics = ['accuracy']
- metrics를 List 타입으로 주는 이유: accuracy, recall, precision 등 여러 지표를 동시에 지정할 수 있기 때문이다.

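tf.keras.metrics 객체를 직접 넘기는 방법 (주의: 이 노트북처럼 정수 레이블 + softmax 출력 조합에서는 아래 지표를 그대로 쓰면 에러가 나거나 잘못된 값이 나온다. Accuracy()는 y_true와 y_pred의 값이 정확히 같은지를 비교하고, Precision()/Recall()은 y_true와 y_pred의 shape이 같다고(이진 또는 one-hot 레이블) 가정하기 때문이다. 정수 레이블에는 tf.keras.metrics.SparseCategoricalAccuracy()를 사용한다.)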
- tf.keras.metrics.Accuracy()
- tf.keras.metrics.Precision()
- tf.keras.metrics.Recall()

In [6]:
# Metrics are given as a list so several can be tracked at once; only accuracy is used here.
metrics = ['accuracy']

### Optimizer 적용
- 'sgd'
- 'rmsprop'
- 'adam'
- tf.keras.optimizers.SGD()
- tf.keras.optimizers.RMSprop()
- tf.keras.optimizers.Adam()

In [7]:
# Adam optimizer constructed with its default arguments (no learning rate is passed).
optm = tf.keras.optimizers.Adam()

### Compile

In [8]:
# Attach the previously chosen loss, optimizer and metric list to the model.
model.compile(
    loss=loss_fun,
    optimizer=optm,
    metrics=metrics,
)

## Prepare Dataset
학습에 사용할 데이터셋 준비

shape 확인: MNIST 데이터셋은 gray로 되어있기 때문에 channel 데이터가 없는 3차원 구조이다.

RGB 이미지일 경우 이미 channel 축이 포함된 4차원 (N, H, W, 3) 구조이므로 차원 수를 늘려줄 필요가 없다.

In [11]:
# Inspect the training image and label array shapes.
train_x.shape, train_y.shape

((60000, 28, 28), (60000,))

In [12]:
# Inspect the test image and label array shapes.
test_x.shape, test_y.shape

((10000, 28, 28), (10000,))

Conv2D 레이어는 (batch, height, width, channel) 형태의 4차원 입력을 받기 때문에, 이미지 데이터를 4차원으로 만들어줘야 한다.

channel을 추가해 3차원에서 4차원으로 차원 수 늘리기

차원 늘리는 방법
- np.expand_dims(train_x, -1).shape
- tf.expand_dims(train_x, -1).shape
- train_x[..., tf.newaxis]

In [13]:
# Append a trailing channel axis so the images match the (28, 28, 1) input shape
# declared on the model's first layer.
# The ndim guard keeps this cell idempotent: re-running it will not stack a
# second (or third) trailing axis onto arrays that already have a channel axis.
if train_x.ndim == 3:
    train_x = train_x[..., tf.newaxis]
if test_x.ndim == 3:
    test_x = test_x[..., tf.newaxis]

In [14]:
# Confirm that both image arrays now carry the trailing channel axis.
train_x.shape, test_x.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

### Rescaling 
- 데이터가 너무 확확 커지는걸 Rescaling 해서 조정한다. 해줘야 기울기가 잘 잡힌다.
- Normalization: 픽셀 값은 0 ~ 255 범위로 되어 있는데, 이를 0 ~ 1 사이의 값으로 변경해준다. 즉 255.0으로 나눠준다.

In [15]:
# Smallest and largest pixel value in the training images.
train_x.min(), train_x.max()

(0, 255)

In [16]:
# Scale pixel values from the 0..255 range down to 0..1.
# - The integer-dtype guard makes the cell idempotent: re-running it will not
#   divide already-scaled (floating point) data by 255 a second time.
# - Casting to float32 before dividing avoids the float64 result that
#   `uint8_array / 255.` would otherwise produce (twice the memory).
if np.issubdtype(train_x.dtype, np.integer):
    train_x = train_x.astype(np.float32) / 255.
if np.issubdtype(test_x.dtype, np.integer):
    test_x = test_x.astype(np.float32) / 255.

In [17]:
# Re-check the pixel value range of the training images after rescaling.
train_x.min(), train_x.max()

(0.0, 1.0)

### Training
학습

학습용 Hyperparameter 설정
- num_epochs: 데이터를 한번 쭉 다 보는 수
- batch_size: batch size만큼 데이터를 model에 한 번에 넣어준다. memory 사용효율을 위해 적정 수준을 넣어줘야 한다.
너무 많이 넣으면 memory full 현상 발생 가능성 있다.
- 학습을 할때는 shuffle을 해야한다.
- shuffle: overfitting을 방지하기 위한 방법으로, 매 epoch마다 학습 데이터의 순서를 섞어서 모델이 데이터 순서에 의존하지 않도록 한다. (model.fit의 shuffle 기본값은 True이다.)

num_epochs = 1
batch_size = 32

In [18]:
# Print the per-layer overview of the model (layer type, output shape, parameter count).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 7, 7, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 3136)              0

In [19]:
# Training hyperparameters: a single pass over the training set, 32 samples per batch.
epochs, batch_size = 1, 32

# Train the compiled model on the training arrays. shuffle=True is written out
# for readability; it is also the value Keras uses when the argument is omitted.
model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, shuffle=True)

Train on 60000 samples


<tensorflow.python.keras.callbacks.History at 0x1fe525c7e08>