# 경사 하강법을 이용한 얕은 신경망 학습


In [1]:
import tensorflow as tf
import numpy as np

## 데이터셋 생성, 전처리

In [2]:
np.random.seed(0)

# Draw 10 cluster centres uniformly from the square [-8, 8) x [-8, 8);
# the row index of a centre doubles as its class label.
center_pts = np.random.uniform(-8.0, 8.0, (10, 2))

# 10 clusters * 100 samples = 1000 pts.
# A single (clusters, samples, dims) draw consumes the global random stream in
# the same order as drawing one 2-D offset at a time, cluster by cluster.
noise = np.random.randn(len(center_pts), 100, center_pts.shape[1])
pts = center_pts[:, np.newaxis, :] + noise
pts = pts.reshape(-1, center_pts.shape[1]).astype(np.float32)

# Each class id repeated once per sample, in the same order as the rows of pts.
labels = np.repeat(np.arange(len(center_pts)), 100)

# Pair every point with its label, then shuffle with a 1000-element buffer
# and group the samples into mini-batches of 32.
train_ds = tf.data.Dataset.from_tensor_slices((pts, labels))
train_ds = train_ds.shuffle(1000).batch(32)

In [3]:
# Sanity check: show the shapes of the point and label arrays, one per line.
print(pts.shape, labels.shape, sep='\n')

(1000, 2)
(1000,)


## 네트워크 구조 정의
### 얕은 신경망
#### 입력 계층 : 2, 은닉 계층 : 128 (Sigmoid activation), 출력 계층 : 10 (Softmax activation)

In [4]:
class MyModel(tf.keras.Model):
    """Shallow classifier: one 128-unit sigmoid hidden layer and a 10-way softmax output."""

    def __init__(self):
        super().__init__()

        # No `input_dim` here: layers inside a subclassed model are built from
        # the shape of the first batch they receive, so the argument had no
        # effect (and newer Keras releases warn when it is passed).
        self.d1 = tf.keras.layers.Dense(128, activation='sigmoid')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x, training=None, mask=None):
        """Return the softmax output of the network for the batch `x`.

        Args:
            x: Batch of input features fed to the hidden layer.
            training: Accepted for Keras API compatibility; not used by these layers.
            mask: Accepted for Keras API compatibility; not used.

        Returns:
            Output of the softmax layer, one row of 10 values per input row.
        """
        hidden = self.d1(x)
        return self.d2(hidden)

## 학습 루프 정의

In [5]:
@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_metric):
    """Run one gradient-descent update on a single batch and record metrics.

    Args:
        model: Callable Keras model exposing `trainable_variables`.
        inputs: Batch of input features passed to `model`.
        labels: Targets for the batch, passed to `loss_object` and `train_metric`.
        loss_object: Callable `(labels, predictions) -> loss`.
        optimizer: Optimizer exposing `apply_gradients`.
        train_loss: Metric updated with the batch loss.
        train_metric: Metric updated with `(labels, predictions)`.

    Returns:
        None. The model weights and both metrics are updated in place.
    """
    with tf.GradientTape() as tape:
        # Run the forward pass in training mode so that layers which behave
        # differently during training (e.g. Dropout, BatchNormalization) are
        # handled correctly if the model ever contains them.
        predictions = model(inputs, training=True)
        loss = loss_object(labels, predictions)

    # d(loss)/d(variable) for every trainable variable of the model.
    trainable_variables = model.trainable_variables
    gradients = tape.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    train_loss(loss)
    train_metric(labels, predictions)

## 모델 생성

In [6]:
# Instantiate the shallow network that the training loop below optimises.
model = MyModel()

## 손실 함수 및 최적화 알고리즘 설정
### CrossEntropy, Adam Optimizer

In [7]:
# Cross-entropy for integer class labels. With its default settings the loss
# takes probabilities rather than logits, which matches the softmax output
# layer of MyModel.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
# Adam optimizer with the library's default hyperparameters.
optimizer = tf.keras.optimizers.Adam()

## 평가 지표 설정
### Accuracy

In [8]:
# Running mean of the per-batch loss values fed to it during an epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
# Running accuracy computed from integer labels and per-class predictions.
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

## 학습 루프

In [9]:
EPOCHS = 1000
template = 'Epoch {}, Loss: {}, Accuracy: {}'

for epoch in range(EPOCHS):
    # One pass over the whole dataset, one optimisation step per mini-batch.
    for x, label in train_ds:
        train_step(model, x, label, loss_object, optimizer, train_loss, train_accuracy)

    # Report the epoch-averaged metrics every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(template.format(epoch + 1, train_loss.result(), train_accuracy.result() * 100))

    # Clear the accumulators so each epoch's numbers are independent.
    # `reset_states` was renamed to `reset_state` and the old name is absent
    # from newer Keras releases; prefer the new name and fall back to the old
    # one on versions that predate the rename.
    for metric in (train_loss, train_accuracy):
        reset = getattr(metric, 'reset_state', None)
        if reset is None:
            reset = metric.reset_states
        reset()

Epoch 100, Loss: 0.2910615801811218, Accuracy: 89.0999984741211
Epoch 200, Loss: 0.2622007131576538, Accuracy: 89.0
Epoch 300, Loss: 0.26749688386917114, Accuracy: 88.80000305175781
Epoch 400, Loss: 0.2575363516807556, Accuracy: 89.30000305175781
Epoch 500, Loss: 0.25000008940696716, Accuracy: 89.80000305175781
Epoch 600, Loss: 0.2632284164428711, Accuracy: 89.60000610351562
Epoch 700, Loss: 0.24964496493339539, Accuracy: 90.10000610351562
Epoch 800, Loss: 0.24530459940433502, Accuracy: 89.70000457763672
Epoch 900, Loss: 0.24247904121875763, Accuracy: 90.5999984741211
Epoch 1000, Loss: 0.2498401254415512, Accuracy: 90.5999984741211


## 데이터셋 및 학습 파라미터 저장

In [10]:
# Persist the generated points and their labels.
np.savez_compressed('ch2_dataset.npz', inputs=pts, labels=labels)

# Each Dense layer returns its weights as [kernel, bias].
W_h, b_h = model.d1.get_weights()
W_o, b_o = model.d2.get_weights()

# Store the kernels transposed relative to the layout Keras returns.
W_h, W_o = W_h.T, W_o.T

np.savez_compressed('ch2_parameters.npz', W_h=W_h, b_h=b_h, W_o=W_o, b_o=b_o)