# BIG DATA ANALYTICS: TF-Keras 101
- TensorFlow 2의 Keras API를 활용하여 모델을 생성하고 훈련하는 과정을 확인해 보겠습니다.
- sources:
    - https://www.tensorflow.org/tutorials/quickstart/beginner
    - https://www.tensorflow.org/tutorials/keras/overfit_and_underfit
---

## 1. 라이브러리 설치 및 Import

In [None]:
import sys
!{sys.executable} -m pip install -q tensorflow
!pip install -q tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## 2. MNIST 손글씨 데이터셋 Load

In [None]:
# Load the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the image arrays by 255.0 (presumably mapping 0-255 pixel values
# into the [0, 1] range); the label arrays are left untouched.
x_train = x_train / 255.0
x_test = x_test / 255.0

In [None]:
# Show the shapes of the training images and training labels, one per line.
print(x_train.shape, y_train.shape, sep='\n')

## 3. 모델 정의

In [None]:
# Baseline classifier: flatten each 28x28 input, one hidden ReLU layer with
# dropout, then a 10-way softmax output.
model = keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),  # 28x28 => 784
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax'),
])

# The sparse categorical loss takes the labels as passed to fit(), without a
# separate one-hot encoding step.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## 4. 훈련 및 검증

In [None]:
# Train for 20 epochs, holding out 30% of the training data for validation.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_split=0.3,
)

In [None]:
# Score the trained model on the held-out test split; left as a bare
# expression so the notebook displays the returned value.
model.evaluate(x=x_test, y=y_test, verbose=2)

## Q1. 훈련을 더 많이 시키면 좋아질까?

In [None]:
import matplotlib.pyplot as plt
def plot_history(histories, key='loss'):
    """Plot training and validation curves of one metric for several runs.

    A new figure is created and, for every run, the validation curve is drawn
    dashed and the training curve solid in the same colour.

    Args:
        histories: Iterable of ``(name, history)`` pairs. Each ``history``
            must expose ``.epoch`` (sequence of epoch indices) and
            ``.history`` (dict mapping metric name to per-epoch values),
            e.g. the object returned by ``model.fit``.
        key: Metric to plot. Training values are read from
            ``history.history[key]`` and validation values from
            ``history.history['val_' + key]``.
    """
    # Materialize once: the runs are traversed twice (plotting, axis limit).
    histories = list(histories)
    plt.figure()

    for name, run in histories:
        val = plt.plot(run.epoch, run.history['val_'+key],
                       '--', label=name.title()+' Val')
        # Reuse the validation line's colour so both curves of a run match.
        plt.plot(run.epoch, run.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    plt.legend()

    # Size the x-axis to the longest run. Using only the last run (as the loop
    # variable would give) truncates curves of runs trained for more epochs,
    # and fails outright when no runs are passed.
    last_epoch = max((max(run.epoch) for _, run in histories if run.epoch),
                     default=None)
    if last_epoch is not None:
        plt.xlim([0, last_epoch])


# Loss curves (train vs. validation) of the first training run.
plot_history([('', history)], key='loss')

In [None]:
# Accuracy curves (train vs. validation) of the first training run.
plot_history([('', history)], key="accuracy")

In [None]:
# Rebuild the same architecture as a new model object, layer by layer.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))  # what if this were binary classification?

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 30 epochs this time, with the same 30% validation hold-out.
history2 = model.fit(
    x=x_train,
    y=y_train,
    epochs=30,
    validation_split=0.3,
)


In [None]:
# Compare the first two training runs, one figure per metric.
runs = [('Model1', history), ('Model2', history2)]
for metric in ("loss", "accuracy"):
    plot_history(runs, metric)

## Q2. 모델이 더 깊으면(레이어가 더 많으면) 좋을까?

In [None]:
# Deeper variant: a second hidden Dense(64) layer sits between the 128-unit
# hidden layer and the dropout.
stack = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
]
model = tf.keras.models.Sequential(stack)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs with a 30% validation hold-out.
history3 = model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    validation_split=0.3,
)


In [None]:
# Compare all three training runs, one figure per metric.
runs = [('Model1', history), ('Model2', history2), ('Model3', history3)]
for metric in ("loss", "accuracy"):
    plot_history(runs, metric)