# 붓꽃의 품종 분류

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Report the TensorFlow version in use and whether eager execution is enabled.
tf_version = tf.__version__
eager_enabled = tf.executing_eagerly()
print("텐서플로 버전: {}".format(tf_version))
print("즉시 실행: {}".format(eager_enabled))

# Print NumPy arrays with 3 decimals and allow up to 500 characters per line.
np.set_printoptions(linewidth=500, precision=3)

## 측정된 꽃받침과 꽃잎의 길이와 폭을 토대로 붓꽃을 분류하는 모델을 통해 경사하강법(GD) 학습
* Iris setosa
* Iris virginica
* Iris versicolor

![꽃](https://www.tensorflow.org/images/iris_three_species.jpg)

## 데이터 (CSV)

In [None]:
# Remote locations of the Iris training and test CSV files.
train_dataset_url = "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv"
test__dataset_url = "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv"

# Fetch both files through Keras' get_file helper; each local copy is named
# after the basename of its URL, and the returned value is the local path.
train_dataset_fp = tf.keras.utils.get_file(fname=os.path.basename(train_dataset_url), origin=train_dataset_url)
test__dataset_fp = tf.keras.utils.get_file(fname=os.path.basename(test__dataset_url), origin=test__dataset_url)

print("데이터셋이 복사된 위치: {}".format(train_dataset_fp))
# IPython shell escapes: show the first five lines of each downloaded file.
!head -n5 {train_dataset_fp}
!head -n5 {test__dataset_fp}

In [None]:
# CSV column order: sepal length, sepal width, petal length, petal width,
# followed by the species label.
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

# The integer species label indexes into this list:
# 0 = Iris setosa, 1 = Iris versicolor, 2 = Iris virginica.
class_names = ['Iris setosa', 'Iris versicolor', 'Iris virginica']

# Every column except the last is a feature; the last column is the label.
*feature_names, label_name = column_names

print("특성: {}".format(feature_names))
print("레이블: {}".format(label_name))

In [None]:
# Number of CSV rows grouped into one batch.
batch_size = 32

# Read the training CSV as batches of (feature-dict, label) pairs, in file
# order (shuffle=False) and for a single pass over the data (num_epochs=1).
train_dataset = tf.data.experimental.make_csv_dataset(
    file_pattern=train_dataset_fp,
    batch_size=batch_size,
    column_names=column_names,
    label_name=label_name,
    num_epochs=1,
    shuffle=False)

In [None]:
# Take the first batch and plot petal length against sepal length,
# colouring each point by its label.
batch_iterator = iter(train_dataset)
features, labels = next(batch_iterator)
petal_length = features['petal_length']
sepal_length = features['sepal_length']

plt.scatter(petal_length, sepal_length, c=labels, cmap='viridis')
plt.xlabel("Petal length")
plt.ylabel("Sepal length")
plt.show()

In [None]:
def pack_features_vector(features, labels):
  """Stack the per-column feature tensors into a single tensor.

  Args:
    features: mapping whose values are the individual feature tensors.
    labels: labels for the same batch; returned unchanged.

  Returns:
    A `(packed_features, labels)` tuple, where `packed_features` is the
    mapping's values stacked along axis 1.
  """
  # tf.stack joins rank-R tensors into one rank-(R+1) tensor.
  # Presumably each value is a 1-D batch column, so the result is
  # (batch, n_features) - confirm against the dataset.
  columns = list(features.values())
  packed = tf.stack(columns, axis=1)
  return packed, labels

In [None]:
# Convert every (feature-dict, label) batch into a (stacked feature tensor, label) batch.
train_dataset = train_dataset.map(pack_features_vector)

In [None]:
# Display the dataset object as the cell's output.
train_dataset

In [None]:
# Fetch the first packed batch and show its first five feature rows and labels.
features, labels = next(iter(train_dataset))
head_features, head_labels = features[:5], labels[:5]
print(head_features, head_labels)

## 데이터 (pandas)

In [None]:
# Load the same training CSV into a DataFrame with pandas.read_csv.
# The file's first line is skipped and the notebook's own column names are
# applied, reusing `column_names` so both pipelines agree on the schema.
pdFeature = pd.read_csv(train_dataset_fp, sep=',', skiprows=1, names=column_names)

# DataFrame.pop returns the label column and removes it from the frame
# (raises KeyError if the column is missing), leaving only the features.
pdLabel = pdFeature.pop(label_name)

# Show the first five feature rows.
pdFeature.head(5)

In [None]:
# tf.data.Dataset.from_tensor_slices creates a Dataset whose elements are
# slices of the given arrays, i.e. one (features, label) pair per row here.
feature_array = pdFeature.values
label_array = pdLabel.values
pdDataset = tf.data.Dataset.from_tensor_slices((feature_array, label_array))

# Shape of the feature matrix, shown as the cell's output.
feature_array.shape

In [None]:
# Print the first five (features, species) elements of the pandas-backed dataset.
for row, (row_features, row_species) in enumerate(pdDataset.take(5)):
    print('Row: {}, Features: {}, Species: {}'.format(row, row_features, row_species))

## 모델

![](https://www.tensorflow.org/images/custom_estimators/full_network.png)

In [None]:
# tf.keras.Sequential is a linear stack of layers.
# To inspect a layer's options in the notebook, press Shift+Tab inside Dense(...).
#
# Parameter count per layer (weights + biases):
#   hidden 1:  4 inputs * 10 units =  40 weights + 10 biases =  50
#   hidden 2: 10 inputs * 10 units = 100 weights + 10 biases = 110
#   output:   10 inputs *  3 units =  30 weights +  3 biases =  33
hidden_1 = tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4,))
hidden_2 = tf.keras.layers.Dense(10, activation=tf.nn.relu)
output_layer = tf.keras.layers.Dense(3)  # no activation: emits raw logits

model = tf.keras.Sequential([hidden_1, hidden_2, output_layer])
model.summary()
#help(model)

## 훈련 (Training) 전 모델 사용

In [None]:
# Show the input batch shape and, for every layer, the shapes of its weight
# matrix, its bias vector and its output (labelled "Activation" in the text).
print('Input Shape: {}'.format(features.shape))
for i, layer in enumerate(model.layers):
    print('Layer {}: Weight{} Bias{} Activation{}'.format(i, layer.weights[0].shape, layer.bias.shape, layer.output.shape))

In [None]:
# Walk through the first Dense layer by hand for the current batch.
print('>> 1st Features:\n %s\n'%(features[0].numpy()))
print('>> Weights (Input-Layer1):\n %s\n'%(model.layers[0].weights[0].numpy()))
print('>> Biases   (Input-Layer1):\n %s\n'%(model.layers[0].bias.numpy()))
# Dot product of the first sample with the first unit's weight column.
print('>> Layer1 MatMul[0,0]: %s\n'%(np.sum(features[0]*model.layers[0].weights[0][:,0])))
# The same product for the whole batch at once (first five rows shown).
print('>> Layer1 MatMul: \n %s\n'%(tf.matmul(features, model.layers[0].weights[0]).numpy()[:5]))

# Manual forward pass through all three layers: relu(x @ W + b) twice, then a
# linear output layer. The bias vector is broadcast across the rows of the
# batch, so this works for any batch size rather than only `batch_size` rows.
actLayer0 = tf.nn.relu(tf.matmul(features,  model.layers[0].weights[0]) + model.layers[0].bias)
actLayer1 = tf.nn.relu(tf.matmul(actLayer0, model.layers[1].weights[0]) + model.layers[1].bias)
actLayer2 =            tf.matmul(actLayer1, model.layers[2].weights[0]) + model.layers[2].bias

In [None]:
# Run the (still untrained) model on the batch and show the first five output rows.
predictions = model(features)
predictions[:5]

In [None]:
# First five rows of the hand-computed output layer, for comparison with the model's output.
actLayer2[:5]

In [None]:
# Apply only the first two layers to the batch and show the first five rows.
model.layers[1](model.layers[0](features))[:5]

In [None]:
# First five rows of the hand-computed second-layer activations.
actLayer1[:5]

## Softmax 적용 (로짓(logit)을 각 클래스에 대한 확률로 변환)
![](https://wikimedia.org/api/rest_v1/media/math/render/svg/bdc1f8eaa8064d15893f1ba6426f20ff8e7149c5)

In [None]:
# Softmax with NumPy: exponentiate each row of logits and normalise by the row sum.
for idx, logitRow in enumerate(predictions[:5]):
    exp_logits = np.exp(logitRow)
    class_probs = exp_logits/np.sum(exp_logits)
    print('Index: {}, Prob. per Class: {}'.format(idx, class_probs))

In [None]:
# Softmax with TensorFlow: the same conversion of logits to per-class probabilities.
tf.nn.softmax(predictions[:5])

In [None]:
# Predicted class = index of the largest logit in each row; printed next to the true labels.
print("  예측: {}".format(tf.argmax(predictions, axis=1)))
print("레이블: {}".format(labels))

## 모델 훈련하기

### 손실함수 (Loss Function)

#### 평균 제곱 오차 (Mean Squared Error, MSE) : Regression (회귀) 문제에 주로 사용
![](https://wikimedia.org/api/rest_v1/media/math/render/svg/e258221518869aa1c6561bb75b99476c4734108e)

#### 교차 엔트로피 오차 (Cross Entropy Error, CEE) : Classification (분류) 문제에 주로 사용

![](https://wikimedia.org/api/rest_v1/media/math/render/svg/c6b895514e10a3ce88773852cba1cb1e248ed763)

In [None]:
# Cross-entropy error computed by hand with NumPy: the mean over the batch of
# -log(probability assigned to the true class).
probs = tf.nn.softmax(model(features))
# Average over the samples actually present, so a batch shorter than
# `batch_size` still yields a true mean.
num_samples = len(labels)
cee = 0
for prob, y in zip(probs, labels):
    y_ = prob[y]  # probability the model assigns to the correct class
    cee = cee - np.log(y_)/num_samples
print(cee)

In [None]:
# tf.keras.losses.SparseCategoricalCrossentropy computes the cross-entropy
# between integer labels and predictions; from_logits=True tells it the model
# outputs raw logits rather than probabilities.
lossCEE = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Loss of the current model on the current batch.
batch_logits = model(features)
batch_loss = lossCEE(y_true=labels, y_pred=batch_logits)
print(batch_loss.numpy())
def loss(model, x, y):
  """Return the cross-entropy loss of `model` on inputs `x` against labels `y`.

  The model is called on `x` and its output is scored with the module-level
  `lossCEE` loss object.
  """
  return lossCEE(y_true=y, y_pred=model(x))

## 옵티마이저 생성

![](https://cs231n.github.io/assets/nn3/opt1.gif)

In [None]:
# tf.GradientTape records operations for automatic differentiation.
def grad(model, inputs, targets):
  """Compute the loss and its gradients for one batch.

  Returns:
    A `(loss_value, gradients)` tuple; `gradients` holds the gradient of the
    loss with respect to each of `model.trainable_variables`.
  """
  with tf.GradientTape() as tape:
    # The forward pass must run inside the tape so it is recorded.
    loss_value = loss(model, inputs, targets)
  gradients = tape.gradient(loss_value, model.trainable_variables)
  return loss_value, gradients

# tf.keras.optimizers provides the built-in optimizer classes; Adam is used here.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Hand-tracked first and second moment estimates for the manual Adam check,
# reset only while the optimizer has not taken a step yet.
no_steps_taken = optimizer.iterations.numpy() == 0
if no_steps_taken:
    m1 = m2 = 0

In [None]:
# Inspect the optimizer's current weights as the cell's output.
# NOTE(review): get_weights() may be unavailable on newer Keras optimizers - confirm the TF version.
optimizer.get_weights()

In [None]:
# Take one optimisation step by hand and compare the first layer's weights
# before and after it.
first_layer_weights = model.layers[0].weights[0]
loss_value, grads = grad(model, features, labels)

step_before = optimizer.iterations.numpy()
print("단계: {}, 초기 손실: {}".format(step_before, loss_value.numpy()))
weights_pre  = first_layer_weights.numpy()  # snapshot before the update

optimizer.apply_gradients(zip(grads, model.trainable_variables))

step_after = optimizer.iterations.numpy()
loss_after = loss(model, features, labels).numpy()
print("단계: {},     손실: {}\n".format(step_after, loss_after))
weights_post = first_layer_weights.numpy()  # snapshot after the update

print(weights_pre,  '\n\n')
print(weights_post)

### Adam

$$m_0 := 0 \quad \text{(initialize 1st moment vector)}$$
$$v_0 := 0 \quad \text{(initialize 2nd moment vector)}$$
$$t := 0 \quad \text{(initialize timestep)}$$
$$t := t + 1$$
$$lr_t := \text{learning\_rate} \cdot \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$
$$m_t := \beta_1 \cdot m_{t-1} + (1 - \beta_1) \cdot g$$
$$v_t := \beta_2 \cdot v_{t-1} + (1 - \beta_2) \cdot g \cdot g$$
$$\text{variable} := \text{variable} - lr_t \cdot m_t / (\sqrt{v_t} + \epsilon)$$

In [None]:
# Reproduce the optimizer's first Adam step by hand and compare the result
# with the weights the optimizer actually produced.
#
# The check is only valid right after the very first apply_gradients call:
# the moments start from zero and `grads` must be the gradients of that step.
# Everything is computed inside the guard so the cell can be re-run safely and
# never touches an undefined `lr`.
t = optimizer.iterations.numpy()
if t == 1:
    print(t)
    beta_1 = optimizer.beta_1.numpy()
    beta_2 = optimizer.beta_2.numpy()
    # Gradient of the first trainable variable, paired here with layer 0's weights[0].
    g = grads[0].numpy()

    # Bias-corrected step size for timestep t.
    lr  = optimizer.learning_rate * tf.sqrt(1-(optimizer.beta_2 ** t)) / (1-(optimizer.beta_1**t))
    # Moment estimates after one step, starting from m_0 = v_0 = 0.
    m1 = (1-beta_1) * g
    m2 = (1-beta_2) * g ** 2

    weights_cal = weights_pre - lr * m1 / (np.sqrt(m2) + optimizer.epsilon)
    print(weights_cal)
    print(weights_post)
else:
    print("Manual Adam check skipped: it is only valid right after the first optimizer step (current step: {})".format(t))

In [None]:
## NOTE: re-running this cell keeps training the same model variables.

# Per-epoch history, kept for plotting.
train_loss_results = []
train_accuracy_results = []

num_epochs = 201

for epoch in range(num_epochs):
    # Fresh metric objects every epoch so the statistics do not carry over.
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    # Training loop: one pass over the dataset, mini-batch by mini-batch.
    for x, y in train_dataset:
        # Optimise the model on this batch.
        loss_value, grads = grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Track progress: add this batch's loss, then compare the predicted
        # labels with the true ones.
        epoch_loss_avg.update_state(loss_value)
        epoch_accuracy.update_state(y, model(x))

    # End of epoch: record the aggregated metrics.
    mean_loss = epoch_loss_avg.result()
    accuracy = epoch_accuracy.result()
    train_loss_results.append(mean_loss)
    train_accuracy_results.append(accuracy)

    if epoch % 50 == 0:
        print("에포크 {:03d}: 손실: {:.3f}, 정확도: {:.3%}".format(epoch, mean_loss, accuracy))

In [None]:
# Plot loss (top) and accuracy (bottom) per epoch on a shared x axis.
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
loss_ax, accuracy_ax = axes
fig.suptitle('Training Procedure')

loss_ax.set_ylabel("Loss", fontsize=14)
loss_ax.plot(train_loss_results)

accuracy_ax.set_ylabel("Accuracy", fontsize=14)
accuracy_ax.set_xlabel("Epoch", fontsize=14)
accuracy_ax.plot(train_accuracy_results)
plt.show()

In [None]:
# Build the evaluation dataset exactly like the training one: same batch size,
# same column names and the same label column (`label_name`, not a repeated
# literal), in file order and for a single pass.
test_dataset = tf.data.experimental.make_csv_dataset(
    test__dataset_fp,
    batch_size,
    column_names=column_names,
    label_name=label_name,
    num_epochs=1,
    shuffle=False)

# Stack each batch's feature columns into one tensor, as the model expects.
test_dataset = test_dataset.map(pack_features_vector)

In [None]:
# Accuracy over the whole test set, accumulated batch by batch.
test_accuracy = tf.keras.metrics.Accuracy()

for (x, y) in test_dataset:
  logits = model(x)
  # Predicted class = index of the largest logit; int32 to match how it is compared with y.
  prediction = tf.argmax(logits, axis=1, output_type=tf.int32)
  # Pass the true labels first and the predictions second, as the metric expects.
  test_accuracy.update_state(y_true=y, y_pred=prediction)

print("테스트 세트 정확도: {:.3%}".format(test_accuracy.result()))

In [None]:
# Pair the true label and predicted label per row; y and prediction hold the last batch from the loop.
tf.stack([y,prediction],axis=1)