# Baseline

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb
# Load only the IMDB training split. Indexing [0] instead of unpacking the
# test split into `_` avoids shadowing IPython's last-output variable and
# avoids keeping the unused test data referenced for the whole session.
# num_words=10000 matches the default `dimension` of vectorize_sequences.
train_data, train_labels = imdb.load_data(num_words=10000)[0]

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode a collection of integer index sequences.

    Args:
        sequences: Sized iterable whose items are collections of integer
            indices, each index smaller than ``dimension``.
        dimension: Width of every encoded row.

    Returns:
        A float array of shape ``(len(sequences), dimension)`` in which
        entry ``[i, j]`` is 1.0 when index ``j`` occurs in ``sequences[i]``
        and 0.0 otherwise. Repeated indices still yield a single 1.0.
    """
    output_shape = (len(sequences), dimension)
    multi_hot = np.zeros(output_shape)
    for row, word_indices in enumerate(sequences):
        # Fancy indexing switches on every listed column of this row at once.
        multi_hot[row, word_indices] = 1.0
    return multi_hot
# Replace the raw index sequences with their fixed-width multi-hot encoding.
train_data = vectorize_sequences(train_data)

# Baseline classifier: two 16-unit ReLU layers feeding one sigmoid output.
model = keras.Sequential([
    layers.Dense(units, activation=activation)
    for units, activation in [(16, "relu"), (16, "relu"), (1, "sigmoid")]
])
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# validation_split=0.4 reserves 40% of the training data for validation.
history_original = model.fit(
    train_data,
    train_labels,
    validation_split=0.4,
    batch_size=512,
    epochs=20,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Epoch 1/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 49ms/step - accuracy: 0.6613 - loss: 0.6305 - val_accuracy: 0.8671 - val_loss: 0.4413
Epoch 2/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.8849 - loss: 0.3822 - val_accuracy: 0.8775 - val_loss: 0.3348
Epoch 3/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.9150 - loss: 0.2643 - val_accuracy: 0.8797 - val_loss: 0.3062
Epoch 4/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.9327 - loss: 0.2082 - val_accuracy: 0.8878 - val_loss: 0.2797
Epoch 5/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.9502 - loss: 0.1646 - val_accuracy: 0.8906 - val_loss: 0.2776
Epoch 6/20
[1m30/3

## 사용자 정의 지표

In [7]:
from tensorflow.keras import backend as K

class F1Score(keras.metrics.Metric):
    """Streaming F1 score for binary classification.

    Keeps running totals of true positives, false positives and false
    negatives across every batch seen since the last ``reset_state`` call and
    derives precision, recall and F1 from those totals. The reported value is
    therefore the F1 over all samples seen so far, not a mean of per-batch F1
    scores.

    Predictions are binarized by rounding; labels are expected to be 0/1.
    """

    def __init__(self, name="f1_score", **kwargs):
        super().__init__(name=name, **kwargs)
        self.true_positives = self.add_weight(name="tp", initializer="zeros")
        self.false_positives = self.add_weight(name="fp", initializer="zeros")
        self.false_negatives = self.add_weight(name="fn", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch to the running TP / FP / FN totals.

        Args:
            y_true: Ground-truth labels (0 or 1).
            y_pred: Predicted probabilities; rounded to 0 or 1 before counting.
            sample_weight: Optional weights, either a scalar or one weight per
                (flattened) sample. When given, each sample's contribution to
                the counts is multiplied by its weight.
        """
        # Flatten both tensors to 1-D so that labels and predictions line up
        # element-wise regardless of trailing singleton dimensions; cast both
        # to float32 so the products below never mix dtypes.
        y_true = tf.reshape(K.cast(y_true, "float32"), [-1])
        y_pred = tf.reshape(K.round(K.cast(y_pred, "float32")), [-1])

        true_pos = y_true * y_pred
        false_pos = (1.0 - y_true) * y_pred
        false_neg = y_true * (1.0 - y_pred)

        if sample_weight is not None:
            # Flattening turns a scalar into shape (1,), which broadcasts.
            weights = tf.reshape(K.cast(sample_weight, "float32"), [-1])
            true_pos = true_pos * weights
            false_pos = false_pos * weights
            false_neg = false_neg * weights

        self.true_positives.assign_add(K.sum(true_pos))
        self.false_positives.assign_add(K.sum(false_pos))
        self.false_negatives.assign_add(K.sum(false_neg))

    def result(self):
        """Return F1 computed from the accumulated counts."""
        # K.epsilon() keeps every division defined when a denominator is zero.
        precision = self.true_positives / (self.true_positives + self.false_positives + K.epsilon())
        recall = self.true_positives / (self.true_positives + self.false_negatives + K.epsilon())

        f1_score = 2 * (precision * recall) / (precision + recall + K.epsilon())
        return f1_score

    def reset_state(self):
        """Zero all accumulated counts."""
        self.true_positives.assign(0.0)
        self.false_positives.assign(0.0)
        self.false_negatives.assign(0.0)

In [8]:
# Same three-layer architecture, now also reporting the custom F1Score metric.
model = keras.Sequential([
    layers.Dense(units, activation=activation)
    for units, activation in [(16, "relu"), (16, "relu"), (1, "sigmoid")]
])
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy", F1Score()],
)
# Very small batches (batch_size=4) for comparison with the batch_size=512 run.
n_history = model.fit(
    train_data,
    train_labels,
    validation_split=0.4,
    batch_size=4,
    epochs=20,
)

Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 4ms/step - accuracy: 0.8169 - f1_score: 0.8265 - loss: 0.4102 - val_accuracy: 0.8879 - val_f1_score: 0.8864 - val_loss: 0.2905
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.9175 - f1_score: 0.9188 - loss: 0.2215 - val_accuracy: 0.8942 - val_f1_score: 0.8937 - val_loss: 0.2875
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9254 - f1_score: 0.9250 - loss: 0.2060 - val_accuracy: 0.8872 - val_f1_score: 0.8834 - val_loss: 0.3082
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 4ms/step - accuracy: 0.9327 - f1_score: 0.9336 - loss: 0.1902 - val_accuracy: 0.8895 - val_f1_score: 0.8884 - val_loss: 0.3255
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.9372 - f1_score: 0.9375 - loss: 0.1830 - val_accuracy: 0.8904 - val_f

In [9]:
# Fresh model with the same architecture and the custom F1Score metric.
model = keras.Sequential([
    layers.Dense(units, activation=activation)
    for units, activation in [(16, "relu"), (16, "relu"), (1, "sigmoid")]
])
model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy", F1Score()],
)
# Large batches (batch_size=512) for comparison with the batch_size=4 run.
n_history = model.fit(
    train_data,
    train_labels,
    validation_split=0.4,
    batch_size=512,
    epochs=20,
)

Epoch 1/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 64ms/step - accuracy: 0.6789 - f1_score: 0.7064 - loss: 0.6330 - val_accuracy: 0.8617 - val_f1_score: 0.8649 - val_loss: 0.4484
Epoch 2/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.8750 - f1_score: 0.8774 - loss: 0.4050 - val_accuracy: 0.8817 - val_f1_score: 0.8817 - val_loss: 0.3418
Epoch 3/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.9063 - f1_score: 0.9076 - loss: 0.2930 - val_accuracy: 0.8883 - val_f1_score: 0.8869 - val_loss: 0.2950
Epoch 4/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.9249 - f1_score: 0.9257 - loss: 0.2276 - val_accuracy: 0.8906 - val_f1_score: 0.8901 - val_loss: 0.2772
Epoch 5/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.9414 - f1_score: 0.9423 - loss: 0.1807 - val_accuracy: 0.8901 - val_f1_score: 0.8913 - va

# batch_size에 따라 tp가 달라진다...!