# 11장

In [1]:
import tensorflow as tf, os

# Select TensorFlow's asynchronous CUDA allocator. Assigning through os.environ
# (instead of os.putenv) keeps Python's own view of the environment in sync,
# and doing it before any GPU is configured makes sure the setting is already
# in place when the devices get initialised.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Enable memory growth: GPU memory is allocated on demand instead of
        # being reserved in full when the device is first used.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when memory growth is changed after the GPUs were initialised.
        print(e)

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# He (normal) initialization paired with a ReLU layer.
keras.layers.Dense(units=10, activation='relu', kernel_initializer='he_normal')

# He-style initialization based on fan_avg with a uniform distribution,
# built explicitly through VarianceScaling.
he_avg_init = keras.initializers.VarianceScaling(
    scale=2,
    mode='fan_avg',
    distribution='uniform',
)
keras.layers.Dense(units=10, activation='sigmoid', kernel_initializer=he_avg_init);

In [2]:
# LeakyReLU is used as its own layer, placed after a Dense layer that has no
# activation of its own.
keras.layers.Dense(units=10, kernel_initializer='he_normal')
keras.layers.LeakyReLU(alpha=0.2)

# SELU activation combined with LeCun normal initialization.
keras.layers.Dense(units=10, kernel_initializer='lecun_normal', activation='selu');

In [3]:
# MLP for 28x28 inputs with batch normalization after the input and around
# the hidden layers.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
# momentum defaults to 0.99; for smaller mini-batches move it closer to 1 by
# adding more 9s after the decimal point.
model.add(keras.layers.BatchNormalization(momentum=0.99))
model.add(keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'))
model.add(keras.layers.BatchNormalization())
# Batch normalization *before* the activation: this Dense layer has neither an
# activation nor a bias, and ELU is applied as a separate layer after BN.
model.add(keras.layers.Dense(100, kernel_initializer='he_normal', use_bias=False))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Activation('elu'))
model.add(keras.layers.Dense(10, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 batch_normalization (BatchN  (None, 784)              3136      
 ormalization)                                                   
                                                                 
 dense_4 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 dense_5 (Dense)             (None, 100)               30000     
                                                                 
 batch_normalization_2 (Batc  (None, 100)              4

In [4]:
# Gradient clipping options:
#   clipvalue=1.0 clips every partial derivative of the loss to [-1.0, 1.0].
#   clipnorm=1.0 rescales a gradient whose L2 norm exceeds 1.0 down to norm 1.0.
# NOTE(review): how the two options interact when both are given depends on
# the Keras version (some releases accept only one of them) — confirm against
# the installed version before relying on a particular order.
optimizer = keras.optimizers.SGD(clipvalue=1.0, clipnorm=1.0)
# `model` ends in a 10-unit softmax and is later fitted on integer class
# labels, so the matching loss is sparse categorical cross-entropy; mean
# squared error would compare class indices against probabilities.
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)

In [5]:
def split_dataset(X, y):
    """Split a labelled dataset into task A (8 classes) and task B (binary).

    Instances labelled 5 or 6 (sandals or shirts) go to task B; everything
    else goes to task A.

    Args:
        X: array of instances, indexable with a boolean mask along axis 0.
        y: NumPy array of integer class labels aligned with ``X``.

    Returns:
        ``((X_A, y_A), (X_B, y_B))`` where ``y_A`` has labels 7, 8, 9 shifted
        down to 5, 6, 7 and ``y_B`` is a float32 array that is 1.0 for
        class 6 (shirt) and 0.0 for class 5.
    """
    is_task_b = np.isin(y, (5, 6))
    is_task_a = np.logical_not(is_task_b)

    # Boolean indexing yields a copy, so the caller's labels are left untouched.
    labels_a = y[is_task_a]
    needs_shift = labels_a > 6
    labels_a[needs_shift] = labels_a[needs_shift] - 2

    labels_b = np.equal(y[is_task_b], 6).astype(np.float32)
    return (X[is_task_a], labels_a), (X[is_task_b], labels_b)

# Load Fashion MNIST and scale the pixel intensities by 1/255.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# The first 5,000 instances form the validation set, the rest the training set.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

# Divide each subset into task A and task B.
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

# Keep only the first 200 training instances for task B.
X_train_B, y_train_B = X_train_B[:200], y_train_B[:200]

In [6]:
# Model A: 8-class classifier trained on the task A subset.
hidden_layers_A = [keras.layers.Dense(units, activation="selu")
                   for units in (300, 100, 50, 50, 50)]
model_A = keras.models.Sequential(
    [keras.layers.Flatten(input_shape=[28, 28])]
    + hidden_layers_A
    + [keras.layers.Dense(8, activation="softmax")]
)
model_A.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model_A.fit(
    X_train_A, y_train_A,
    epochs=20,
    validation_data=(X_valid_A, y_valid_A),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [7]:
# Model B: binary classifier trained from scratch on the small task B subset,
# using the same hidden architecture as model A.
hidden_layers_B = [keras.layers.Dense(units, activation="selu")
                   for units in (300, 100, 50, 50, 50)]
model_B = keras.models.Sequential(
    [keras.layers.Flatten(input_shape=[28, 28])]
    + hidden_layers_B
    + [keras.layers.Dense(1, activation="sigmoid")]
)
model_B.compile(
    optimizer=keras.optimizers.SGD(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history = model_B.fit(
    X_train_B, y_train_B,
    epochs=20,
    validation_data=(X_valid_B, y_valid_B),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [8]:
# Transfer learning: reuse every layer of model A except its output layer and
# put a new binary output layer on top.
# Building directly on model_A.layers would share the layer objects, so
# training model_B_on_A would also change model_A. Work on a clone instead:
# clone_model() copies the architecture only, so the weights are copied too.
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())
model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation='sigmoid'))

In [9]:
# clone_model() reproduces the architecture only; the trained weights have to
# be copied over in a separate step.
model_A_clone = keras.models.clone_model(model_A)
weights_A = model_A.get_weights()
model_A_clone.set_weights(weights_A)

In [10]:
# Freeze all reused layers so that only the new output layer is trained.
for reused_layer in model_B_on_A.layers[:-1]:
    reused_layer.trainable = False

# A model must be compiled again after layers are frozen or unfrozen.
model_B_on_A.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Phase 1: train for a few epochs while the reused layers are still frozen.
history = model_B_on_A.fit(
    X_train_B, y_train_B,
    epochs=4,
    validation_data=(X_valid_B, y_valid_B),
)

# Phase 2: unfreeze the reused layers and fine-tune the whole network.
for reused_layer in model_B_on_A.layers[:-1]:
    reused_layer.trainable = True

# Transfer learning uses a lower learning rate for the fine-tuning phase.
optimizer = keras.optimizers.SGD(learning_rate=1e-4)
model_B_on_A.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
history = model_B_on_A.fit(
    X_train_B, y_train_B,
    epochs=16,
    validation_data=(X_valid_B, y_valid_B),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [12]:
# Loss and accuracy of the fine-tuned model on the task B test set.
model_B_on_A.evaluate(x=X_test_B, y=y_test_B)



[0.15260308980941772, 0.9635000228881836]

In [13]:
# Each assignment replaces the previous one; only the Adam optimizer remains
# bound to `optimizer` after this cell.
# Momentum optimization
optimizer = keras.optimizers.SGD(momentum=0.9, learning_rate=0.001)
# Nesterov accelerated gradient
optimizer = keras.optimizers.SGD(momentum=0.9, nesterov=True, learning_rate=0.001)
# RMSProp
optimizer = keras.optimizers.RMSprop(rho=0.9, learning_rate=0.001)
# Adam
optimizer = keras.optimizers.Adam(beta_1=0.9, beta_2=0.999, learning_rate=0.001)

In [14]:
# Power scheduling: the `decay` argument of the optimizer lowers the learning
# rate as training steps accumulate.
optimizer = keras.optimizers.SGD(decay=1e-4, learning_rate=0.01)

# Exponential scheduling
def exponential_decay_fn(epoch):
    """Return the learning rate for ``epoch``: 0.01 divided by 10 every 20 epochs."""
    initial_rate = 0.01
    epochs_per_tenfold_drop = 20
    return initial_rate * 0.1 ** (epoch / epochs_per_tenfold_drop)

def exponential_decay(lr0, s):
    """Build an exponential learning-rate schedule.

    Args:
        lr0: learning rate at epoch 0.
        s: number of epochs over which the rate drops by a factor of 10.

    Returns:
        A function ``f(epoch)`` returning ``lr0 * 0.1 ** (epoch / s)``.
    """
    def exponential_decay_fn(epoch):
        # Use the closure's lr0 and s; the previous version ignored both and
        # always computed the schedule for lr0=0.01, s=20.
        return lr0 * 0.1 ** (epoch / s)
    return exponential_decay_fn
# The schedule is produced by a factory because it is handed to a callback as
# a function: the settings are fixed here, ahead of training.
exponential_decay_fn = exponential_decay(s=20, lr0=0.01)

lr_scheduler = keras.callbacks.LearningRateScheduler(schedule=exponential_decay_fn)
history = model.fit(
    X_train, y_train,
    batch_size=1028,
    epochs=200,
    callbacks=[lr_scheduler],
)

def exponential_decay_fn(epoch, lr):
    """Return ``lr`` multiplied by the constant per-epoch decay factor.

    Variant of the schedule that works from the current learning rate
    instead of the epoch index; ``epoch`` is accepted but not used.
    """
    per_epoch_factor = 0.1 ** (1 / 20)
    return lr * per_epoch_factor

# Piecewise constant scheduling
def piecewise_constant_fn(epoch):
    """Return the learning rate for ``epoch`` from a fixed three-step table.

    Epochs 0-4 use 0.01, epochs 5-14 use 0.005, later epochs use 0.001.
    """
    if epoch < 5:
        return 0.01
    if epoch < 15:
        return 0.005
    return 0.001


# The function was originally defined under this misspelled name; keep it
# available so existing references continue to work.
piecewuse_constant_fn = piecewise_constant_fn

# Performance scheduling: when val_loss has not improved for `patience`
# consecutive epochs, the learning rate is multiplied by `factor`.
lr_scheduler = keras.callbacks.ReduceLROnPlateau(patience=5, factor=0.5)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [15]:
import math

### 1cycle scheduling
# Finding a suitable learning rate
class ExponentialLearningRate(keras.callbacks.Callback):
    """Callback that multiplies the learning rate by ``factor`` after every batch.

    The learning rate in effect and the loss of each batch are recorded in
    ``rates`` and ``losses`` so that loss can be inspected as a function of
    the learning rate afterwards.

    Args:
        factor: multiplier applied to the optimizer's learning rate after
            each batch.
    """

    def __init__(self, factor):
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_epoch_begin(self, epoch, logs=None):
        # Keras calls this hook with (epoch, logs), so `logs` must be accepted.
        self.prev_loss = 0

    def on_batch_end(self, batch, logs=None):
        # The formula treats logs["loss"] as the running mean over the epoch
        # and recovers the loss of this single batch from two consecutive means.
        batch_loss = logs["loss"] * (batch + 1) - self.prev_loss * batch
        self.prev_loss = logs["loss"]
        # Read the rate from the model this callback is attached to, not from
        # a global variable that happens to be called `model`.
        self.rates.append(self.model.optimizer.lr.numpy())
        self.losses.append(batch_loss)
        self.model.optimizer.lr = self.model.optimizer.lr * self.factor

def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    """Sweep the learning rate exponentially from ``min_rate`` to ``max_rate``.

    The model is trained for ``epochs`` epochs while an
    ``ExponentialLearningRate`` callback grows the learning rate after each
    batch. The model's weights and learning rate are put back to their
    initial values afterwards, also when training fails or is interrupted.

    Args:
        model: compiled Keras model.
        X, y: training inputs and targets.
        epochs: number of epochs used for the sweep.
        batch_size: batch size used for the sweep.
        min_rate: learning rate at the first batch.
        max_rate: learning rate the sweep is scaled to reach at the end.

    Returns:
        ``(rates, losses)``: per-batch learning rates and losses recorded by
        the callback.
    """
    init_weights = model.get_weights()
    init_lr = model.optimizer.lr.numpy()

    # Choose the per-batch factor so that min_rate * factor**iterations == max_rate.
    iterations = math.ceil(len(X) / batch_size) * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    exp_lr = ExponentialLearningRate(factor)

    model.optimizer.lr = min_rate
    try:
        model.fit(X, y, epochs=epochs, batch_size=batch_size,
                  callbacks=[exp_lr])
    finally:
        # Always undo the sweep so the caller's model is not left with a huge
        # learning rate and diverged weights.
        model.optimizer.lr = init_lr
        model.set_weights(init_weights)

    return exp_lr.rates, exp_lr.losses

# 1cycle scheduler
class OneCycleScheduler(keras.callbacks.Callback):
    """Callback that sets the learning rate per batch following a 1cycle schedule.

    The rate rises linearly from ``start_rate`` to ``max_rate`` during the
    first ``half_iteration`` batches, falls linearly back to ``start_rate``
    during the next ``half_iteration`` batches, and then falls linearly to
    ``last_rate`` until batch ``iterations``. Batches beyond ``iterations``
    keep ``last_rate``.

    Args:
        iterations: total number of batches the schedule spans.
        max_rate: peak learning rate.
        start_rate: initial learning rate (default: ``max_rate / 10``).
        last_iterations: number of batches in the final phase
            (default: ``iterations // 10 + 1``).
        last_rate: final learning rate (default: ``start_rate / 1000``).
    """

    def __init__(self, iterations, max_rate, start_rate=None, last_iterations=None, last_rate=None):
        super().__init__()
        self.total_iteration = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        # Length of the rising phase and of the falling phase.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        self.current_iteration = 0

    def _interpolate(self, from_iter, to_iter2, from_rate, to_rate):
        """Linearly interpolate the rate for ``current_iteration`` between two points."""
        return ((to_rate - from_rate) * (self.current_iteration - from_iter) / (to_iter2 - from_iter) + from_rate)

    def on_batch_begin(self, batch, logs=None):
        if self.current_iteration < self.half_iteration:
            # Rising phase: start_rate -> max_rate.
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.current_iteration < 2 * self.half_iteration:
            # Falling phase: max_rate -> start_rate.
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration, self.max_rate, self.start_rate)
        elif self.current_iteration < self.total_iteration:
            # Final phase: start_rate -> last_rate.
            rate = self._interpolate(2 * self.half_iteration, self.total_iteration, self.start_rate, self.last_rate)
        else:
            # Training ran past the planned number of batches: hold the final
            # rate instead of extrapolating the line, which would eventually
            # produce a negative learning rate.
            rate = self.last_rate
        self.current_iteration += 1
        self.model.optimizer.lr = rate


In [26]:
# How to attach weight regularization to a layer. Each assignment replaces the
# previous one; `layer` ends up bound to the l1_l2 variant.
# L1 regularization
layer = keras.layers.Dense(
    100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l1(0.01),
)
# L2 regularization
layer = keras.layers.Dense(
    100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l2(0.01),
)
# Combined L1 and L2 regularization
layer = keras.layers.Dense(
    100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_regularizer=keras.regularizers.l1_l2(0.01, 0.01),
)

In [39]:
from functools import partial
# partial() pre-binds keyword arguments of a callable, giving a new callable
# with different defaults that can still be overridden per call.
RegularizedDense = partial(keras.layers.Dense,
                           activation='elu',
                           kernel_initializer='he_normal',
                           kernel_regularizer=keras.regularizers.l1_l2(0.01, 0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28,28]),
    RegularizedDense(300),
    RegularizedDense(100, activation='relu'),
    RegularizedDense(10, activation='softmax', kernel_initializer='glorot_uniform')
])

# Show which activation each Dense layer ended up with: the first keeps the
# pre-bound default, the other two override it. A plain loop is used because
# the prints are side effects, not values to collect.
for dense_layer in model.layers[1:]:
    print(dense_layer.activation)

<function elu at 0x0000025E2C58A8B0>
<function relu at 0x0000025E2C58E160>
<function softmax at 0x0000025E2C58A700>


In [46]:
# MLP with dropout (rate 0.2) applied before every Dense layer.
# The input shape is 28x28 to match the Fashion MNIST images used by the
# other models in this chapter; the earlier [29, 29] was a typo.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(100, activation='elu', kernel_initializer='he_normal'),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(10, activation='softmax')
])

In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score

# Prepare the data (example dataset): flatten every CIFAR-10 image into a
# 3072-value float32 vector and scale it by 1/255.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
x_train = x_train.reshape(-1, 3072).astype('float32') / 255
x_test = x_test.reshape(-1, 3072).astype('float32') / 255

# Define the model: two hidden layers, each followed by dropout.
model = Sequential()
model.add(Dense(300, activation='relu', input_shape=(3072,)))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile
model.compile(optimizer='adam', loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train
model.fit(x_train, y_train, batch_size=1024, epochs=100, validation_split=0.2)

# Evaluate with MC Dropout: run 50 forward passes with dropout kept active
# (training=True) and average the predicted probabilities.
mc_passes = [model(x_test, training=True) for _ in range(50)]
y_probas = np.stack(mc_passes)
y_proba = y_probas.mean(axis=0)
y_pred = np.argmax(y_proba, axis=1)

# Accuracy of a regular prediction first, then of the MC Dropout average.
plain_pred = np.argmax(model.predict(x_test), axis=1)
print(accuracy_score(y_test, plain_pred))
print(accuracy_score(y_test, y_pred))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100

In [None]:
class MCDropout(keras.layers.Dropout):
    """Dropout layer that stays active regardless of the training flag (for MC Dropout)."""

    def call(self, inputs):
        # Always run the parent layer in training mode.
        mc_outputs = super().call(inputs, training=True)
        return mc_outputs

# Rebuild the trained model with every Dropout layer swapped for MCDropout;
# all other layers are reused as they are.
mc_layers = [
    MCDropout(layer.rate) if isinstance(layer, keras.layers.Dropout) else layer
    for layer in model.layers
]
mc_model = keras.models.Sequential(mc_layers)

optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
mc_model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
mc_model.set_weights(model.get_weights())

# Average 10 stochastic predictions. (Calling mc_model(x_test, training=True)
# in a loop would be an alternative to predict().)
mc_passes = [mc_model.predict(x_test) for _ in range(10)]
y_probas = np.stack(mc_passes)

y_proba = y_probas.mean(axis=0)
y_pred = np.argmax(y_proba, axis=1)

# NOTE(review): the first figure also comes from mc_model, whose dropout is
# always active, so it is a single stochastic pass rather than a
# deterministic baseline — confirm this is the intended comparison.
single_pass_pred = np.argmax(mc_model.predict(x_test), axis=1)
print(accuracy_score(y_test, single_pass_pred))
print(accuracy_score(y_test, y_pred))

In [3]:
# Max-norm regularization: constrain the layer's kernel with max_norm(1.).
keras.layers.Dense(
    units=100,
    activation='elu',
    kernel_initializer='he_normal',
    kernel_constraint=keras.constraints.max_norm(1.),
);

In [14]:
import tensorflow as tf

# Start from a clean Keras state and fix the random seeds.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# Deep MLP for CIFAR-10: 20 hidden layers of 100 ELU units with He
# initialization, followed by a 10-way softmax.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 activation="elu",
                                 kernel_initializer="he_normal"))
model.add(keras.layers.Dense(10, activation="softmax"))

optimizer = keras.optimizers.Nadam(learning_rate=5e-5)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])

(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()

# Use images 2000-9999 for training and the first 2000 for validation;
# the labels are flattened to 1-D.
X_train = X_train_full[2000:10000]/255
y_train = y_train_full[2000:10000].flatten()
X_valid = X_train_full[:2000]/255
y_valid = y_train_full[:2000].flatten()

early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
callbacks = [early_stopping_cb]

# The callbacks list has to be handed to fit(); without it early stopping is
# never applied and training always runs the full 200 epochs.
history = model.fit(X_train, y_train, epochs=200, batch_size=512,
                    validation_data=(X_valid, y_valid),
                    callbacks=callbacks, verbose=2)

Epoch 1/200
16/16 - 2s - loss: 3.1143 - accuracy: 0.1166 - val_loss: 2.4854 - val_accuracy: 0.1315 - 2s/epoch - 136ms/step
Epoch 2/200
16/16 - 0s - loss: 2.3339 - accuracy: 0.1628 - val_loss: 2.2541 - val_accuracy: 0.1860 - 279ms/epoch - 17ms/step
Epoch 3/200
16/16 - 0s - loss: 2.1691 - accuracy: 0.2120 - val_loss: 2.1269 - val_accuracy: 0.2220 - 232ms/epoch - 14ms/step
Epoch 4/200
16/16 - 0s - loss: 2.0951 - accuracy: 0.2379 - val_loss: 2.0982 - val_accuracy: 0.2280 - 222ms/epoch - 14ms/step
Epoch 5/200
16/16 - 0s - loss: 2.0404 - accuracy: 0.2526 - val_loss: 2.0090 - val_accuracy: 0.2690 - 219ms/epoch - 14ms/step
Epoch 6/200
16/16 - 0s - loss: 1.9975 - accuracy: 0.2755 - val_loss: 1.9764 - val_accuracy: 0.2820 - 228ms/epoch - 14ms/step
Epoch 7/200
16/16 - 0s - loss: 1.9700 - accuracy: 0.2851 - val_loss: 1.9823 - val_accuracy: 0.2830 - 247ms/epoch - 15ms/step
Epoch 8/200
16/16 - 0s - loss: 1.9397 - accuracy: 0.2980 - val_loss: 1.9876 - val_accuracy: 0.2855 - 236ms/epoch - 15ms/step
Ep

## 연습문제 구현

In [1]:
import numpy as np, os, math
from tensorflow import keras

def get_run_logdir(idx):
    """Return the log directory ``<cwd>/my_board/run_NN`` for run number ``idx``.

    ``idx`` is zero-padded to at least two digits.
    """
    run_name = 'run_{:02}'.format(idx)
    board_root = os.path.join(os.getcwd(), 'my_board')
    return os.path.join(board_root, run_name)


# Load CIFAR-10.
(X_train_full, y_train_full), (X_test_full, y_test_full) = keras.datasets.cifar10.load_data()

# First 2000 images -> validation, images 2000-9999 -> training. Pixels are
# divided by 255 and the labels are flattened to 1-D.
X_valid, y_valid = X_train_full[:2000] / 255, y_train_full[:2000].flatten()
X_train, y_train = X_train_full[2000:10000] / 255, y_train_full[2000:10000].flatten()
X_test, y_test = X_test_full / 255, y_test_full.flatten()

# Early-stopping callback used by the training runs in this section.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)

In [2]:
from tensorflow import keras
import numpy as np

class MakeModel(keras.models.Sequential):
    """Compiled MLP classifier: Input -> Flatten -> ELU Dense stack -> softmax.

    Args:
        input_num: shape of one input instance, e.g. ``(32, 32, 3)``.
        hidden_nums: iterable with the number of units of each hidden layer.
        output_num: number of output classes.
        **kwargs: forwarded to ``keras.models.Sequential`` (e.g. ``name``).
    """

    def __init__(self, input_num, hidden_nums, output_num, **kwargs):
        hidden_layers = [
            keras.layers.Dense(hidden_num, 'elu', kernel_initializer='he_normal')
            for hidden_num in hidden_nums
        ]
        super().__init__(
            # The Input layer carries the shape, so Flatten needs no
            # hard-coded input_shape of its own.
            [keras.layers.Input(shape=input_num), keras.layers.Flatten()]
            + hidden_layers
            # Size the output layer from output_num instead of a fixed 10.
            + [keras.layers.Dense(output_num, 'softmax')],
            **kwargs
        )

        self.compile('nadam', 'sparse_categorical_crossentropy', ['accuracy'])
    
# 20 hidden layers whose widths shrink linearly from 1028 to 100 units.
hidden_widths = np.linspace(1028, 100, 20).astype(int)
h_ls = hidden_widths.tolist()
model = MakeModel((32, 32, 3), h_ls, 10)

# Run 1: train with early stopping and TensorBoard logging, then evaluate on
# the test set.
tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir(1))
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_valid, y_valid),
    verbose=2,
    callbacks=[early_stopping_cb, tensorboard_cb],
)
model.evaluate(X_test, y_test)

Epoch 1/100
16/16 - 4s - loss: 14.4061 - accuracy: 0.0981 - val_loss: 3.9535 - val_accuracy: 0.1040 - 4s/epoch - 230ms/step
Epoch 2/100
16/16 - 1s - loss: 7.3103 - accuracy: 0.1007 - val_loss: 5.4795 - val_accuracy: 0.1015 - 906ms/epoch - 57ms/step
Epoch 3/100
16/16 - 1s - loss: 5.1126 - accuracy: 0.1016 - val_loss: 5.0060 - val_accuracy: 0.0995 - 910ms/epoch - 57ms/step
Epoch 4/100
16/16 - 1s - loss: 4.4764 - accuracy: 0.1006 - val_loss: 5.2232 - val_accuracy: 0.1015 - 925ms/epoch - 58ms/step
Epoch 5/100
16/16 - 1s - loss: 3.4890 - accuracy: 0.0949 - val_loss: 2.5262 - val_accuracy: 0.1015 - 917ms/epoch - 57ms/step
Epoch 6/100
16/16 - 1s - loss: 2.4408 - accuracy: 0.1070 - val_loss: 2.2713 - val_accuracy: 0.1730 - 921ms/epoch - 58ms/step
Epoch 7/100
16/16 - 1s - loss: 2.3341 - accuracy: 0.1175 - val_loss: 2.2820 - val_accuracy: 0.1605 - 917ms/epoch - 57ms/step
Epoch 8/100
16/16 - 1s - loss: 2.2883 - accuracy: 0.1396 - val_loss: 2.3412 - val_accuracy: 0.1060 - 927ms/epoch - 58ms/step
E

[1.8897080421447754, 0.38989999890327454]

In [3]:
class MakeModel(keras.models.Model):
    """Compiled functional MLP with batch normalization after every hidden layer.

    Args:
        input_num: shape of one input instance, e.g. ``(32, 32, 3)``.
        hidden_nums: iterable with the number of units of each hidden layer.
        output_num: number of output classes.
        **kwargs: forwarded to ``keras.models.Model`` (e.g. ``name``).
    """

    def __init__(self, input_num, hidden_nums, output_num, **kwargs):
        x = keras.layers.Input(shape=input_num)
        # The Input tensor carries the shape, so Flatten needs no hard-coded
        # input_shape of its own.
        h = keras.layers.Flatten()(x)
        for hidden_num in hidden_nums:
            h = keras.layers.Dense(hidden_num, 'elu', kernel_initializer='he_normal')(h)
            h = keras.layers.BatchNormalization()(h)
        # Size the output layer from output_num instead of a fixed 10.
        y = keras.layers.Dense(output_num, 'softmax')(h)
        super().__init__(x, y, **kwargs)
        self.compile('nadam', 'sparse_categorical_crossentropy', ['accuracy'])
    
# Same hidden-layer widths as before: 20 layers from 1028 down to 100 units.
hidden_widths = np.linspace(1028, 100, 20).astype(int)
h_ls = hidden_widths.tolist()
model = MakeModel((32, 32, 3), h_ls, 10)

# Run 2: the batch-normalized model, trained and evaluated like run 1.
tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir(2))
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_valid, y_valid),
    verbose=2,
    callbacks=[early_stopping_cb, tensorboard_cb],
)
model.evaluate(X_test, y_test)

Epoch 1/100
16/16 - 12s - loss: 2.1578 - accuracy: 0.2738 - val_loss: 21.7483 - val_accuracy: 0.1050 - 12s/epoch - 766ms/step
Epoch 2/100
16/16 - 1s - loss: 1.6291 - accuracy: 0.4191 - val_loss: 14.1378 - val_accuracy: 0.1420 - 1s/epoch - 83ms/step
Epoch 3/100
16/16 - 1s - loss: 1.4607 - accuracy: 0.4864 - val_loss: 8.3764 - val_accuracy: 0.1370 - 1s/epoch - 89ms/step
Epoch 4/100
16/16 - 2s - loss: 1.3215 - accuracy: 0.5385 - val_loss: 7.0054 - val_accuracy: 0.1625 - 2s/epoch - 129ms/step
Epoch 5/100
16/16 - 2s - loss: 1.2462 - accuracy: 0.5616 - val_loss: 4.4123 - val_accuracy: 0.1960 - 2s/epoch - 103ms/step
Epoch 6/100
16/16 - 1s - loss: 1.0885 - accuracy: 0.6200 - val_loss: 3.8427 - val_accuracy: 0.2075 - 1s/epoch - 88ms/step
Epoch 7/100
16/16 - 1s - loss: 1.0134 - accuracy: 0.6436 - val_loss: 4.2179 - val_accuracy: 0.1650 - 1s/epoch - 85ms/step
Epoch 8/100
16/16 - 1s - loss: 0.9497 - accuracy: 0.6693 - val_loss: 4.3009 - val_accuracy: 0.2160 - 1s/epoch - 85ms/step
Epoch 9/100
16/16

[4.104167461395264, 0.3361999988555908]

In [4]:
# Standardize the inputs with the per-pixel mean and standard deviation of the
# training slice (images 2000-9999); the same statistics are applied to the
# validation and test images.
train_pixels = X_train_full[2000:10000]
pixel_means = train_pixels.mean(axis=0, keepdims=True)
pixel_stds = train_pixels.std(axis=0, keepdims=True)
X_train_scaled = (train_pixels - pixel_means) / pixel_stds
X_valid_scaled = (X_train_full[:2000] - pixel_means) / pixel_stds
X_test_scaled = (X_test_full - pixel_means) / pixel_stds

# Functional model: SELU + LeCun-normal hidden layers, each followed by
# batch normalization.
instance_shape = X_train.shape[1:]
x = keras.layers.Input(shape=instance_shape)
h = keras.layers.Flatten(input_shape=instance_shape)(x)
for n_units in h_ls:
    h = keras.layers.Dense(n_units, 'selu', kernel_initializer='lecun_normal')(h)
    h = keras.layers.BatchNormalization()(h)
y = keras.layers.Dense(10, 'softmax')(h)
model = keras.Model(x, y)
model.compile(optimizer='nadam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Run 3: train on the standardized data and evaluate on the test set.
tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir(3))
history = model.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_valid_scaled, y_valid),
    verbose=2,
    callbacks=[early_stopping_cb, tensorboard_cb],
)
model.evaluate(X_test_scaled, y_test)

Epoch 1/100
16/16 - 11s - loss: 2.2154 - accuracy: 0.2463 - val_loss: 3.6938 - val_accuracy: 0.2180 - 11s/epoch - 718ms/step
Epoch 2/100
16/16 - 1s - loss: 1.7900 - accuracy: 0.3569 - val_loss: 3.3271 - val_accuracy: 0.2375 - 1s/epoch - 83ms/step
Epoch 3/100
16/16 - 1s - loss: 1.6747 - accuracy: 0.4027 - val_loss: 3.1751 - val_accuracy: 0.2545 - 1s/epoch - 87ms/step
Epoch 4/100
16/16 - 1s - loss: 1.5879 - accuracy: 0.4386 - val_loss: 3.1795 - val_accuracy: 0.2830 - 1s/epoch - 90ms/step
Epoch 5/100
16/16 - 2s - loss: 1.5006 - accuracy: 0.4706 - val_loss: 2.5543 - val_accuracy: 0.3400 - 2s/epoch - 118ms/step
Epoch 6/100
16/16 - 1s - loss: 1.4697 - accuracy: 0.4839 - val_loss: 2.3632 - val_accuracy: 0.3595 - 1s/epoch - 92ms/step
Epoch 7/100
16/16 - 1s - loss: 1.4146 - accuracy: 0.5058 - val_loss: 3.8070 - val_accuracy: 0.2750 - 1s/epoch - 88ms/step
Epoch 8/100
16/16 - 1s - loss: 1.3799 - accuracy: 0.5231 - val_loss: 3.0113 - val_accuracy: 0.2670 - 1s/epoch - 86ms/step
Epoch 9/100
16/16 - 

[3.7841546535491943, 0.3431999981403351]

In [5]:
class MakeModel(keras.models.Sequential):
    """Compiled SELU MLP with batch normalization and alpha dropout after each hidden layer.

    Args:
        input_num: shape of one input instance, e.g. ``(32, 32, 3)``.
        hidden_nums: iterable with the number of units of each hidden layer.
        output_num: number of output classes.
        **kwargs: forwarded to ``keras.models.Sequential`` (e.g. ``name``).
    """

    def __init__(self, input_num, hidden_nums, output_num, **kwargs):
        super().__init__(**kwargs)
        self.add(keras.layers.Input(shape=input_num))
        # The Input layer carries the shape, so Flatten needs no hard-coded
        # input_shape of its own.
        self.add(keras.layers.Flatten())
        for hidden_num in hidden_nums:
            self.add(keras.layers.Dense(hidden_num, 'selu', kernel_initializer='lecun_normal'))
            self.add(keras.layers.BatchNormalization())
            self.add(keras.layers.AlphaDropout(0.5))
        # Size the output layer from output_num instead of a fixed 10.
        self.add(keras.layers.Dense(output_num, 'softmax'))
        self.compile('nadam', 'sparse_categorical_crossentropy', ['accuracy'])

# Run 4: the alpha-dropout model on the standardized data.
model = MakeModel((32, 32, 3), h_ls, 10)

tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir(4))
history = model.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_valid_scaled, y_valid),
    verbose=2,
    callbacks=[early_stopping_cb, tensorboard_cb],
)
test_metrics = model.evaluate(X_test_scaled, y_test)
print(test_metrics)

class MCDropout(keras.layers.AlphaDropout):
    """AlphaDropout layer that stays active regardless of the training flag (for MC Dropout)."""

    def call(self, inputs):
        # Always run the parent layer in training mode.
        mc_outputs = super().call(inputs, training=True)
        return mc_outputs

# Rebuild the trained model with every AlphaDropout layer swapped for
# MCDropout; all other layers are reused as they are.
mc_model = keras.models.Sequential([
    MCDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
    for layer in model.layers
])

# NOTE(review): this optimizer is not used — compile() below is given the
# string 'nadam'. Kept so the name stays defined; confirm it can be removed.
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
mc_model.compile('nadam', 'sparse_categorical_crossentropy', ['accuracy'])
mc_model.set_weights(model.get_weights())

# Average the class probabilities of 100 stochastic predictions.
y_probas = np.stack([mc_model.predict(X_test_scaled)
                     for sample in range(100)])

y_proba = y_probas.mean(axis=0)
y_pred = np.argmax(y_proba, axis=1)

# accuracy_score is not imported in this part of the notebook, so compute the
# accuracy directly: the fraction of predictions equal to the 1-D label array.
print(np.mean(y_pred == y_test))

Epoch 1/100
16/16 - 13s - loss: 3.0184 - accuracy: 0.1015 - val_loss: 2.8632 - val_accuracy: 0.1220 - 13s/epoch - 811ms/step
Epoch 2/100
16/16 - 1s - loss: 2.9525 - accuracy: 0.1000 - val_loss: 2.7808 - val_accuracy: 0.1145 - 1s/epoch - 91ms/step
Epoch 3/100
16/16 - 2s - loss: 2.8248 - accuracy: 0.1046 - val_loss: 2.8957 - val_accuracy: 0.1065 - 2s/epoch - 99ms/step
Epoch 4/100
16/16 - 1s - loss: 2.7818 - accuracy: 0.0978 - val_loss: 2.6763 - val_accuracy: 0.1205 - 1s/epoch - 93ms/step
Epoch 5/100
16/16 - 1s - loss: 2.7047 - accuracy: 0.0988 - val_loss: 2.6166 - val_accuracy: 0.1120 - 1s/epoch - 90ms/step
Epoch 6/100
16/16 - 1s - loss: 2.6429 - accuracy: 0.1005 - val_loss: 2.4326 - val_accuracy: 0.1220 - 1s/epoch - 93ms/step
Epoch 7/100
16/16 - 1s - loss: 2.6034 - accuracy: 0.0945 - val_loss: 2.4396 - val_accuracy: 0.0990 - 1s/epoch - 94ms/step
Epoch 8/100
16/16 - 2s - loss: 2.5527 - accuracy: 0.1046 - val_loss: 2.3925 - val_accuracy: 0.1005 - 2s/epoch - 94ms/step
Epoch 9/100
16/16 - 2

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [6]:
# Finding a suitable learning rate
class ExponentialLearningRate(keras.callbacks.Callback):
    """Callback that multiplies the learning rate by ``factor`` after every batch.

    The learning rate in effect and the loss of each batch are recorded in
    ``rates`` and ``losses`` so that loss can be inspected as a function of
    the learning rate afterwards.

    Args:
        factor: multiplier applied to the optimizer's learning rate after
            each batch.
    """

    def __init__(self, factor):
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []

    def on_epoch_begin(self, epoch, logs=None):
        self.prev_loss = 0

    def on_batch_end(self, batch, logs=None):
        # The formula treats logs["loss"] as the running mean over the epoch
        # and recovers the loss of this single batch from two consecutive means.
        batch_loss = logs["loss"] * (batch + 1) - self.prev_loss * batch
        self.prev_loss = logs["loss"]
        # Read the rate from the model this callback is attached to, not from
        # a global variable that happens to be called `model`.
        self.rates.append(self.model.optimizer.lr.numpy())
        self.losses.append(batch_loss)
        self.model.optimizer.lr = self.model.optimizer.lr * self.factor

def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    """Sweep the learning rate exponentially from ``min_rate`` to ``max_rate``.

    The model is trained for ``epochs`` epochs while an
    ``ExponentialLearningRate`` callback grows the learning rate after each
    batch. The model's weights and learning rate are put back to their
    initial values afterwards, also when training fails or is interrupted.

    Args:
        model: compiled Keras model.
        X, y: training inputs and targets.
        epochs: number of epochs used for the sweep.
        batch_size: batch size used for the sweep.
        min_rate: learning rate at the first batch.
        max_rate: learning rate the sweep is scaled to reach at the end.

    Returns:
        ``(rates, losses)``: per-batch learning rates and losses recorded by
        the callback.
    """
    init_weights = model.get_weights()
    init_lr = model.optimizer.lr.numpy()

    # Choose the per-batch factor so that min_rate * factor**iterations == max_rate.
    iterations = math.ceil(len(X) / batch_size) * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    exp_lr = ExponentialLearningRate(factor)

    model.optimizer.lr = min_rate
    try:
        model.fit(X, y, epochs=epochs, batch_size=batch_size,
                  callbacks=[exp_lr])
    finally:
        # Always undo the sweep so the caller's model is not left with a huge
        # learning rate and diverged weights.
        model.optimizer.lr = init_lr
        model.set_weights(init_weights)

    return exp_lr.rates, exp_lr.losses

model = MakeModel((32, 32, 3), h_ls, 10)
rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=1)

# Take the rate recorded 10 batches before the lowest loss. The index is
# clamped at 0: if the minimum falls within the first ten batches, a negative
# index would wrap around and select a rate from the very end of the sweep.
best_idx = losses.index(min(losses))
lr = rates[max(best_idx - 10, 0)]
# NOTE(review): a result close to max_rate suggests the loss curve of the
# sweep is unreliable (e.g. batch_size=1) — inspect `losses` before using it.
print(lr)

9.676056


In [7]:
# 1cycle scheduler
class OneCycleScheduler(keras.callbacks.Callback):
    """Callback that sets the learning rate per batch following a 1cycle schedule.

    The rate rises linearly from ``start_rate`` to ``max_rate`` during the
    first ``half_iteration`` batches, falls linearly back to ``start_rate``
    during the next ``half_iteration`` batches, and then falls linearly to
    ``last_rate`` until batch ``iterations``. Batches beyond ``iterations``
    keep ``last_rate``.

    Args:
        iterations: total number of batches the schedule spans.
        max_rate: peak learning rate.
        start_rate: initial learning rate (default: ``max_rate / 10``).
        last_iterations: number of batches in the final phase
            (default: ``iterations // 10 + 1``).
        last_rate: final learning rate (default: ``start_rate / 1000``).
    """

    def __init__(self, iterations, max_rate, start_rate=None, last_iterations=None, last_rate=None):
        super().__init__()
        self.total_iteration = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        # Length of the rising phase and of the falling phase.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        self.current_iteration = 0

    def _interpolate(self, from_iter, to_iter2, from_rate, to_rate):
        """Linearly interpolate the rate for ``current_iteration`` between two points."""
        return ((to_rate - from_rate) * (self.current_iteration - from_iter) / (to_iter2 - from_iter) + from_rate)

    def on_batch_begin(self, batch, logs=None):
        if self.current_iteration < self.half_iteration:
            # Rising phase: start_rate -> max_rate.
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.current_iteration < 2 * self.half_iteration:
            # Falling phase: max_rate -> start_rate.
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration, self.max_rate, self.start_rate)
        elif self.current_iteration < self.total_iteration:
            # Final phase: start_rate -> last_rate.
            rate = self._interpolate(2 * self.half_iteration, self.total_iteration, self.start_rate, self.last_rate)
        else:
            # Training ran past the planned number of batches: hold the final
            # rate instead of extrapolating the line, which would eventually
            # produce a negative learning rate.
            rate = self.last_rate
        self.current_iteration += 1
        self.model.optimizer.lr = rate

class MakeModel(keras.models.Sequential):
    """Compiled SELU MLP with batch normalization after every hidden layer.

    Args:
        input_num: shape of one input instance, e.g. ``(32, 32, 3)``.
        hidden_nums: iterable with the number of units of each hidden layer.
        output_num: number of output classes.
        **kwargs: forwarded to ``keras.models.Sequential`` (e.g. ``name``).
    """

    def __init__(self, input_num, hidden_nums, output_num, **kwargs):
        super().__init__(**kwargs)
        self.add(keras.layers.Input(shape=input_num))
        # The Input layer carries the shape, so Flatten needs no hard-coded
        # input_shape of its own.
        self.add(keras.layers.Flatten())
        for hidden_num in hidden_nums:
            self.add(keras.layers.Dense(hidden_num, 'selu', kernel_initializer='lecun_normal'))
            self.add(keras.layers.BatchNormalization())
        # Size the output layer from output_num instead of a fixed 10.
        self.add(keras.layers.Dense(output_num, 'softmax'))
        self.compile('nadam', 'sparse_categorical_crossentropy', ['accuracy'])

# Run 5: SELU + batch-normalization model on the standardized data.
# NOTE(review): OneCycleScheduler is defined in this cell but is not among the
# callbacks, so this run trains with Nadam's default learning rate — confirm
# whether the scheduler (and which max_rate) was meant to be used here.
model = MakeModel((32, 32, 3), h_ls, 10)

tensorboard_cb = keras.callbacks.TensorBoard(get_run_logdir(5))
history = model.fit(
    X_train_scaled, y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_valid_scaled, y_valid),
    verbose=2,
    callbacks=[tensorboard_cb],
)
test_metrics = model.evaluate(X_test_scaled, y_test)
print(test_metrics)

Epoch 1/100
16/16 - 20s - loss: 2.2121 - accuracy: 0.2447 - val_loss: 3.4649 - val_accuracy: 0.2500 - 20s/epoch - 1s/step
Epoch 2/100
16/16 - 3s - loss: 1.7829 - accuracy: 0.3664 - val_loss: 3.0001 - val_accuracy: 0.2910 - 3s/epoch - 175ms/step
Epoch 3/100
16/16 - 2s - loss: 1.6860 - accuracy: 0.3963 - val_loss: 2.8616 - val_accuracy: 0.3155 - 2s/epoch - 117ms/step
Epoch 4/100
16/16 - 2s - loss: 1.5879 - accuracy: 0.4366 - val_loss: 2.8086 - val_accuracy: 0.3165 - 2s/epoch - 95ms/step
Epoch 5/100
16/16 - 2s - loss: 1.5478 - accuracy: 0.4615 - val_loss: 2.8607 - val_accuracy: 0.3380 - 2s/epoch - 116ms/step
Epoch 6/100
16/16 - 2s - loss: 1.5059 - accuracy: 0.4723 - val_loss: 2.8439 - val_accuracy: 0.3260 - 2s/epoch - 128ms/step
Epoch 7/100
16/16 - 3s - loss: 1.3857 - accuracy: 0.5159 - val_loss: 2.7435 - val_accuracy: 0.3285 - 3s/epoch - 184ms/step
Epoch 8/100
16/16 - 3s - loss: 1.3865 - accuracy: 0.5163 - val_loss: 2.6510 - val_accuracy: 0.3680 - 3s/epoch - 170ms/step
Epoch 9/100
16/16 