In [1]:
from keras.datasets import boston_housing

# load_data() hands back two (features, targets) pairs: the training split
# first, then the test split. Unpack them in two steps for readability.
train_split, test_split = boston_housing.load_data()
train_data, train_targets = train_split
test_data, test_targets = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz


In [5]:
# Normalization (feature-wise standardization):
# subtract each feature's mean, then divide by its standard deviation.
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)  # measured on the already-centred training data

# The test split is shifted by the *training* mean, and both splits are then
# scaled in place by the same training standard deviation.
test_data -= mean
for split in (train_data, test_data):
    split /= std

In [7]:
# 모델 정의
from keras import models
from keras import layers

def build_model():
    """Create and compile a fresh regression network.

    Wrapped in a function because the notebook builds the same model
    several times.

    Architecture: two 64-unit ReLU ``Dense`` layers followed by a single
    ``Dense(1)`` unit with no activation (a linear layer, the setup for
    scalar regression). The network is deliberately small: the fewer
    training samples there are, the more easily a model overfits.

    Compilation: RMSprop optimizer; loss ``mse`` (mean squared error, the
    squared distance between prediction and target); metric ``mae`` (mean
    absolute error, the absolute distance between prediction and target).

    The input width is read from the notebook-level ``train_data``
    (``train_data.shape[1]``), so that array must exist before calling.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_features = train_data.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),  # no activation: linear output for scalar regression
    ])
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network

In [9]:
# K-fold cross-validation
import numpy as np

k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []
for i in range(k):
    print("처리중인 폴드 #", i)
    # Row range of the i-th fold; these rows form the validation split.
    val_start = i * num_val_samples
    val_stop = val_start + num_val_samples
    val_data = train_data[val_start:val_stop]
    val_targets = train_targets[val_start:val_stop]

    # Every row outside that range forms the training split.
    partial_train_data = np.concatenate(
        [train_data[:val_start], train_data[val_stop:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:val_start], train_targets[val_stop:]], axis=0)

    # Build a fresh Keras model for this fold (build_model also compiles it).
    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1,
              verbose=0)  # verbose=0 -> do not print training progress
    # Score the trained model on the held-out fold; unpacked as (MSE loss, MAE).
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

처리중인 폴드 # 0


2022-03-14 16:40:11.877739: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 16:41:43.798115: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


처리중인 폴드 # 1


2022-03-14 16:41:44.491719: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 16:43:18.793940: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 16:43:18.994468: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


처리중인 폴드 # 2


2022-03-14 16:44:50.561270: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


처리중인 폴드 # 3


2022-03-14 16:44:50.765734: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-14 16:46:22.591084: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [10]:
# Validation MAE recorded for each fold, in fold order (one entry per fold).
all_scores

[2.2916173934936523, 2.871204376220703, 2.6052770614624023, 2.4288601875305176]

In [11]:
# Average of the per-fold validation MAEs: the overall cross-validation score.
np.mean(all_scores)

2.549239754676819