In [2]:
# 波士顿房价预测:回归

# numpy 1.17 版本与keras兼容有问题,需要设置 allow_pickle = True
import numpy as np
old = np.load
np.load = lambda *a,**k: old(*a,**k,allow_pickle=True)

from keras.datasets import boston_housing
# 加载数据
# 设置 path 以便使用本地文件
# targets中为房价
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data(path = "boston_housing.npz")

# 还原 numpy 设置
np.load = old

Using TensorFlow backend.


In [5]:
# 数据标准化
mean = train_data.mean(axis = 0)
train_data -= mean
std = train_data.std(axis = 0)
train_data /= std

test_data -= mean
test_data /= std


In [6]:
# 构建网络
#from keras import models
#from keras import layers
# tensorflow 2.0 版本,需要如下写法
from tensorflow.keras import models
from tensorflow.keras import layers

def build_model():
    # 层设置
    model = models.Sequential()
    model.add(layers.Dense(64, activation = 'relu', input_shape = (train_data.shape[1],)))
    model.add(layers.Dense(64, activation = 'relu'))
    model.add(layers.Dense(1))

    # 编译模型
    # 也可以使用类方式
    model.compile(optimizer = 'rmsprop', loss = 'mse', metrics = ['mae'])

    return model


In [7]:
# k折交叉验证:数据集较少,充分发挥数据潜力
import numpy as np

k = 4
num_val_samples = len(train_data) 
num_epochs = 100
all_scores = []

for i in range(k):
    print('processing fold #', i)
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]

    partial_train_data = np.concatenate( # 准备训练数据：其他所有分区的数据
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
         axis = 0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
         axis = 0)
    
    print(len(partial_train_data))
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
              epochs = num_epochs, batch_size = 1, verbose = 0)
    print(history.history)
    val_mse, val_mae = model.evalute(val_data, val_targets, verbose = 0)
    all_scores.append(val_mae)

processing fold # 0
0


In [None]:
# k折交叉验证:数据集较少,充分发挥数据潜力
# 保存每折的验证结果
import numpy as np

k = 4
num_val_samples = len(train_data) 
num_epochs = 100
all_scores = []
all_mae_histories = []

for i in range(k):
    print('processing fold #', i)
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]

    partial_train_data = np.concatenate( # 准备训练数据：其他所有分区的数据
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
         axis = 0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
         axis = 0)
    
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
              epochs = num_epochs, batch_size = 1, verbose = 0)
    mae_history = history.history['val_mean_absolute_error']
    all_mae_histories.append(mae_history)

# 计算 mae 均值
average_mae_history = [
    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]

In [None]:
# 绘制验证分数
%matplotlib inline
import matplotlib.pyplot as plt

plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel('Epoches')
plt.ylabel('Validation MAE')
plt.show()
