In [1]:
# Boston Housing price dataset: 506 samples in total, 404 of them in the training split.
# The features are measured on very different scales.
from keras.datasets import boston_housing

# load_data() returns one (data, targets) pair per split.
train_split, test_split = boston_housing.load_data()
train_data, train_targets = train_split
test_data, test_targets = test_split

2024-05-07 06:57:28.280905: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-07 06:57:28.280961: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-07 06:57:28.282238: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-07 06:57:28.290513: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz


In [2]:
train_data.shape
# (n_samples, n_features): 404 training samples, each described by 13 features.

(404, 13)

In [3]:
# Prepare the data: heterogeneous feature scales make learning difficult.
# Feature-wise normalization: (x - mean) / std_dev
#   each feature ends up centered around 0 with unit standard deviation.
# Note: all updates below are in place, so the raw values loaded earlier are overwritten.

# Column-wise mean of the training data; subtracting it centers every feature on 0.
mean = train_data.mean(axis=0)
train_data -= mean

# The std is taken on the already-centered data (centering does not change the spread).
std = train_data.std(axis=0)
train_data /= std

# The test data is scaled with the *training* mean and std;
# no statistic is ever computed from the test set itself.
test_data -= mean
test_data /= std

In [4]:
# Very small dataset, so use a small network: 2 hidden layers with 64 units each. This helps avoid overfitting.

from keras import models
from keras import layers

def build_model():
    """Create and compile a small fully connected regression network.

    Two 64-unit ReLU hidden layers feed a single linear output unit. The
    input width is read from the module-level `train_data` (its number of
    columns).

    Returns:
        A compiled Sequential model using the 'rmsprop' optimizer, 'mse' as
        the loss and 'mae' as a reported metric.
    """
    n_features = train_data.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(n_features,)),
        layers.Dense(64, activation='relu'),
        # No activation on the output layer: it is purely linear, so the
        # predicted values are not constrained to any range.
        layers.Dense(1),
    ])

    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network

In [6]:
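# The dataset is small, so a held-out validation set would be very small too, and the validation score
# would change drastically depending on which samples ended up in it.
# So, use K-fold cross-validation: split the data into k partitions (typically k = 4 or 5),
# instantiate k identical models, and train each one on k-1 partitions while evaluating on the remaining partition.

# Example with k = 3 (v = validation partition, t = training partition):
# fold 1 = v, t, t
# fold 2 = t, v, t
# fold 3 = t, t, v

# The final validation score is the average of the validation scores of all folds.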

import numpy as np

k = 4
num_val_samples = len(train_data) // k
num_epochs = 100

all_scores = []

for i in range(k):
    # Rows [start, stop) are held out as this fold's validation partition.
    start = i * num_val_samples
    stop = start + num_val_samples
    held_out = slice(start, stop)

    val_data = train_data[held_out]
    val_targets = train_targets[held_out]

    # Every row outside the held-out range is used for training.
    partial_train_data = np.delete(train_data, held_out, axis=0)
    partial_train_targets = np.delete(train_targets, held_out, axis=0)

    # A fresh model per fold, so no weights carry over between folds.
    model = build_model()
    model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=1, verbose=0)

    # The model is compiled with loss='mse' and metrics=['mae'], hence the two values.
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

In [7]:
# Validation MAE of each fold, followed by the average over all folds.
print(all_scores)
mean_val_mae = np.mean(all_scores)
print(mean_val_mae)

[2.1575584411621094, 2.495252847671509, 2.601358652114868, 2.634007453918457]
2.472044348716736


In [8]:
# Now we keep saving the MAE history and then plot the average of the MAE history over all folds i for 500 epochs. We observe that after about 80 epochs it stops improving.

# Final model: start from a fresh, compiled network.
model = build_model()

# Fit on the whole training set; no validation partition is held out here.
model.fit(train_data, train_targets, epochs=80, batch_size=16, verbose=0)

# Score on the test set; the model reports the MSE loss followed by the MAE metric.
final_scores = model.evaluate(test_data, test_targets)
test_mse_score, test_mae_score = final_scores
print(test_mse_score, test_mae_score, sep="\n")

16.983549118041992
2.5753986835479736
