# Boston housing price regression dataset
The dataset is taken from the StatLib library, which is maintained at Carnegie Mellon University.

Each sample contains 13 attributes describing houses at one location in the Boston suburbs in the late 1970s. Each target is the median value of the houses at that location (in thousands of dollars).

### Import TensorFlow


In [None]:
import tensorflow as tf
# Report the installed TensorFlow version so the recorded outputs can be tied to it.
print(tf.__version__)

2.18.0


### Import dataset
- This dataset can be imported directly from Keras
- The high-level Keras API bundles several ready-made datasets
- You can see all the available datasets here: https://keras.io/api/datasets/


In [None]:
import tensorflow.keras.datasets as ds
# dir() lists every attribute of the datasets module; the names without
# leading underscores are the dataset loaders that can be imported.
print(dir(ds))


['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'boston_housing', 'california_housing', 'cifar10', 'cifar100', 'fashion_mnist', 'imdb', 'mnist', 'reuters']


In [None]:
from tensorflow.keras.datasets import boston_housing

In [None]:
 boston_housing.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz
57026/57026 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


((array([[1.23247e+00, 0.00000e+00, 8.14000e+00, ..., 2.10000e+01,
          3.96900e+02, 1.87200e+01],
         [2.17700e-02, 8.25000e+01, 2.03000e+00, ..., 1.47000e+01,
          3.95380e+02, 3.11000e+00],
         [4.89822e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
          3.75520e+02, 3.26000e+00],
         ...,
         [3.46600e-02, 3.50000e+01, 6.06000e+00, ..., 1.69000e+01,
          3.62250e+02, 7.83000e+00],
         [2.14918e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
          2.61950e+02, 1.57900e+01],
         [1.43900e-02, 6.00000e+01, 2.93000e+00, ..., 1.56000e+01,
          3.76700e+02, 4.38000e+00]]),
  array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4, 12.1,
         17.9, 23.1, 19.9, 15.7,  8.8, 50. , 22.5, 24.1, 27.5, 10.9, 30.8,
         32.9, 24. , 18.5, 13.3, 22.9, 34.7, 16.6, 17.5, 22.3, 16.1, 14.9,
         23.1, 34.9, 25. , 13.9, 13.1, 20.4, 20. , 15.2, 24.7, 22.2, 16.7,
         12.7, 15.6, 18.4, 21. , 30.1, 15.1, 18.7,  9.6, 31.

In [None]:
# test_split=0 reserves nothing for the test set, so the first (train) tuple
# holds every sample in the dataset.
boston_housing.load_data(test_split=0)

((array([[1.23247e+00, 0.00000e+00, 8.14000e+00, ..., 2.10000e+01,
          3.96900e+02, 1.87200e+01],
         [2.17700e-02, 8.25000e+01, 2.03000e+00, ..., 1.47000e+01,
          3.95380e+02, 3.11000e+00],
         [4.89822e+00, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
          3.75520e+02, 3.26000e+00],
         ...,
         [1.83377e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
          3.89610e+02, 1.92000e+00],
         [3.58090e-01, 0.00000e+00, 6.20000e+00, ..., 1.74000e+01,
          3.91700e+02, 9.71000e+00],
         [2.92400e+00, 0.00000e+00, 1.95800e+01, ..., 1.47000e+01,
          2.40160e+02, 9.81000e+00]]),
  array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4, 12.1,
         17.9, 23.1, 19.9, 15.7,  8.8, 50. , 22.5, 24.1, 27.5, 10.9, 30.8,
         32.9, 24. , 18.5, 13.3, 22.9, 34.7, 16.6, 17.5, 22.3, 16.1, 14.9,
         23.1, 34.9, 25. , 13.9, 13.1, 20.4, 20. , 15.2, 24.7, 22.2, 16.7,
         12.7, 15.6, 18.4, 21. , 30.1, 15.1, 18.7,  9.6, 31.

In [None]:
# boston_housing.load_data() returns two tuples: (train features, train targets)
# and (test features, test targets). With test_split=0 nothing is held out, so
# the train tuple contains the whole dataset and the test tuple is discarded.
(features, actual_prices), _ = boston_housing.load_data(test_split=0)

### Getting details of dataset
- We will see how many rows (examples) there are in the data
- We will check how many features each example has

In [None]:
# Rows of the feature matrix are examples; columns are the per-example features.
num_examples, num_features = features.shape[:2]

print('Number of examples: ', num_examples)
print('Number of features for each example: ', num_features)
print('Shape of actual prices data: ', actual_prices.shape)

Number of examples:  506
Number of features for each example:  13
Shape of actual prices data:  (506,)


### Build the model
- The Sequential model is a linear stack of layers.
- The model needs to know what input shape it should expect. For this reason, the first layer in a Sequential model (and only the first, because following layers can do automatic shape inference) needs to receive information about its input shape.
- You can also simply add layers via the .add() method
- You can read more about it here: https://keras.io/guides/sequential_model/

In [None]:
# Initialize Sequential model
model = tf.keras.models.Sequential()

# Declare the input shape explicitly: one vector of 13 features per example.
# Keras 3 warns when `input_shape` is passed to a layer and recommends an
# Input object as the first entry of a Sequential model instead.
model.add(tf.keras.Input(shape=(13,)))

# Normalize input data
model.add(tf.keras.layers.BatchNormalization())

# Add Dense layer for prediction - Keras declares weights and bias automatically.
# A single unit with no activation gives one linear output: the predicted price.
model.add(tf.keras.layers.Dense(1))

  super().__init__(**kwargs)


### Compile the model
- Here we configure the model for training
- We will specify an optimizer and a loss function
- You can read more about it here: https://keras.io/api/models/sequential/


In [None]:
# Configure training: mean squared error is the loss to minimize, and
# stochastic gradient descent is the optimizer that minimizes it.
model.compile(loss='mse', optimizer='sgd')

### Fit the model
- .fit() trains the model for a fixed number of epochs (iterations on a dataset)
- An epoch is an iteration over the entire x and y data provided

In [None]:
# Train on the full dataset for 5 passes (epochs). fit() returns a History
# object; leaving the call as a bare expression lets the notebook display it.
model.fit(features, actual_prices, epochs=5)

Epoch 1/5
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 299.3208
Epoch 2/5
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 41.0864
Epoch 3/5
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 35.7660
Epoch 4/5
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 39.4049
Epoch 5/5
16/16 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step - loss: 32.3799


<keras.src.callbacks.history.History at 0x7cc3b22c8450>