In [1]:
import math
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import MeanSquaredLogarithmicError
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
import  keras_tuner as kt

In [2]:
# read the training and test data
train_data = pd.read_csv(r'california_housing_train.csv')
test_data = pd.read_csv(r'california_housing_test.csv')
TARGET_NAME = 'median_house_value'

In [3]:
train_data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [4]:
train_data.isnull().sum()

longitude             0
latitude              0
housing_median_age    0
total_rooms           0
total_bedrooms        0
population            0
households            0
median_income         0
median_house_value    0
dtype: int64

In [5]:
# split the data into features and target
x_train, y_train = train_data.drop(TARGET_NAME, axis=1), train_data[TARGET_NAME]
x_test, y_test = test_data.drop(TARGET_NAME, axis=1), test_data[TARGET_NAME]

In [6]:
y_train.head()

0    66900.0
1    80100.0
2    85700.0
3    73400.0
4    65500.0
Name: median_house_value, dtype: float64

In [7]:
def scale_datasets(x_train, x_test):
  """
  Standard Scale test and train data
  Z - Score normalization
  """
  standard_scaler = StandardScaler()
  x_train_scaled = pd.DataFrame(
      standard_scaler.fit_transform(x_train),
      columns=x_train.columns
  )
  x_test_scaled = pd.DataFrame(
      standard_scaler.transform(x_test),
      columns = x_test.columns
  )
  return x_train_scaled, x_test_scaled

In [8]:
# scale the dataset
x_train_scaled, x_test_scaled = scale_datasets(x_train, x_test)

In [9]:
def build_model(hp):
    model = keras.Sequential()
    for i in range(hp.Int('num_layers', 2, 20)):
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_absolute_error',
        metrics=['mean_absolute_error'])
    return model

In [11]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_mean_absolute_error',
    max_trials=5,
    executions_per_trial=3,
    directory='project',
    project_name='CalHouse')

In [13]:
tuner.search(x_train_scaled, y_train,
             epochs=5,
             validation_data=(x_test_scaled, y_test))

Trial 5 Complete [00h 02m 29s]
val_mean_absolute_error: 39677.139322916664

Best val_mean_absolute_error So Far: 39677.139322916664
Total elapsed time: 00h 06m 55s
INFO:tensorflow:Oracle triggered exit


In [14]:
tuner.results_summary()

Results summary
Results in project\CalHouse
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000022A0918DFD0>
Trial summary
Hyperparameters:
num_layers: 20
units_0: 256
units_1: 512
learning_rate: 0.001
units_2: 192
units_3: 448
units_4: 192
units_5: 352
units_6: 352
units_7: 320
units_8: 416
units_9: 256
units_10: 96
units_11: 256
units_12: 224
units_13: 288
units_14: 256
units_15: 224
units_16: 224
units_17: 32
units_18: 32
units_19: 32
Score: 39677.139322916664
Trial summary
Hyperparameters:
num_layers: 18
units_0: 352
units_1: 320
learning_rate: 0.0001
units_2: 480
units_3: 160
units_4: 160
units_5: 416
units_6: 64
units_7: 224
units_8: 192
units_9: 256
units_10: 128
units_11: 448
units_12: 64
units_13: 128
units_14: 128
units_15: 160
units_16: 32
units_17: 32
Score: 41771.243489583336
Trial summary
Hyperparameters:
num_layers: 9
units_0: 416
units_1: 96
learning_rate: 0.0001
units_2: 320
units_3: 320
units_4: 160
units_5: 480
units_6: 96
units_7: 448
uni