In [37]:
import os
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras
import numpy as np
train = pd.read_csv('kc_house_data.csv')
# Correlate numeric/bool columns only. Recent pandas versions raise when
# DataFrame.corr() meets a non-numeric column instead of silently skipping it,
# so the selection is made explicit here to work on both old and new versions.
corr_matrix = train.select_dtypes(include=['number', 'bool']).corr()
# Rank every remaining column by its correlation with the target, strongest first.
corr_matrix["price"].sort_values(ascending=False)

price            1.000000
sqft_living      0.702035
grade            0.667434
sqft_above       0.605567
sqft_living15    0.585379
bathrooms        0.525138
view             0.397293
sqft_basement    0.323816
bedrooms         0.308350
lat              0.307003
waterfront       0.266369
floors           0.256794
yr_renovated     0.126434
sqft_lot         0.089661
sqft_lot15       0.082447
yr_built         0.054012
condition        0.036362
long             0.021626
id              -0.016762
zipcode         -0.053203
Name: price, dtype: float64

In [38]:
# Discard the `id` column before any features are built.
train = train.drop(columns=['id'])

In [39]:
# Partition the columns by dtype: object-typed columns go to `train_cat`,
# every other column goes to `train_num`.
train_cat = train.select_dtypes(include='object')
train_num = train.select_dtypes(exclude='object')

In [40]:
# Rebind instead of mutating in place: both frames were derived via
# select_dtypes(), and inplace=True on a derived frame is non-idiomatic and can
# trigger SettingWithCopyWarning.
# Missing numeric values become 0; missing object values become the literal 'NONE'.
# NOTE(review): `price` is still part of train_num at this point, so a missing
# target would also be filled with 0 — confirm that is acceptable.
train_num = train_num.fillna(0)
train_cat = train_cat.fillna('NONE')

In [41]:
# One-hot encode the object columns into sparse-backed indicator columns;
# no separate indicator column is generated for NaN.
train_cat = pd.get_dummies(
    data=train_cat,
    dummy_na=False,
    sparse=True,
)

In [42]:
# Pull out the regression target; the list selector keeps it a one-column DataFrame.
label = train_num.loc[:, ["price"]]

In [43]:
# Remove the target from the feature frame now that it lives in `label`.
train_num = train_num.drop(columns="price")

In [44]:
# Remember the numeric feature names so they can be re-attached after scaling.
train_num_cols = train_num.columns.tolist()

In [45]:
# Rescale every numeric feature with a MinMaxScaler, then wrap the scaled array
# back into a DataFrame under the original column names.
# NOTE(review): the scaler is fit on all rows before the cross-validation split
# below, so validation folds influence the scaling — consider fitting per fold.
minmax_scaler = MinMaxScaler()
x = train_num.to_numpy()
x_scaled = minmax_scaler.fit_transform(x)
train_num = pd.DataFrame(data=x_scaled, columns=train_num_cols)

In [46]:
# Join the scaled numeric features with the one-hot columns, matching rows by index.
dataset = train_num.merge(train_cat, left_index=True, right_index=True)

In [47]:
# Put the columns into sorted-name order so the feature layout is deterministic.
dataset = dataset.reindex(columns=sorted(dataset.columns))

In [48]:
# From here on `dataset` is a plain NumPy array rather than a DataFrame.
dataset = dataset.to_numpy()

In [57]:
# Cast both the feature matrix and the target to float32 arrays for Keras.
X_train, y_train = (
    np.asarray(frame).astype(np.float32) for frame in (dataset, label)
)

In [58]:
def build_model(input_dim=None):
    """Build and compile the fully connected regression network.

    Args:
        input_dim: Number of input features. When omitted, falls back to
            ``X_train.shape[1]`` (the notebook-level feature matrix), so
            existing ``build_model()`` calls behave exactly as before.

    Returns:
        A compiled ``tf.keras`` Sequential model: Dense layers of
        1024/512/256/128 units with leaky-ReLU activations, dropout after the
        first three (0.4, 0.2, 0.2), and a single-unit Dense output with no
        activation. Compiled with Adam(0.001), mean-squared-logarithmic-error
        loss and an ``mse`` metric.
    """
    if input_dim is None:
        # Preserve the original behaviour of sizing the input from the global data.
        input_dim = X_train.shape[1]

    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(1024, activation=tf.nn.leaky_relu, input_shape=[input_dim]),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(512, activation=tf.nn.leaky_relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(256, activation=tf.nn.leaky_relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(128, activation=tf.nn.leaky_relu),
        tf.keras.layers.Dense(1),
    ])
    optimizer = tf.keras.optimizers.Adam(0.001)

    # The loss is MSLE; plain MSE is tracked alongside it as a metric.
    model.compile(loss='mean_squared_logarithmic_error',
                  optimizer=optimizer,
                  metrics=['mse'])

    return model

In [59]:
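# Early stopping is currently disabled.
# NOTE(review): to enable it, `fit` must be given validation data, and the monitored key
# must match what Keras logs — with metrics=['mse'] on TF 2.x that is presumably 'val_mse'
# (or use 'val_loss'), not 'val_mean_squared_error'; confirm against the installed version.
#early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_mean_squared_error', patience=20)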

In [61]:
# Manual k-fold cross-validation over contiguous, unshuffled partitions.
# NOTE(review): rows are not shuffled, and the trailing len(X_train) % k rows
# never land in a validation fold (they are always part of the training split).
k = 10
num_val = len(X_train) // k
num_epochs = 1000
all_scores = []  # one validation MSE per fold
for i in range(k):
    print('processing fold #', i)
    # A fresh model is built every iteration; clear Keras' global state first so
    # memory from previous folds' models does not accumulate across the loop.
    tf.keras.backend.clear_session()

    # Prepare the validation data: data from partition # i
    val_start, val_stop = i * num_val, (i + 1) * num_val
    X_val = X_train[val_start:val_stop]
    Y_val = y_train[val_start:val_stop]

    # Prepare the training data: data from all other partitions
    X_train_part = np.concatenate(
        [X_train[:val_start], X_train[val_stop:]],
        axis=0)
    Y_train_part = np.concatenate(
        [y_train[:val_start], y_train[val_stop:]],
        axis=0)

    # Build the Keras model (already compiled)
    model = build_model()
    # Train the model (in silent mode, verbose=0)
    model.fit(X_train_part, Y_train_part, epochs=num_epochs, batch_size=100, verbose=0)
    # Evaluate on the held-out fold. evaluate() returns the loss followed by the
    # compiled metrics; build_model() compiles with an 'mse' metric, so the
    # second value is a mean squared error, not an accuracy.
    val_loss, val_mse = model.evaluate(X_val, Y_val, verbose=0)
    all_scores.append(val_mse)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9


In [64]:
# Square root of the mean per-fold validation MSE, i.e. an RMSE in the target's units.
# (A bare np.mean(all_scores) before this line was discarded: only a cell's last
# expression is displayed.)
np.sqrt(np.mean(all_scores))

139080.45