In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
import seaborn as sns
import numpy as np

In [None]:
# Load the dataset from a CSV file expected in the notebook's working directory
house_data = pd.read_csv('kc_house_data.csv')
# Preview the first rows as a quick sanity check of the loaded columns
house_data.head()


In [None]:
# Histogram of sale prices, annotated in the middle of the axes with the
# variable name and the sample skewness of the price column.
ax = house_data['price'].plot.hist(bins=50)
ax.text(
    0.5, 0.5, 'Price',
    ha='center', va='center',
    transform=ax.transAxes, fontsize=15,
)
ax.text(
    0.5, 0.4, "Skewness: %f" % house_data['price'].skew(),
    ha='center', va='center',
    transform=ax.transAxes, fontsize=12,
)

In [None]:
# Model inputs and the column to predict
selected_features = ['bedrooms', 'bathrooms', 'sqft_living', 'floors', 'grade']
target = 'price'

# Keep only those columns and discard every row that has a missing value in any of them
clean_data = house_data.loc[:, [*selected_features, target]].dropna()

# Preview the cleaned frame
clean_data.head()

In [5]:
# Feature matrix (X) and target vector (y)
X = clean_data[selected_features]
y = clean_data[target]

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Min-max scale the features. The scaler is fitted on the training rows only
# and then reused unchanged for the test rows, so no test statistics leak in.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model the target in log space; log1p(x) = log(1 + x) is also defined at zero
y_train_log, y_test_log = np.log1p(y_train), np.log1p(y_test)


In [None]:
# Side-by-side histograms of the training target before and after the log transform
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

ax1.hist(y_train, bins=50)
ax1.set_title('Original Target')

ax2.hist(y_train_log, bins=50)
ax2.set_title('Log Transformed Target')

plt.show()


In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and dropout repeat on Restart & Run All
# (the train/test split above is already seeded with random_state=42).
tf.keras.utils.set_random_seed(42)

# Fully connected regressor: one input per scaled feature column, four ReLU
# hidden layers of decreasing width, and a single linear output unit that
# predicts the log-transformed target.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    # Light dropout after the widest layer
    tf.keras.layers.Dropout(0.1),
    # L2 weight penalty applied to this layer only
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    # No activation: plain linear output for regression
    tf.keras.layers.Dense(1)
])
model.summary()

In [8]:
# Adam with a small step size; MSE is the training loss and MAE is reported alongside it
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae'],
)

In [None]:
# Stop training once val_loss has gone 10 epochs without improving.
# restore_best_weights=True rolls the model back to the weights of the best
# epoch when it stops; without it the evaluated / exported model would keep
# the weights from 10 epochs *after* the best validation loss.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# The last 20% of the training rows are held out by Keras as the validation set
history = model.fit(
    X_train_scaled,
    y_train_log,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
)

In [None]:
# Training vs. validation loss per epoch, drawn on the same axes
for history_key, curve_label, curve_color in (
    ('loss', 'train loss', 'r'),
    ('val_loss', 'val loss', 'b'),
):
    plt.plot(history.history[history_key], label=curve_label, color=curve_color)
plt.legend()
plt.show()

In [None]:
# Score the held-out test rows; the targets passed here are the log1p-transformed prices
model.evaluate(x=X_test_scaled, y=y_test_log)

In [None]:
# Predict on the test features; both columns of the frame are in log1p(price) units
y_pred = model.predict(X_test_scaled)
df = pd.DataFrame({'Actual': y_test_log, 'Predicted': y_pred.flatten()})

# Bar chart comparing actual and predicted values for the first 25 test rows
fig, ax = plt.subplots()
df.iloc[:25].plot.bar(figsize=(16, 6), ax=ax)
plt.show()

# Overlaid histograms with KDE curves for the two columns
fig, ax = plt.subplots(figsize=(16, 5))
sns.histplot(
    data=df,
    fill=True,
    kde=True,
    palette="RdBu",
    alpha=.5,
    linewidth=0,
    ax=ax,
)
plt.show()

In [None]:
# Convert the trained Keras model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model next to the notebook; the converted model is
# written through a binary-mode file handle, overwriting any existing file
with open('house_price_model.tflite', 'wb') as f:
    f.write(tflite_model)

In [14]:
def predict_price(bedrooms, bathrooms, sqft_living, floors, grade):
    """Estimate the sale price of a single house from its five features.

    The arguments are placed in a one-row DataFrame with the columns
    'bedrooms', 'bathrooms', 'sqft_living', 'floors' and 'grade' (in that
    order), scaled with the notebook-level ``scaler`` and fed to the
    notebook-level ``model``. The model output is mapped back from log
    space with ``np.expm1``, the inverse of the ``np.log1p`` applied to
    the training target.

    Returns:
        The scalar at position [0][0] of the back-transformed prediction.
    """
    column_names = ('bedrooms', 'bathrooms', 'sqft_living', 'floors', 'grade')
    column_values = (bedrooms, bathrooms, sqft_living, floors, grade)

    # One single-element list per column -> a one-row frame
    feature_row = pd.DataFrame(
        {name: [value] for name, value in zip(column_names, column_values)}
    )

    scaled_row = scaler.transform(feature_row)
    log_price = model.predict(scaled_row)

    price = np.expm1(log_price)
    return price[0][0]

In [None]:
# the max values of the features, as recorded by the scaler when it was fitted
# on the training rows (one entry per feature column)
scaler.data_max_.tolist()

In [None]:
# the min values of the features, as recorded by the scaler when it was fitted
# on the training rows (one entry per feature column)
scaler.data_min_.tolist()

In [None]:
# Dump the five model inputs together with the price as a nested Python list
# (one inner list per row of the raw, un-cleaned frame).
# A separate name is used on purpose: rebinding `selected_features` here would
# add 'price' to the feature list, so re-running the split cell afterwards
# would silently leak the target into X.
export_columns = ['bedrooms', 'bathrooms', 'sqft_living', 'floors', 'grade', 'price']
house_data[export_columns].values.tolist()