In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

## Deep learning models

In [2]:
import zipfile

with zipfile.ZipFile("../data/cleaned/resale_price_clean_final.csv.zip", 'r') as zip_ref:
    file_name = zip_ref.namelist()[0] 
    with zip_ref.open(file_name) as file:
        df_resale = pd.read_csv(file)

df_resale = pd.get_dummies(df_resale, columns=['town'], prefix='town')
df_resale

Unnamed: 0,storey_range,floor_area_sqm,remaining_lease,resale_price,lat,lon,nearest_mrt_distance,nearest_bus_distance,education_score,shopping_score,...,town_PASIR RIS,town_PUNGGOL,town_QUEENSTOWN,town_SEMBAWANG,town_SENGKANG,town_SERANGOON,town_TAMPINES,town_TOA PAYOH,town_WOODLANDS,town_YISHUN
0,0.2500,0.083408,0.312121,0.131278,0.000000,0.456109,0.159033,0.226134,0.197110,0.225227,...,False,False,False,False,False,False,False,False,False,False
1,0.1250,0.083408,0.309346,0.131197,0.000000,0.456109,0.159033,0.226134,0.197110,0.225227,...,False,False,False,False,False,False,False,False,False,False
2,0.0625,0.083408,0.312121,0.127206,0.002888,0.454289,0.170487,0.124639,0.214251,0.226183,...,False,False,False,False,False,False,False,False,False,False
3,0.1875,0.116175,0.299116,0.171393,0.002888,0.454289,0.170487,0.124639,0.214251,0.226183,...,False,False,False,False,False,False,False,False,False,False
4,0.1250,0.101281,0.310733,0.190853,0.005513,0.415298,0.035276,0.242965,0.144639,0.158371,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196981,0.0625,0.235329,0.663256,0.263095,1.000000,0.429908,0.288495,0.270891,0.382806,0.220513,...,False,False,False,True,False,False,False,False,False,False
196982,0.1250,0.235329,0.658921,0.277557,1.000000,0.429908,0.288495,0.270891,0.382806,0.220513,...,False,False,False,True,False,False,False,False,False,False
196983,0.2500,0.178731,0.657534,0.240401,1.000000,0.429908,0.288495,0.270891,0.382806,0.220513,...,False,False,False,True,False,False,False,False,False,False
196984,0.2500,0.238308,0.654586,0.329079,1.000000,0.429908,0.288495,0.270891,0.382806,0.220513,...,False,False,False,True,False,False,False,False,False,False


## Preprocessing

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

X = df_resale.drop(columns=['resale_price'])
y = df_resale['resale_price']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_rnn = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_rnn = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))


## Model Definition

### Simple Neural Network

In [4]:
nn_model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

nn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
nn_model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

nn_predictions = nn_model.predict(X_test_scaled)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1232/1232[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235us/step


### RNN

In [5]:
rnn_model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),
    tf.keras.layers.Dense(1)
])

rnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
rnn_model.fit(X_train_rnn, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# Predictions
rnn_predictions = rnn_model.predict(X_test_rnn)


  super().__init__(**kwargs)


[1m1232/1232[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254us/step


### LSTM

In [6]:
lstm_model = tf.keras.Sequential([
    tf.keras.layers.LSTM(50, activation='tanh', return_sequences=True, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),
    tf.keras.layers.LSTM(50, activation='tanh'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1)
])

lstm_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='mse', metrics=['mae'])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

lstm_model.fit(X_train_rnn, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1, callbacks=[early_stopping])


Epoch 1/50


  super().__init__(**kwargs)


[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 944us/step - loss: 0.0084 - mae: 0.0534 - val_loss: 0.0038 - val_mae: 0.0438
Epoch 2/50
[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 807us/step - loss: 0.0014 - mae: 0.0264 - val_loss: 0.0023 - val_mae: 0.0395
Epoch 3/50
[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 834us/step - loss: 0.0011 - mae: 0.0243 - val_loss: 0.0024 - val_mae: 0.0388
Epoch 4/50
[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 886us/step - loss: 0.0011 - mae: 0.0237 - val_loss: 0.0024 - val_mae: 0.0390
Epoch 5/50
[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 859us/step - loss: 9.8223e-04 - mae: 0.0229 - val_loss: 0.0026 - val_mae: 0.0393
Epoch 6/50
[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 909us/step - loss: 9.4341e-04 - mae: 0.0224 - val_loss: 0.0032 - val_mae: 0.0433
Epoch 7/50
[1m3940/3940[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x3559f9910>

In [7]:
# Predictions
lstm_predictions = lstm_model.predict(X_test_rnn)

[1m1232/1232[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 368us/step


## Evaluation

In [8]:
def evaluate_model(true_values, predictions):
    r2 = r2_score(true_values, predictions)
    rmse = np.sqrt(mean_squared_error(true_values, predictions))
    mae = mean_absolute_error(true_values, predictions)
    return r2, rmse, mae

nn_r2, nn_rmse, nn_mae = evaluate_model(y_test, nn_predictions)
print(f"Neural Network - R²: {nn_r2:.4f}, RMSE: {nn_rmse:.4f}, MAE: {nn_mae:.4f}")

rnn_r2, rnn_rmse, rnn_mae = evaluate_model(y_test, rnn_predictions)
print(f"RNN - R²: {rnn_r2:.4f}, RMSE: {rnn_rmse:.4f}, MAE: {rnn_mae:.4f}")

lstm_r2, lstm_rmse, lstm_mae = evaluate_model(y_test, lstm_predictions)
print(f"LSTM - R²: {lstm_r2:.4f}, RMSE: {lstm_rmse:.4f}, MAE: {lstm_mae:.4f}")


Neural Network - R²: 0.8517, RMSE: 0.0367, MAE: 0.0265
RNN - R²: 0.4397, RMSE: 0.0713, MAE: 0.0489
LSTM - R²: 0.6307, RMSE: 0.0579, MAE: 0.0442


Without year and month data

Neural Network - R²: 0.8041, RMSE: 0.0422, MAE: 0.0316
RNN - R²: 0.8261, RMSE: 0.0397, MAE: 0.0312
LSTM - R²: 0.5895, RMSE: 0.0611, MAE: 0.0474

With:

Neural Network - R²: 0.9082, RMSE: 0.0289, MAE: 0.0219
RNN - R²: 0.7378, RMSE: 0.0488, MAE: 0.0399
LSTM - R²: 0.7992, RMSE: 0.0427, MAE: 0.0327