In [3]:
import csv
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, RMSprop, Nadam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [10]:
def drop_non_numeric_and_na_cols(df):
    # Get the list of non-numeric columns
    df = df.dropna(subset=['Long','Lat','Rooms','Floor','Floors']).reset_index(drop=True)
    non_numeric_cols = list(df.select_dtypes(exclude=['number']).columns)
    
    # Get the list of columns that contain NaNs
    na_cols = list(df.columns[df.isna().any()])
    
    # Combine the two lists and drop the columns from the DataFrame
    cols_to_drop = list(set(non_numeric_cols) | set(na_cols))
    df = df.drop(cols_to_drop, axis=1)
    
    return df



def neural_network_model(df):
    X = df.drop(['Price'], axis=1).values
    y = df['Price'].values
    
    # Normalize the input data
    scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    
    model = Sequential()
    model.add(Dense(64, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='linear'))

    optimizer = RMSprop(learning_rate=0.001)
    model.compile(loss='mean_absolute_error', optimizer=optimizer)

    # Use early stopping to prevent overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
    history = model.fit(X, y, epochs=200, batch_size=32, validation_split=0.1, callbacks=[early_stopping])

    # Evaluate the model with different metrics
    loss = model.evaluate(X, y)
    print('MAE:', loss)
    y_pred = model.predict(X)

    r2 = r2_score(y, y_pred)
    print('R^2:', r2)
    
    return y_pred

df = pd.read_csv("../Data/Nadlan_clean.csv", index_col=0)
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
df['Year'] = df['Date'].dt.strftime('%Y')
df['Year'] = df['Year'].astype(int)
df = df[df['Year'] < 2022]
df = df[df['Year'] > 2012]

df = drop_non_numeric_and_na_cols(df)

y_pred = neural_network_model(df)
y_pred

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 25: early stopping
MAE: 790049.6875
R^2: 0.17104815353523817


array([[2383493.5],
       [3596331. ],
       [3104690.2],
       ...,
       [1863857.6],
       [2245902.5],
       [1633139.5]], dtype=float32)