## Setup

In [4]:
# Import Dependencies.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import requests
import json

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Fetch the listings from the locally running API.
# The timeout keeps the notebook from hanging forever when the API server is
# not up, and raise_for_status() surfaces an HTTP error right here instead of
# as a confusing JSON-decoding or indexing failure further down.
listings_response = requests.get("http://127.0.0.1:5000/api/v1.0/listings", timeout=30)
listings_response.raise_for_status()
listings_json = listings_response.json()

# Examine the first record to see which fields each listing carries.
print(json.dumps(listings_json[0], indent=4, sort_keys=True))

{
    "address": "17452 NE GLISAN ST #7, Portland OR 97230",
    "bathrooms": 2.0,
    "bedrooms": 2,
    "built": 1988,
    "city": "Portland",
    "county": "Multnomah",
    "elementary_school": "Hartley",
    "high_school": "Reynolds",
    "home_type": "Manufactured - Double Wide Manufact",
    "lot_size": null,
    "middle_school": "Reynolds",
    "neighborhood": "unknown",
    "price": 72000,
    "square_feet": 1152,
    "zipcode": 97230
}


In [3]:
# Load the listing records fetched from the API into a DataFrame
# (one row per listing, one column per field).
data_df = pd.DataFrame(listings_json)

# Report the number of listings, then preview the first few rows.
print(data_df.shape[0])
data_df.head()

1822


Unnamed: 0,address,bathrooms,bedrooms,built,city,county,elementary_school,high_school,home_type,lot_size,middle_school,neighborhood,price,square_feet,zipcode
0,"17452 NE GLISAN ST #7, Portland OR 97230",2.0,2,1988,Portland,Multnomah,Hartley,Reynolds,Manufactured - Double Wide Manufact,,Reynolds,unknown,72000,1152,97230
1,"16000 SE POWELL BLVD 75, Portland OR 97236",2.0,3,1990,Portland,Multnomah,Powell Butte,Centennial,Manufactured - Double Wide Manufact,,Centennial,unknown,79950,1404,97236
2,"12846 SE RAMONA ST 6, Portland OR 97236",2.0,3,1997,Portland,Multnomah,Gilbert Hts,David Douglas,Manufactured - Double Wide Manufact,,Alice Ott,unknown,93900,1297,97236
3,"7720 S Macadam AVE 7, Portland OR 97219",3.0,3,1988,Portland,Multnomah,Other,Other,Floating Home - Contemporary,,Other,unknown,125000,2432,97219
4,"19609 NE Marine DR E-4, Portland OR 97230",1.0,1,1960,Portland,Multnomah,Salish Pond,Reynolds,Floating Home - Cabin,,Reynolds,unknown,129500,735,97230


## Data Preprocessing

In [25]:
# Work on a real copy so the raw listings in data_df stay untouched.
# (Plain assignment would only create a second name for the same DataFrame.)
model_df = data_df.copy()

# Insert a lot value of 0 for condos and floating homes.
# Vectorized, case-sensitive substring match on home_type; rows with a missing
# home_type simply do not match.
no_lot_mask = (
    model_df["home_type"].str.contains("Condo", regex=False, na=False)
    | model_df["home_type"].str.contains("Floating", regex=False, na=False)
)
model_df.loc[no_lot_mask, "lot_size"] = 0

# Build the modelling frame in one chain so each step returns a new object
# (no in-place edits on a slice, hence no SettingWithCopyWarning):
#   1. keep only the columns used by the deep learning model
#      (high_school was left out due to lousy random forest fitting),
#   2. drop rows that still have NaN entries,
#   3. bin prices into five quantile-based ranges, so each bin holds roughly
#      the same number of listings,
#   4. drop the original price column now that price_range replaces it.
model_df = (
    model_df
    .loc[:, ["bathrooms", "bedrooms", "built", "lot_size", "square_feet", "price"]]
    .dropna()
    .assign(price_range=lambda frame: pd.qcut(frame["price"], 5))
    .drop("price", axis=1)
)

# Check the model data.
print(len(model_df))
model_df.head()

1725


Unnamed: 0,bathrooms,bedrooms,built,lot_size,square_feet,price_range
3,3.0,3,1988,0.0,2432,"(124999.999, 349000.0]"
4,1.0,1,1960,0.0,735,"(124999.999, 349000.0]"
5,1.0,1,1974,0.0,720,"(124999.999, 349000.0]"
6,1.0,1,1927,0.0,382,"(124999.999, 349000.0]"
7,1.0,1,2004,0.0,513,"(124999.999, 349000.0]"


In [26]:
# Count the listings that fall into each price range; strongly uneven counts
# would mean the bins need adjusting.
price_range_groups = model_df.groupby("price_range")
listing_count = price_range_groups.count()
listing_count

Unnamed: 0_level_0,bathrooms,bedrooms,built,lot_size,square_feet
price_range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(124999.999, 349000.0]",347,347,347,347,347
"(349000.0, 449500.0]",344,344,344,344,344
"(449500.0, 614994.0]",344,344,344,344,344
"(614994.0, 835200.0]",345,345,345,345,345
"(835200.0, 4495000.0]",345,345,345,345,345


In [27]:
# Separate the target from the inputs:
#   y is the binned price range, X is every remaining column.
y = model_df["price_range"]
X = model_df.drop(columns=["price_range"])

In [28]:
# Hold out part of the data for testing. The fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [29]:
# Learn the min/max scaling parameters from the training inputs only, so
# nothing about the test set is used when fitting the scaler.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)

In [30]:
# Apply the fitted X_scaler to both the training and the testing inputs.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [31]:
# Map each price-range label to an integer code. The encoder is fitted on the
# training labels and then applied to both splits.
label_encoder = LabelEncoder().fit(y_train)

encoded_y_train, encoded_y_test = (
    label_encoder.transform(labels) for labels in (y_train, y_test)
)

In [32]:
# Convert encoded labels to one-hot encoding.
# Pin the width to the number of classes the encoder knows about. Left to
# itself, to_categorical sizes the output from the largest label present, so
# a split that happens to lack the highest class would produce a narrower
# matrix than the other split (and than the model's output layer).
num_price_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_price_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_price_classes)

## Create a Deep Learning Model

In [35]:
# Build the network as a single layer list: two 100-unit ReLU hidden layers
# fed by the 5 input features, then a 5-way softmax over the price ranges.
deep_model = Sequential([
    Dense(units=100, activation='relu', input_dim=5),
    Dense(units=100, activation='relu'),
    Dense(units=5, activation='softmax'),
])

In [36]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot encoded), and accuracy reported per epoch.
deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 100 epochs on the scaled training data, reshuffling each epoch;
# verbose=2 prints one summary line per epoch.
deep_model.fit(x=X_train_scaled, y=y_train_categorical, epochs=100, verbose=2, shuffle=True)

Train on 1293 samples
Epoch 1/100
1293/1293 - 1s - loss: 1.5810 - accuracy: 0.3248
Epoch 2/100
1293/1293 - 0s - loss: 1.4611 - accuracy: 0.4014
Epoch 3/100
1293/1293 - 0s - loss: 1.3200 - accuracy: 0.4331
Epoch 4/100
1293/1293 - 0s - loss: 1.2341 - accuracy: 0.4927
Epoch 5/100
1293/1293 - 0s - loss: 1.1934 - accuracy: 0.4934
Epoch 6/100
1293/1293 - 0s - loss: 1.1675 - accuracy: 0.5104
Epoch 7/100
1293/1293 - 0s - loss: 1.1489 - accuracy: 0.5151
Epoch 8/100
1293/1293 - 0s - loss: 1.1356 - accuracy: 0.5166
Epoch 9/100
1293/1293 - 0s - loss: 1.1115 - accuracy: 0.5367
Epoch 10/100
1293/1293 - 0s - loss: 1.1015 - accuracy: 0.5445
Epoch 11/100
1293/1293 - 0s - loss: 1.0834 - accuracy: 0.5476
Epoch 12/100
1293/1293 - 0s - loss: 1.0756 - accuracy: 0.5514
Epoch 13/100
1293/1293 - 0s - loss: 1.0639 - accuracy: 0.5468
Epoch 14/100
1293/1293 - 0s - loss: 1.0701 - accuracy: 0.5522
Epoch 15/100
1293/1293 - 0s - loss: 1.0532 - accuracy: 0.5584
Epoch 16/100
1293/1293 - 0s - loss: 1.0375 - accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x16c521ec808>

## Quantify our Trained Model

In [37]:
# Score the trained network on the held-out test data. evaluate() returns the
# loss followed by the metrics named at compile time (accuracy here).
evaluation = deep_model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

432/432 - 0s - loss: 1.0460 - accuracy: 0.5509
Normal Neural Network - Loss: 1.0460260929884735, Accuracy: 0.5509259104728699


## Make Predictions

In [38]:
# Use the first 10 test data values to make a prediction and compare it to the actual labels.
# Sequential.predict_classes() was removed in TensorFlow 2.6; for a softmax
# output, taking the argmax over the predicted probabilities is the equivalent.
# verbose=0 keeps the progress bar out of the cell output.
predicted_probabilities = deep_model.predict(X_test_scaled[:10], verbose=0)
encoded_predictions = np.argmax(predicted_probabilities, axis=-1)

# Turn the integer class codes back into the original price-range labels.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:10])}")

Predicted classes: [Interval(449500.0, 614994.0, closed='right')
 Interval(124999.999, 349000.0, closed='right')
 Interval(124999.999, 349000.0, closed='right')
 Interval(835200.0, 4495000.0, closed='right')
 Interval(124999.999, 349000.0, closed='right')
 Interval(835200.0, 4495000.0, closed='right')
 Interval(124999.999, 349000.0, closed='right')
 Interval(449500.0, 614994.0, closed='right')
 Interval(349000.0, 449500.0, closed='right')
 Interval(614994.0, 835200.0, closed='right')]
Actual Labels: [Interval(349000.0, 449500.0, closed='right'), Interval(349000.0, 449500.0, closed='right'), Interval(124999.999, 349000.0, closed='right'), Interval(835200.0, 4495000.0, closed='right'), Interval(349000.0, 449500.0, closed='right'), Interval(614994.0, 835200.0, closed='right'), Interval(124999.999, 349000.0, closed='right'), Interval(449500.0, 614994.0, closed='right'), Interval(835200.0, 4495000.0, closed='right'), Interval(614994.0, 835200.0, closed='right')]
