## Model Development

Now that we have our cleaned data with required features, lets proceed with model development 

Importing Libraries

In [1]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import pickle

from xgboost import XGBRegressor
#from lightgbm import LGBMRegressor
#import lightgbm as ltb

from math import sqrt

%matplotlib inline

Loading our data into dataframe

In [2]:
df_properties = pd.read_csv('cleaned_property_data_buy.csv')
#df_properties = df_properties.dropna(column='amenities',axis=1)
df_properties = df_properties.drop(columns = ['amenities','price_per_sqft','neighborhood'],axis=1)

df_properties = pd.get_dummies(df_properties, columns=['quality', ])
df_properties.head()
#partly_furnished

Unnamed: 0,id,latitude,longitude,price,size_in_sqft,no_of_bedrooms,no_of_bathrooms,partly_furnished,balcony,barbecue_area,...,shared_spa,study,vastu_compliant,view_of_landmark,view_of_water,walk_in_closet,quality_High,quality_Low,quality_Medium,quality_Ultra
0,8942144,25.190442,55.283397,2183160,1040,2,2,True,True,False,...,False,False,False,True,False,False,0,0,1,0
1,8942145,25.190442,55.283397,2190930,1043,2,2,True,True,False,...,False,False,False,True,False,False,0,0,1,0
2,8942143,25.190442,55.283397,2024610,964,2,2,True,True,False,...,False,True,False,True,True,True,1,0,0,0
3,8942138,24.311142,54.621775,1550000,185,3,5,False,True,True,...,False,True,False,False,True,False,1,0,0,0
4,8942134,25.09938,55.141275,6699555,2000,3,3,False,True,True,...,False,False,False,False,True,False,0,0,1,0


In [3]:
#df_properties.price = df_properties.price * 0.27

In [4]:

df_properties.price.describe()

count    2.024000e+03
mean     2.516963e+06
std      4.145156e+06
min      2.150000e+05
25%      9.000000e+05
50%      1.600000e+06
75%      2.794848e+06
max      9.999989e+07
Name: price, dtype: float64

Training dataframe

In [5]:
df_properties.head()

Unnamed: 0,id,latitude,longitude,price,size_in_sqft,no_of_bedrooms,no_of_bathrooms,partly_furnished,balcony,barbecue_area,...,shared_spa,study,vastu_compliant,view_of_landmark,view_of_water,walk_in_closet,quality_High,quality_Low,quality_Medium,quality_Ultra
0,8942144,25.190442,55.283397,2183160,1040,2,2,True,True,False,...,False,False,False,True,False,False,0,0,1,0
1,8942145,25.190442,55.283397,2190930,1043,2,2,True,True,False,...,False,False,False,True,False,False,0,0,1,0
2,8942143,25.190442,55.283397,2024610,964,2,2,True,True,False,...,False,True,False,True,True,True,1,0,0,0
3,8942138,24.311142,54.621775,1550000,185,3,5,False,True,True,...,False,True,False,False,True,False,1,0,0,0
4,8942134,25.09938,55.141275,6699555,2000,3,3,False,True,True,...,False,False,False,False,True,False,0,0,1,0


Testing dataframe

In [6]:
df_properties.shape

(2024, 35)

Target feature for prediction

In [7]:
y_train = df_properties['price']#.values
#type(y_train)

Excluding target feature from the training dataset

In [8]:
y_train.head()

0    2183160
1    2190930
2    2024610
3    1550000
4    6699555
Name: price, dtype: int64

In [9]:
X = df_properties.drop(columns=['price','id'],axis=1)
#X['maid_room'] = df["maid_room"].astype(int)
#X['maid_room'] = X['maid_room'].replace({True: 1, False: 0})
X = X.replace({False: 0, True: 1}, inplace=False)
#unfurnished
#X['partly_furnished'] = X['partly_furnished'].replace({1: 0, 0: 1})
#X = X[['latitude','longitude','size_in_sqft','no_of_bedrooms','no_of_bathrooms','covered_parking','unfurnished','concierge','kitchen_appliances','pets_allowed','view_of_water']]
y = y_train

In [10]:
X.head()

Unnamed: 0,latitude,longitude,size_in_sqft,no_of_bedrooms,no_of_bathrooms,partly_furnished,balcony,barbecue_area,built_in_wardrobes,central_ac,...,shared_spa,study,vastu_compliant,view_of_landmark,view_of_water,walk_in_closet,quality_High,quality_Low,quality_Medium,quality_Ultra
0,25.190442,55.283397,1040,2,2,1,1,0,1,1,...,0,0,0,1,0,0,0,0,1,0
1,25.190442,55.283397,1043,2,2,1,1,0,1,1,...,0,0,0,1,0,0,0,0,1,0
2,25.190442,55.283397,964,2,2,1,1,0,1,1,...,0,1,0,1,1,1,1,0,0,0
3,24.311142,54.621775,185,3,5,0,1,1,1,1,...,0,1,0,0,1,0,1,0,0,0
4,25.09938,55.141275,2000,3,3,0,1,1,1,1,...,0,0,0,0,1,0,0,0,1,0


Splitting training dataset for model training.

In [11]:
# Split into train & test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [12]:
X_train.head()

Unnamed: 0,latitude,longitude,size_in_sqft,no_of_bedrooms,no_of_bathrooms,partly_furnished,balcony,barbecue_area,built_in_wardrobes,central_ac,...,shared_spa,study,vastu_compliant,view_of_landmark,view_of_water,walk_in_closet,quality_High,quality_Low,quality_Medium,quality_Ultra
2006,25.099793,55.141746,1392,2,3,0,1,0,1,1,...,0,0,0,0,0,0,0,0,1,0
1251,25.691869,55.78512,821,1,2,0,0,0,1,1,...,0,0,0,0,1,0,0,0,1,0
1716,24.484947,54.597985,1614,2,3,0,1,0,1,1,...,0,0,0,0,1,0,0,0,1,0
317,25.119354,55.37796,441,0,1,0,1,1,0,1,...,0,0,0,0,0,0,0,0,1,0
359,25.010196,55.291467,2485,3,3,1,1,1,1,1,...,0,0,0,0,0,0,0,0,1,0


In [13]:
X.shape

(2024, 33)

In [14]:
X_train.shape

(1619, 33)

For prediction modelling we will use following models:
    
- LGBMRegressor
- RandomForestRegressor
- XGBRegressor
- LGBMRegressor

We will train our models using training dataset consisting of 1619 properties and then test them against our testing dataset with 2024-1619 properties.

We will check our R2 score for each model and we will select the predictions from the model which is closest to value of 1. R2 scores range from 0 to 1.

Based on best R2 score we will match actual values with predicted values and see the percentage difference.

### GradientBoostingRegressor

#### Training

In [15]:
from sklearn.metrics import mean_absolute_error,mean_absolute_percentage_error

In [16]:

#np.random.seed(42)

for i in [0.001, 0.003, 0.1, 0.3, 1, 2, 4, 6]:
    for j in [100, 250, 500, 700, 750, 800, 850, 900, 950, 1000, 1050]:
       gradient = GradientBoostingRegressor(n_estimators = j, learning_rate=i)
       print('learning rate is: '+ str(i)+ ' and n_estimators is: '+str(j))
       gradient.fit(X_train, y_train)
       gradient_predictions = gradient.predict(X_test)
       gradient_r2_score = r2_score(y_test, gradient_predictions)
       print('R2 Score for GradientBoostingRegressor', gradient_r2_score)
       
       print("MAE is",mean_absolute_error(y_test, gradient_predictions))
       print("MAPE is",mean_absolute_percentage_error(y_test, gradient_predictions))
       print('\n')
 

learning rate is: 0.001 and n_estimators is: 100
R2 Score for GradientBoostingRegressor 0.07310310940402709
MAE is 1627969.4995601769
MAPE is 1.171382525392962


learning rate is: 0.001 and n_estimators is: 250
R2 Score for GradientBoostingRegressor 0.17095661707508925
MAE is 1530116.9800122415
MAPE is 1.0936718569097463


learning rate is: 0.001 and n_estimators is: 500
R2 Score for GradientBoostingRegressor 0.31066381564116785
MAE is 1372057.0903826002
MAPE is 0.9619472634899982


learning rate is: 0.001 and n_estimators is: 700
R2 Score for GradientBoostingRegressor 0.3953051897385841
MAE is 1282976.2410132876
MAPE is 0.878699106663099


learning rate is: 0.001 and n_estimators is: 750
R2 Score for GradientBoostingRegressor 0.4131236052712266
MAE is 1263522.5325728946
MAPE is 0.859745464524499


learning rate is: 0.001 and n_estimators is: 800
R2 Score for GradientBoostingRegressor 0.42951870227417144
MAE is 1245246.7241370566
MAPE is 0.8415686832187947


learning rate is: 0.001 and

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_we

R2 Score for GradientBoostingRegressor -inf
MAE is 2.0052785810578792e+232
MAPE is 1.5885028506568298e+226


learning rate is: 4 and n_estimators is: 700


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight * ((y - raw_predictions.ravel()) ** 2))
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  * np.sum(sample_weight *

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

#### Testing

In [17]:
#gradient_predictions = gradient.predict(df_test)
gradient = GradientBoostingRegressor(n_estimators = 750, learning_rate=0.3)
gradient.fit(X_train, y_train)
       
gradient_predictions = gradient.predict(X_test)
gradient_r2_score = r2_score(y_test, gradient_predictions)
print('R2 Score for GradientBoostingRegressor', gradient_r2_score)

from sklearn.metrics import mean_absolute_error

print("MAE",mean_absolute_error(y_test, gradient_predictions))
print("MAPE is",mean_absolute_percentage_error(y_test, gradient_predictions))
filename = 'GradientBoostingRegressor2.sav'
pickle.dump(gradient, open(filename, 'wb'))

R2 Score for GradientBoostingRegressor 0.8683951320241834
MAE 520617.9762422894
MAPE is 0.22799018307564872


In [18]:
"""
param_test2 = {'max_depth':range(5,16,2), 'min_samples_split':range(200,1001,200)}
gradient = GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=0.1, n_estimators=60, max_features='sqrt', subsample=0.8, random_state=10), 
param_grid = param_test2, scoring='roc_auc',n_jobs=4,iid=False, cv=5)
gradient.fit(train[predictors],train[target])
gradient.grid_scores_, gradient.best_params_, gradient.best_score_
"""

"\nparam_test2 = {'max_depth':range(5,16,2), 'min_samples_split':range(200,1001,200)}\ngradient = GridSearchCV(estimator = GradientBoostingClassifier(learning_rate=0.1, n_estimators=60, max_features='sqrt', subsample=0.8, random_state=10), \nparam_grid = param_test2, scoring='roc_auc',n_jobs=4,iid=False, cv=5)\ngradient.fit(train[predictors],train[target])\ngradient.grid_scores_, gradient.best_params_, gradient.best_score_\n"

### RandomForestRegressor

#### Training

In [19]:

for j in [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]:
       random_forest = RandomForestRegressor(max_depth = j)#, learning_rate=i)
       print(' and max_depth is: '+str(j))
       random_forest.fit(X_train, y_train)
       forest_prediction = random_forest.predict(X_test)
       forest_r2_score = r2_score(y_test, forest_prediction)
       print('R2 Score for RandomForestRegressor', forest_r2_score)
       
       print("MAE",mean_absolute_error(y_test, forest_prediction))
       print("MAPE is",mean_absolute_percentage_error(y_test, forest_prediction))
       print('\n')
 


 and max_depth is: 2
R2 Score for RandomForestRegressor 0.4799247269642114
MAE 1254621.686911988
MAPE is 0.8233146885433661


 and max_depth is: 4
R2 Score for RandomForestRegressor 0.5775626223431245
MAE 1004788.237887729
MAPE is 0.5110562373929232


 and max_depth is: 6
R2 Score for RandomForestRegressor 0.6924589865860378
MAE 804030.4767296308
MAPE is 0.3603896225726669


 and max_depth is: 8
R2 Score for RandomForestRegressor 0.6639439767836548
MAE 724622.9225690588
MAPE is 0.290441675629927


 and max_depth is: 10
R2 Score for RandomForestRegressor 0.7581175408349163
MAE 652712.7325864919
MAPE is 0.2547853634984122


 and max_depth is: 12
R2 Score for RandomForestRegressor 0.7644816086544737
MAE 624990.1713286581
MAPE is 0.2411558262659738


 and max_depth is: 14
R2 Score for RandomForestRegressor 0.706979133143204
MAE 635766.377754628
MAPE is 0.23635044028824


 and max_depth is: 16
R2 Score for RandomForestRegressor 0.7620883135545806
MAE 616304.8541772509
MAPE is 0.233141638148

In [20]:
random_forest = RandomForestRegressor(max_depth=16)
random_forest.fit(X_train, y_train)
#forest_prediction = random_forest.predict(X_test)

RandomForestRegressor(max_depth=16)

#### Testing

In [21]:
forest_prediction = random_forest.predict(X_test)
forest_r2_score = r2_score(y_test, forest_prediction)
print('R2 Score for RandomForestRegressor', forest_r2_score)

from sklearn.metrics import mean_absolute_error
print("MAE",mean_absolute_error(y_test, forest_prediction))
print("MAPE is",mean_absolute_percentage_error(y_test, forest_prediction))

filename = 'random_forest2.sav'
pickle.dump(random_forest, open(filename, 'wb'))

R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


### XGBRegressor

#### Training

In [22]:
for i in [0.001, 0.003, 0.1, 0.3, 1, 2, 4, 6]:
    for j in [50, 100, 250, 500, 700, 750, 800, 850, 900, 950, 1000, 1050]:
       xgbr_regressor = XGBRegressor(learning_rate=i, n_estimators=j, n_jobs=-1)
       xgbr_regressor.fit(X_train, y_train)
       
       xgbr_regressor = random_forest.predict(X_test)
       xgbreg_r2_score = r2_score(y_test, xgbr_regressor)
       print('learning rate is '+str(i)+' nestimators is '+str(j))
       print('R2 Score for RandomForestRegressor', xgbreg_r2_score)
       
       print("MAE",mean_absolute_error(y_test, xgbr_regressor))
       print("MAPE is",mean_absolute_percentage_error(y_test, xgbr_regressor))
       print('\n')

learning rate is 0.001 nestimators is 50
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


learning rate is 0.001 nestimators is 100
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


learning rate is 0.001 nestimators is 250
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


learning rate is 0.001 nestimators is 500
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


learning rate is 0.001 nestimators is 700
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


learning rate is 0.001 nestimators is 750
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MAPE is 0.23464638178440075


learning rate is 0.001 nestimators is 800
R2 Score for RandomForestRegressor 0.776499832119214
MAE 607849.4633906487
MA

In [23]:
xgbr_regressor = XGBRegressor(learning_rate=0.1, n_estimators=100, n_jobs=-1)
xgbr_regressor.fit(X_train, y_train)


XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100,
             n_jobs=-1, num_parallel_tree=1, predictor='auto', random_state=0,
             reg_alpha=0, reg_lambda=1, ...)

In [24]:
"""
xgbr_regressor.feature_importances_
xgbr_regressor.feature_names_in_

fig = plt.figure(figsize=(20, 25))
plt.barh(xgbr_regressor.feature_names_in_, xgbr_regressor.feature_importances_)
plt.xlabel("Courses offered")
plt.ylabel("No. of students enrolled")
plt.title("Students enrolled in different courses")
plt.show()
"""

'\nxgbr_regressor.feature_importances_\nxgbr_regressor.feature_names_in_\n\nfig = plt.figure(figsize=(20, 25))\nplt.barh(xgbr_regressor.feature_names_in_, xgbr_regressor.feature_importances_)\nplt.xlabel("Courses offered")\nplt.ylabel("No. of students enrolled")\nplt.title("Students enrolled in different courses")\nplt.show()\n'

#### Testing

In [25]:
xgbr_regressor_prediction = xgbr_regressor.predict(X_test)
xgbr_regresso_r2_score = r2_score(y_test, xgbr_regressor_prediction)
print('R2 Score for XGBRegressor', xgbr_regresso_r2_score)

from sklearn.metrics import mean_absolute_error
print("MAE",mean_absolute_error(y_test, xgbr_regressor_prediction))

print("MAPE is",mean_absolute_percentage_error(y_test, xgbr_regressor_prediction))

filename = 'xgbr_regressor2.sav'
pickle.dump(xgbr_regressor, open(filename, 'wb'))

R2 Score for XGBRegressor 0.7854099502434347
MAE 606546.3154320988
MAPE is 0.23690135235986806


In [26]:
"""

from xgboost import plot_importance


fig = plt.figure(figsize=(115, 95))
plot_importance(xgbr_regressor,height=25.2)
plt.show()
"""
feature_important = xgbr_regressor.get_booster().get_score(importance_type='weight')
keys = list(feature_important.keys())
values = list(feature_important.values())

data = pd.DataFrame(data=values, index=keys, columns=["score"]).sort_values(by = "score", ascending=False)
data.nlargest(40, columns="score").plot(kind='barh', figsize = (30,20)) ## plot top 40 features


<matplotlib.axes._subplots.AxesSubplot at 0x25701487828>

In [27]:
"""
data = pd.DataFrame(columns=[X.columns.values,'bonjour'])
data.loc['latitude']=1.2
"""

"\ndata = pd.DataFrame(columns=[X.columns.values,'bonjour'])\ndata.loc['latitude']=1.2\n"

In [28]:
data.head()

Unnamed: 0,score
latitude,879.0
size_in_sqft,730.0
longitude,566.0
no_of_bedrooms,153.0
no_of_bathrooms,65.0


In [29]:
#data.

In [30]:
"""
import gradio as gr

def greet(latitude=0,longitude=0,size_in_sqft=0,no_of_bedrooms=1,no_of_bathrooms=1, view_of_water=True, unfurnished=False,covered_parking=False,pets_allowed=False, kitchen_appliances=False, concierge=False):
    print('bonjour')
    tab =['latitude','longitude','size_in_sqft','no_of_bedrooms','no_of_bathrooms','covered_parking','unfurnished','concierge','kitchen_appliances','pets_allowed','view_of_water']
    print(latitude)
    print(longitude)
    print(unfurnished)
    dt = pd.DataFrame({'latitude': [latitude],
                        'longitude': [longitude],
                        'size_in_sqft':  [size_in_sqft],
                        'no_of_bedrooms':  [no_of_bedrooms],
                        'no_of_bathrooms':  [no_of_bathrooms],
                        'view_of_water':  [1] if view_of_water==True else [0],
                        'unfurnished':  [1] if unfurnished==True else [0],
                        'covered_parking':  [1] if covered_parking==True else [0],
                        'pets_allowed': [1] if pets_allowed==True else [0],
                        'kitchen_appliances': [1] if kitchen_appliances==True else [0],
                        'concierge': [1] if concierge==True else [0]
                        } )
    
         #print('X.columns',X.columns)
    
    dt.head        
    print(dt['latitude'])
    print(X.shape)
    
    
    filename = 'GradientBoostingRegressor2.sav'
    loaded_model = pickle.load(open(filename, 'rb'))
    result = loaded_model.predict(dt)
    result = result[0] *0.27
         
    print('result',result) 
    #dt.head()
    return result   
    #gradient_predictions = gradient.predict(tab)
    #return gradient_predictions

demo = gr.Interface(
    fn=greet,
    inputs=["number", "number", "number", "number","number","checkbox","checkbox","checkbox","checkbox","checkbox","checkbox"],
    outputs=["number"],
)
demo.launch()
"""

'\nimport gradio as gr\n\ndef greet(latitude=0,longitude=0,size_in_sqft=0,no_of_bedrooms=1,no_of_bathrooms=1, view_of_water=True, unfurnished=False,covered_parking=False,pets_allowed=False, kitchen_appliances=False, concierge=False):\n    print(\'bonjour\')\n    tab =[\'latitude\',\'longitude\',\'size_in_sqft\',\'no_of_bedrooms\',\'no_of_bathrooms\',\'covered_parking\',\'unfurnished\',\'concierge\',\'kitchen_appliances\',\'pets_allowed\',\'view_of_water\']\n    print(latitude)\n    print(longitude)\n    print(unfurnished)\n    dt = pd.DataFrame({\'latitude\': [latitude],\n                        \'longitude\': [longitude],\n                        \'size_in_sqft\':  [size_in_sqft],\n                        \'no_of_bedrooms\':  [no_of_bedrooms],\n                        \'no_of_bathrooms\':  [no_of_bathrooms],\n                        \'view_of_water\':  [1] if view_of_water==True else [0],\n                        \'unfurnished\':  [1] if unfurnished==True else [0],\n                  

In [31]:
#keras
from tensorflow import keras
from PIL import ImageFont
#from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.preprocessing.text import one_hot
from keras.layers import Dense, Activation, Flatten

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error 
from matplotlib import pyplot as plt
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings 
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [32]:
"""
NN_model = Sequential()

# The Input Layer :
NN_model.add(Dense(128, kernel_initializer='normal',input_dim = X.shape[1], activation='relu'))

# The Hidden Layers :
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
#NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
#NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))


# The Output Layer :
NN_model.add(Dense(1, kernel_initializer='normal',activation='linear'))

# Compile the network :
NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
NN_model.summary()
"""

"\nNN_model = Sequential()\n\n# The Input Layer :\nNN_model.add(Dense(128, kernel_initializer='normal',input_dim = X.shape[1], activation='relu'))\n\n# The Hidden Layers :\nNN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))\n#NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))\n#NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))\n\n\n# The Output Layer :\nNN_model.add(Dense(1, kernel_initializer='normal',activation='linear'))\n\n# Compile the network :\nNN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])\nNN_model.summary()\n"

In [33]:
"""
checkpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5' 
checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 1, save_best_only = True, mode ='auto')
callbacks_list = [checkpoint]
"""

"\ncheckpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5' \ncheckpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 1, save_best_only = True, mode ='auto')\ncallbacks_list = [checkpoint]\n"

In [34]:
"""history = NN_model.fit(X, y, epochs=200, batch_size=32, validation_split = 0.2, callbacks=callbacks_list)"""

'history = NN_model.fit(X, y, epochs=200, batch_size=32, validation_split = 0.2, callbacks=callbacks_list)'

In [35]:
"""import seaborn as sns
import matplotlib.pyplot as plt

history_df = pd.DataFrame.from_dict(history.history)
sns.lineplot(data=history_df[['mean_absolute_error', 'val_mean_absolute_error']])
plt.xlabel("epochs")
plt.ylabel("MAE")"""

'import seaborn as sns\nimport matplotlib.pyplot as plt\n\nhistory_df = pd.DataFrame.from_dict(history.history)\nsns.lineplot(data=history_df[[\'mean_absolute_error\', \'val_mean_absolute_error\']])\nplt.xlabel("epochs")\nplt.ylabel("MAE")'