In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
housing = pd.read_csv('from-jan-2015-onward-scaled.csv')

In [3]:
round(housing.head(),2)

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,scaled_floor_area_sqm,scaled_lease_commence_date,scaled_remaining_lease,scaled_resale_price,scaled_storey_range,scaled_latitude,scaled_longitude,minDistanceFromMall,minDistanceFromMrt,minDistFromSch
0,2015-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,8,60.0,Improved,1986,70.0,...,-1.54,-0.57,-0.36,-1.24,-0.08,0.19,-0.02,893.28,988.96,210.9
1,2015-01,ANG MO KIO,3 ROOM,541,ANG MO KIO AVE 10,2,68.0,New Generation,1981,65.0,...,-1.21,-0.98,-0.78,-1.1,-1.15,0.17,0.23,813.15,718.27,420.36
2,2015-01,ANG MO KIO,3 ROOM,163,ANG MO KIO AVE 4,2,69.0,New Generation,1980,64.0,...,-1.17,-1.06,-0.86,-1.03,-1.15,0.16,-0.01,796.22,1063.13,350.03
3,2015-01,ANG MO KIO,3 ROOM,446,ANG MO KIO AVE 10,2,68.0,New Generation,1979,63.0,...,-1.21,-1.14,-0.94,-1.0,-1.15,0.02,0.23,706.92,619.75,344.59
4,2015-01,ANG MO KIO,3 ROOM,557,ANG MO KIO AVE 10,8,68.0,New Generation,1980,64.0,...,-1.21,-1.06,-0.86,-1.0,-0.08,0.11,0.26,952.44,830.38,598.22


In [None]:
housing.columns

In [4]:
X = housing[['scaled_floor_area_sqm','scaled_remaining_lease','scaled_storey_range']]

y = housing['scaled_resale_price'] 

In [5]:
from sklearn.model_selection import train_test_split 
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state = 101)

In [6]:
from sklearn.linear_model import LinearRegression

# Linear Model

In [27]:
lm = LinearRegression()
lm.fit(X_train, y_train)

LinearRegression()

In [28]:
y_pred_linear = lm.predict(X_test)

In [29]:
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

mse_linear = mean_squared_error(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

In [30]:
print (f"Mean squared error for linear model: {mse_linear}")
print (f"R2 score for linear model: {r2_linear}")

Mean squared error for linear model: 0.457836698001071
R2 score for linear model: 0.5374896350823287


# Lasso

In [31]:
from sklearn.linear_model import Lasso
lassoReg = Lasso()
lassoReg.fit(X_train,y_train)

Lasso()

In [32]:
y_pred_lasso = lassoReg.predict(X_test)

In [33]:
lasso_mse = mean_squared_error(y_test, y_pred_lasso)
lasso_r2 = r2_score(y_test, y_pred_lasso)

In [34]:
print (f"Mean squared error for Lasso model: {lasso_mse}")
print (f"R2 score for Lasso model: {lasso_r2}")

Mean squared error for Lasso model: 0.989966448995565
R2 score for linear model: -7.217765693146028e-05


# Ridge

In [38]:
from sklearn.linear_model import Ridge
ridgeReg = Ridge()
ridgeReg.fit(X_train,y_train)


Ridge()

In [39]:
rpred=ridgeReg.predict(X_test)

In [40]:
ridge_mse = mean_squared_error(y_test, rpred)
ridge_r2 = r2_score(y_test,rpred)

In [41]:
print (f"Mean squared error for Ridge model: {ridge_mse}")
print (f"R2 score for Ridge model: {ridge_r2}")

Mean squared error for Lasso model: 0.45783661888035754
R2 score for linear model: 0.5374897150107185


# Elastic Net

In [43]:
from sklearn.linear_model import ElasticNet
ENreg = ElasticNet()
ENreg.fit(X_train,y_train)

ElasticNet()

In [44]:
Epred = ENreg.predict(X_test)

In [45]:
elastic_mse = mean_squared_error(y_test, Epred)
elastic_r2 = r2_score(y_test, Epred)

In [46]:
print (f"Mean squared error for Elastic model: {elastic_mse}")
print (f"R2 score for Elastic model: {elastic_r2}")

Mean squared error for Elastic model: 0.8794687730463163
R2 score for Elastic model: 0.11155347543940908


# Adaboost 

In [48]:
from sklearn.ensemble import AdaBoostRegressor
ada = AdaBoostRegressor()
ada.fit(X_train, y_train)

AdaBoostRegressor()

In [49]:
y_pred_ada = ada.predict(X_test)

In [50]:
ada_mse = mean_squared_error(y_test, y_pred_ada)
ada_r2 = r2_score(y_test, y_pred_ada)

In [51]:
print (f"Mean squared error for Adaboost: {ada_mse}")
print (f"R2 score for Adaboost: {ada_r2}")

Mean squared error for Adaboost: 0.6281709322244238
R2 score for Adaboost: 0.36541660298905887


# XGB Regressor

In [54]:
import xgboost
xgb_model = xgboost.XGBRegressor(colsample_bytree=0.4,   
            booster = 'gbtree',
             learning_rate=0.1,
             max_depth=5,
             min_child_weight=1.5,
             n_estimators=100,   
             reg_lambda = 1,
             seed=42)
xgb_model.fit(X_train, y_train)
y_prediction = xgb_model.predict(X_test)
xgb_mse = metrics.mean_squared_error(y_test,y_prediction)
xgb_r2 = metrics.r2_score(y_test, y_prediction)

print (f"Mean squared error for XGB: {xgb_mse}")
print (f"R2 score for XGB: {xgb_r2}")

Mean squared error for XGB: 0.36821353947786445
R2 score for XGB: 0.6280276804915799


# Voting Regressor

In [55]:
from sklearn.ensemble import VotingRegressor

In [56]:
votingReg = VotingRegressor([('lm',lm),('ridge',ridgeReg),('elastic',ENreg)])
votingReg.fit(X_train, y_train)
y_prediction = votingReg.predict(X_test)

In [57]:
voting_mse = metrics.mean_squared_error(y_test,y_prediction)
voting_r2 = metrics.r2_score(y_test, y_prediction)

In [59]:
print (f"Mean squared error for Voting: {voting_mse}")
print (f"R2 score for Voting: {voting_r2}")

Mean squared error for Voting: 0.5018675205845318
R2 score for Voting: 0.4930093391392236
