In [None]:
import numpy as np
import pandas as pd
import requests
import json
from sklearn.neighbors import BallTree, DistanceMetric

### Listings Data

In [None]:
listings = pd.read_csv('SG_cleaned_listings.csv')
listings

Unnamed: 0,id,name,description,neighborhood_overview,host_since,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,...,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,calculated_host_listings_count,amenities_str
0,50646,Pleasant Room along Bukit Timah,Fully furnished bedroom with a nice view on th...,The serenity & quiet surrounding makes it an i...,2010-09-08,No Data,,,f,1.0,...,4.56,4.72,4.78,4.78,4.94,4.72,4.50,f,1,"""Shampoo"", ""Essentials"", ""Kitchen"", ""Long term..."
1,71609,Ensuite Room (Room 1 & 2) near EXPO,For 3 rooms.Book room 1&2 and room 4 The spa...,No Data,2011-01-29,within a day,100.0,,f,8.0,...,4.44,4.37,4.00,4.63,4.78,4.26,4.32,f,5,"""Hangers"", ""Cooking basics"", ""Washer"", ""Hair d..."
2,71896,B&B Room 1 near Airport & EXPO,The space Vocational Stay Deluxe Bedroom in ...,No Data,2011-01-29,within a day,100.0,,f,8.0,...,4.16,4.22,4.09,4.43,4.43,4.17,4.04,f,5,"""Coffee maker"", ""Cooking basics"", ""Washer"", ""H..."
3,71903,Room 2-near Airport & EXPO,"Like your own home, 24hrs access. The space ...",Quiet and view of the playground with exercise...,2011-01-29,within a day,100.0,,f,8.0,...,4.41,4.39,4.52,4.63,4.64,4.50,4.36,f,5,"""Coffee maker"", ""Cooking basics"", ""Washer"", ""H..."
4,275343,"Conveniently located City Room!(1,2,3,4,5,6,7,8)","Awesome location and host Room near INSEAD, N...",No Data,2011-11-24,within a few hours,87.0,77.0,f,16.0,...,4.39,4.12,4.24,4.53,4.47,4.53,4.59,f,16,"""Shampoo"", ""Essentials"", ""Keypad"", ""Kitchen"", ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3140,53905492,Standard Twin,The space Hotel accommodations have been car...,No Data,2021-12-06,within an hour,100.0,100.0,f,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,t,4,"""Security cameras on property"", ""Hangers"", ""Lo..."
3141,53905653,Standard Queen & Single,The space Hotel accommodations have been car...,No Data,2021-12-06,within an hour,100.0,100.0,f,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,t,4,"""Security cameras on property"", ""Hangers"", ""Lo..."
3142,53910005,Affordable Office for 8 Pax in Heart of East,Convenient location in the heart of East Singa...,No Data,2017-04-04,a few days or more,0.0,,f,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,t,1,"""Long term stays allowed"", ""Air conditioning"",..."
3143,53910434,Deluxe Class 2BR Near Marina Bay,Situated near the bustling neighborhood of Mar...,"Located near the CBD, enjoy a wide variety of...",2017-10-27,within a few hours,95.0,30.0,f,225.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,f,179,"""Cleaning before checkout"", ""Safe"", ""Hangers"",..."


### MRT Data

In [None]:
# Get list of MRTs in Singapore
# Data is taken from: https://data.gov.sg/dataset/train-station-chinese-names - 2017
MRT = pd.read_csv("SG_MRT_Stations.csv")
MRT.head(3)

Unnamed: 0,stn_code,mrt_station_english,mrt_station_chinese,mrt_line_english,mrt_line_chinese
0,NS1,Jurong East,裕廊东,North South Line,南北线
1,NS2,Bukit Batok,武吉巴督,North South Line,南北线
2,NS3,Bukit Gombak,武吉甘柏,North South Line,南北线


In [None]:
list_of_mrt = MRT["mrt_station_english"].tolist()

In [None]:
# Add in new MRT Stations opened in 2020
new_mrt_stations = ['Woodlands North', 'Woodlands', 'Woodlands South', 'Springleaf', 
                       'Lentor','Bright Hill', 'Mayflower', 'Upper Thomson' , 'Caldecott']
for i in new_mrt_stations: 
    list_of_mrt.append(i)

# Remove LRT that has been closed
list_of_mrt.remove("Ten Mile Junction")

In [None]:
print(len(list_of_mrt))

192


In [None]:
mrt_lat = []
mrt_long = []

for i in range(0, len(list_of_mrt)):
    query_address = list_of_mrt[i]
    query_string = 'https://developers.onemap.sg/commonapi/search?searchVal='+str(query_address)+'&returnGeom=Y&getAddrDetails=Y'
    resp = requests.get(query_string)

    data_mrt=json.loads(resp.content)
    
    if data_mrt['found'] != 0:
        mrt_lat.append(data_mrt["results"][0]["LATITUDE"])
        mrt_long.append(data_mrt["results"][0]["LONGITUDE"])
        print (str(query_address)+",Lat: "+data_mrt['results'][0]['LATITUDE'] +" Long: "+data_mrt['results'][0]['LONGITUDE'])

Jurong East,Lat: 1.3468080909852 Long: 103.732570156371
Bukit Batok,Lat: 1.34971948024642 Long: 103.750968656542
Bukit Gombak,Lat: 1.36731012055308 Long: 103.752439780696
Choa Chu Kang,Lat: 1.37698423514519 Long: 103.695087601092
Yew Tee,Lat: 1.39483312455963 Long: 103.744790526006
Kranji,Lat: 1.40413016625403 Long: 103.741615893274
Marsiling,Lat: 1.44315121142693 Long: 103.775164777049
Woodlands,Lat: 1.43357568988365 Long: 103.804329417963
Admiralty,Lat: 1.44062992421831 Long: 103.806198056529
Sembawang,Lat: 1.41961709618173 Long: 103.821095254872
Yishun,Lat: 1.43071857830265 Long: 103.836789569146
Khatib,Lat: 1.42555616856012 Long: 103.840524922186
Yio Chu Kang,Lat: 1.37585358258553 Long: 103.872600261996
Ang Mo Kio,Lat: 1.36582089504381 Long: 103.842847772078
Bishan,Lat: 1.36517069591469 Long: 103.83629822663
Braddell,Lat: 1.34173174251354 Long: 103.864688887374
Toa Payoh,Lat: 1.33372621378708 Long: 103.855718395843
Novena,Lat: 1.32168260642724 Long: 103.84266424228
Newton,Lat: 1.31

In [None]:
# Check number of values 
print(len(list_of_mrt))
print(len(mrt_lat))
print(len(mrt_long))

192
192
192


In [None]:
# Store this information in a dataframe
mrt_long_lat = pd.DataFrame({
    'MRT': list_of_mrt,
    'latitude': mrt_lat,
    'longitude': mrt_long
})
mrt_long_lat.head()

Unnamed: 0,MRT,latitude,longitude
0,Jurong East,1.3468080909852,103.732570156371
1,Bukit Batok,1.34971948024642,103.750968656542
2,Bukit Gombak,1.36731012055308,103.752439780696
3,Choa Chu Kang,1.37698423514519,103.695087601092
4,Yew Tee,1.39483312455963,103.744790526006


In [None]:
mrt_long_lat.latitude = mrt_long_lat.latitude.astype(float)
mrt_long_lat.longitude = mrt_long_lat.longitude.astype(float)

In [None]:
# Build k-d tree with haversine distance metric, which expects
# (lat, lon) in radians and returns distances in radians
dist = DistanceMetric.get_metric('haversine')
tree = BallTree(np.radians(mrt_long_lat[['latitude', 'longitude']]), metric=dist)



In [None]:
coords = np.radians(listings[['latitude', 'longitude']])
dists, ilocs = tree.query(coords)
# dists is in rad; convert to km
listings['dist_to_subway'] = dists.flatten() * 6367

In [None]:
listings.to_csv('SG_dist_cleaned_listings.csv', index = False)

In [None]:
SG_listings = pd.read_csv("SG_dist_cleaned_listings.csv")
SG_listings.head(3)

Unnamed: 0,id,name,description,neighborhood_overview,host_since,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,...,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,instant_bookable,calculated_host_listings_count,amenities_str,dist_to_subway
0,50646,Pleasant Room along Bukit Timah,Fully furnished bedroom with a nice view on th...,The serenity & quiet surrounding makes it an i...,2010-09-08,No Data,,,f,1.0,...,4.72,4.78,4.78,4.94,4.72,4.5,f,1,"""Shampoo"", ""Essentials"", ""Kitchen"", ""Long term...",0.534995
1,71609,Ensuite Room (Room 1 & 2) near EXPO,For 3 rooms.Book room 1&2 and room 4 The spa...,No Data,2011-01-29,within a day,100.0,,f,8.0,...,4.37,4.0,4.63,4.78,4.26,4.32,f,5,"""Hangers"", ""Cooking basics"", ""Washer"", ""Hair d...",0.926167
2,71896,B&B Room 1 near Airport & EXPO,The space Vocational Stay Deluxe Bedroom in ...,No Data,2011-01-29,within a day,100.0,,f,8.0,...,4.22,4.09,4.43,4.43,4.17,4.04,f,5,"""Coffee maker"", ""Cooking basics"", ""Washer"", ""H...",0.80274


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=2a92d0af-cecf-4cde-96f5-c3db3a7f88f1' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>