In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

### Load Libraries

In [2]:
import os
import time
import json
import pickle
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from copy import deepcopy
import googlemaps

  shapely_geos_version, geos_capi_version_string


### Set Data Dir

In [3]:
DATA_DIR_RAW = 'C:/Users/Hari/Desktop/CS3244/cs3244-pg26/data_raw'
DATA_DIR_PROCESSED = 'C:/Users/Hari/Desktop/CS3244/cs3244-pg26/data_processed'

### OneMap API

In [4]:
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'

def _headers(**kwargs):
    return {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': USER_AGENT,
        **kwargs
    }



class OneMapToken(object):
    def __init__(self, email, password, token_file):
        self.email = email
        self.password = password
        self.token_file = token_file
        self._token = None
    
    def pause(self):
        time.sleep(5)

    def courtesy_get(self, url):
        self.pause()
        return requests.get(url, timeout=30, headers=_headers())
    
    def _requestToken(self):
        url = 'https://developers.onemap.sg/privateapi/auth/post/getToken'
        data = {'email': self.email, 'password': self.password}
        return requests.post(url, headers=_headers(), json=data).json()
    
    def _requestPublicToken(self):
        url = 'https://developers.onemap.sg/publicapi/publicsessionid'
        return json.loads(requests.get(url, headers=_headers()).text)
    
    def _readToken(self):
        tokenFile = self.token_file

        try:
            with open(tokenFile, 'r') as f:
                token = json.load(f)

            if float(token['expiry_timestamp']) < time.time() - 10:
                raise ValueError('expired token')
        
        except Exception as e:
            token = self._requestToken()
            #token = self._requestPublicToken()
            with open(tokenFile, 'w+') as f:
                json.dump(token, f)

        return token

    def token(self):
        if (not self._token) or float(self._token['expiry_timestamp']) > time.time() - 10:
            self._token = self._readToken()
        return self._token['access_token']



class OneMapSearchApi(OneMapToken):
    def __init__(self, email, password, token_file):
        super().__init__(email, password, token_file)
        self._search_api_url = 'https://developers.onemap.sg/commonapi/elastic/omsearch'
    
    def querySearch(self, searchVal, returnGeom='Y', getAddrDetails='Y'):
        self.pause()
        params = {
            'searchVal': searchVal, 
            'returnGeom': returnGeom, 
            'getAddrDetails': getAddrDetails
        }
        url = f"{self._search_api_url}?{'&'.join(['{}={}'.format(k, v) for k, v in params.items()])}"
        results = self.courtesy_get(url).json()
        return results



class OneMapSchoolApi(OneMapToken):
    def __init__(self, email, password, token_file):
        super().__init__(email, password, token_file)
        self._school_layer_url = 'https://developers.onemap.sg/publicapi/schooldataAPI/getschoollayer'
        self._schools_url = 'https://developers.onemap.sg/publicapi/schooldataAPI/retrieveAllSchools'

    def querySchoolDistanceLayer(self, school_name):
        self.pause()
        params = {
            'token': self.token(),
            'schoollayer': school_name
        }
        url = f"{self._school_layer_url}?{'&'.join(['{}={}'.format(k, v) for k, v in params.items()])}"
        results = self.courtesy_get(url).json()
        return results
        
    def querySchools(self):
        self.pause()
        params = {
            'token': self.token()
        }
        url = f"{self._schools_url}?{'&'.join(['{}={}'.format(k, v) for k, v in params.items()])}"
        results = self.courtesy_get(url).json()
        return results



class OneMapThemeApi(OneMapToken):
    def __init__(self, email, password, token_file):
        super().__init__(email, password, token_file)
        self._theme_layer_url = 'https://developers.onemap.sg/publicapi/themeapi/retrieveTheme'
    
    def queryThemeLayer(self, queryName):
        self.pause()
        params = {
            'token': self.token(), 
            'queryName': queryName
        }
        url = f"{self._theme_layer_url}?{'&'.join(['{}={}'.format(k, v) for k, v in params.items()])}"
        results = self.courtesy_get(url).json()
        return results



class OneMapEssentialServiceApi(OneMapToken):
    def __init__(self, email, password, token_file):
        super().__init__(email, password, token_file)
        self._essential_svc_url = 'https://developers.onemap.sg/publicapi/essentialsvc'
    
    def queryEssentialService(self, service):
        self.pause()
        params = {
            'lat': 0, 
            'lng': 0, 
            'token': self.token()
        }
        url = f"{self._essential_svc_url}/{service}?{'&'.join(['{}={}'.format(k, v) for k, v in params.items()])}"
        results = self.courtesy_get(url).json()
        return results

### Set Auth Variables

In [5]:
EMAIL = ''
PASSWORD = ''
TOKEN_FILE = ''

### Retrieve Primary Schools

In [6]:
om_sch = OneMapSchoolApi(EMAIL, PASSWORD, TOKEN_FILE)
om_sch

<__main__.OneMapSchoolApi at 0x16dc50fde48>

In [None]:
res = om_sch.querySchools()
res

In [8]:
schools = deepcopy(res)
schools = pd.DataFrame(schools['SearchResults'][1:])

print(schools.shape, '\n', schools.columns)
display(schools.head(3))

(181, 15) 
 Index(['SCHOOLNAME', 'SCH_HSE_BLK_NUM', 'HSE_BLK_NUM', 'SCH_POSTAL_CODE',
       'POSTAL_CODE', 'SCH_ROAD_NAME', 'ROAD_NAME', 'HYPERLINK', 'MOREINFO',
       'SCH_Y_ADDR', 'SCH_X_ADDR', 'LATITUDE', 'LONGITUDE', 'GEOMETRY',
       'SCH_TEXT'],
      dtype='object')


Unnamed: 0,SCHOOLNAME,SCH_HSE_BLK_NUM,HSE_BLK_NUM,SCH_POSTAL_CODE,POSTAL_CODE,SCH_ROAD_NAME,ROAD_NAME,HYPERLINK,MOREINFO,SCH_Y_ADDR,SCH_X_ADDR,LATITUDE,LONGITUDE,GEOMETRY,SCH_TEXT
0,ADMIRALTY PRIMARY SCHOOL,11,11,738907,738907,WOODLANDS CIRCLE,WOODLANDS CIRCLE,,https://www.moe.gov.sg/schoolfinder,47189.878696414,24322.522067992,1.4430427398237,103.80027282363,iwxG{ipxRXm@p@UsBcGsE|AdBdFFCNCNEPAPAN@P@RDPH@@,Admiralty Pri Sch
1,AHMAD IBRAHIM PRIMARY SCHOOL,10,10,768643,768643,YISHUN STREET 11,YISHUN STREET 11,,https://www.moe.gov.sg/schoolfinder,46161.200044157,27932.207851114,1.4337399845972,103.83270940173,}zvGy|vxRgB@kA??c@_AT?D@R?RCPCRGPGPINKNe@f@@J`...,Ahmad Ibrahim Pri Sch
2,AI TONG SCHOOL,100,100,579646,579646,BRIGHT HILL DRIVE,BRIGHT HILL DRIVE,,https://www.moe.gov.sg/schoolfinder,38079.991263576,27956.938771506,1.3606564354832,103.83293164489,{{hG_vvxRTAhDEJCFIdAeDFMHKJKJGBCIS_EoAADABEHEH...,Ai Tong Sch


In [9]:
schools.to_csv(f"{DATA_DIR_RAW}/schools/sg_primary_schools_geo.csv", encoding='utf-8-sig', index=False)

In [10]:
schools.drop(['SCH_HSE_BLK_NUM', 'SCH_POSTAL_CODE', 'SCH_ROAD_NAME', 'MOREINFO', 'HYPERLINK', 'SCH_Y_ADDR', 'SCH_X_ADDR', 'GEOMETRY', 'SCH_TEXT'], axis=1, inplace=True)

print(schools.shape, '\n', schools.columns)
display(schools.head(3))

(181, 6) 
 Index(['SCHOOLNAME', 'HSE_BLK_NUM', 'POSTAL_CODE', 'ROAD_NAME', 'LATITUDE',
       'LONGITUDE'],
      dtype='object')


Unnamed: 0,SCHOOLNAME,HSE_BLK_NUM,POSTAL_CODE,ROAD_NAME,LATITUDE,LONGITUDE
0,ADMIRALTY PRIMARY SCHOOL,11,738907,WOODLANDS CIRCLE,1.4430427398237,103.80027282363
1,AHMAD IBRAHIM PRIMARY SCHOOL,10,768643,YISHUN STREET 11,1.4337399845972,103.83270940173
2,AI TONG SCHOOL,100,579646,BRIGHT HILL DRIVE,1.3606564354832,103.83293164489


In [11]:
schools.to_csv(f"{DATA_DIR_PROCESSED}/schools/sg_primary_schools_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve MRT Stations

In [12]:
train_codes = pd.read_excel(f"{DATA_DIR_RAW}/trains/train_station_codes.xls")
train_codes.columns = train_codes.columns.str.upper()
train_codes.drop(['MRT_STATION_CHINESE', 'MRT_LINE_CHINESE'], axis=1, inplace=True)
train_codes = train_codes.apply(lambda x: x.astype(str).str.upper())

print(train_codes.shape, '\n', train_codes.columns)
display(train_codes.head(3))

(193, 3) 
 Index(['STN_CODE', 'MRT_STATION_ENGLISH', 'MRT_LINE_ENGLISH'], dtype='object')


Unnamed: 0,STN_CODE,MRT_STATION_ENGLISH,MRT_LINE_ENGLISH
0,NS1,JURONG EAST,NORTH-SOUTH LINE
1,NS2,BUKIT BATOK,NORTH-SOUTH LINE
2,NS3,BUKIT GOMBAK,NORTH-SOUTH LINE


In [13]:
train_codes.to_csv(f"{DATA_DIR_PROCESSED}/trains/sg_train_codes.csv", encoding='utf-8-sig', index=False)

In [14]:
om = OneMapSearchApi(EMAIL, PASSWORD, TOKEN_FILE)
om

<__main__.OneMapSearchApi at 0x16dc52e59c8>

In [None]:
trains = []
missed_stn_codes = []

for i, stn_code in enumerate(train_codes.STN_CODE):
    print(f"Trying station {i+1}, {stn_code}")
    try:
        res = om.querySearch(stn_code)
        trains.append(res['results'][0])
        print(f"{stn_code} success!")
        
    except Exception as e:
        print(e)
        print(f"{stn_code} failed")
        missed_stn_codes.append(stn_code)

print(f"Missed Station Codes: {missed_stn_codes}")
print(f"Trains Extracted Successfully: {len(trains)}")

In [16]:
trains_df = pd.DataFrame(trains)

print(trains_df.shape, '\n', trains_df.columns)
display(trains_df.head(3))

(193, 11) 
 Index(['SEARCHVAL', 'BLK_NO', 'ROAD_NAME', 'BUILDING', 'ADDRESS', 'POSTAL',
       'X', 'Y', 'LATITUDE', 'LONGITUDE', 'LONGTITUDE'],
      dtype='object')


Unnamed: 0,SEARCHVAL,BLK_NO,ROAD_NAME,BUILDING,ADDRESS,POSTAL,X,Y,LATITUDE,LONGITUDE,LONGTITUDE
0,JURONG EAST MRT STATION (EW24 / NS1),10,JURONG EAST STREET 12,JURONG EAST MRT STATION (EW24 / NS1),10 JURONG EAST STREET 12 JURONG EAST MRT STATI...,609690,17869.0570516568,35038.9688695427,1.33315281585758,103.742286332403,103.742286332403
1,BUKIT BATOK MRT STATION (NS2),10,BUKIT BATOK CENTRAL,BUKIT BATOK MRT STATION (NS2),10 BUKIT BATOK CENTRAL BUKIT BATOK MRT STATION...,659958,18679.3223191258,36794.9260214306,1.34903331201636,103.749566478309,103.749566478309
2,BUKIT GOMBAK MRT STATION (NS3),802,BUKIT BATOK WEST AVENUE 5,BUKIT GOMBAK MRT STATION (NS3),802 BUKIT BATOK WEST AVENUE 5 BUKIT GOMBAK MRT...,659083,18926.9127006577,37854.0358848721,1.35861159094192,103.751790910733,103.751790910733


In [17]:
trains_df.to_csv(f"{DATA_DIR_RAW}/trains/sg_trains_geo.csv", encoding='utf-8-sig', index=False)

In [18]:
trains_df.drop(['BUILDING', 'X', 'Y', 'LONGTITUDE'], axis=1, inplace=True)
trains_df['TRAIN_NAME'] = trains_df.SEARCHVAL.str.split('\s+\(').apply(lambda x: x[0])
trains_df['TRAIN_CODES'] = trains_df.SEARCHVAL.str.split('\s+\(').apply(lambda x: x[1] if len(x) > 1 else '').str.replace(')', '').str.strip()
trains_df = pd.DataFrame({'TRAIN_NAME': trains_df.TRAIN_NAME, 'TRAIN_CODES': trains_df.TRAIN_CODES, 'BLK_NO': trains_df.BLK_NO, 'ROAD_NAME': trains_df.ROAD_NAME, 
                          'ADDRESS': trains_df.ADDRESS, 'POSTAL': trains_df.POSTAL, 'LATITUDE': trains_df.LATITUDE, 'LONGITUDE': trains_df.LONGITUDE})

print(trains_df.shape, '\n', trains_df.columns)
display(trains_df.head(3))

(193, 8) 
 Index(['TRAIN_NAME', 'TRAIN_CODES', 'BLK_NO', 'ROAD_NAME', 'ADDRESS', 'POSTAL',
       'LATITUDE', 'LONGITUDE'],
      dtype='object')


  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,TRAIN_NAME,TRAIN_CODES,BLK_NO,ROAD_NAME,ADDRESS,POSTAL,LATITUDE,LONGITUDE
0,JURONG EAST MRT STATION,EW24 / NS1,10,JURONG EAST STREET 12,10 JURONG EAST STREET 12 JURONG EAST MRT STATI...,609690,1.33315281585758,103.742286332403
1,BUKIT BATOK MRT STATION,NS2,10,BUKIT BATOK CENTRAL,10 BUKIT BATOK CENTRAL BUKIT BATOK MRT STATION...,659958,1.34903331201636,103.749566478309
2,BUKIT GOMBAK MRT STATION,NS3,802,BUKIT BATOK WEST AVENUE 5,802 BUKIT BATOK WEST AVENUE 5 BUKIT GOMBAK MRT...,659083,1.35861159094192,103.751790910733


In [19]:
trains_df.to_csv(f"{DATA_DIR_PROCESSED}/trains/sg_trains_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve Hawker Centres

In [20]:
om_theme = OneMapThemeApi(EMAIL, PASSWORD, TOKEN_FILE)
om_theme

<__main__.OneMapThemeApi at 0x16dc533bc88>

In [None]:
res = om_theme.queryThemeLayer('hawkercentre')
res

In [22]:
hawkers = pd.DataFrame(res['SrchResults'][1:])

print(hawkers.shape, '\n', hawkers.columns)
display(hawkers.head(3))

(125, 20) 
 Index(['NAME', 'DESCRIPTION', 'ADDRESSBLOCKHOUSENUMBER', 'ADDRESSPOSTALCODE',
       'ADDRESSSTREETNAME', 'PHOTOURL', 'LANDXADDRESSPOINT',
       'LANDYADDRESSPOINT', 'EST_ORIGINAL_COMPLETION_DATE', 'STATUS',
       'HUP_COMPLETION_DATE', 'ADDRESS_MYENV', 'Type', 'LatLng', 'ICON_NAME',
       'APPROXIMATE_GFA', 'INFO_ON_CO_LOCATORS', 'ADDRESSBUILDINGNAME',
       'AWARDED_DATE', 'IMPLEMENTATION_DATE'],
      dtype='object')


Unnamed: 0,NAME,DESCRIPTION,ADDRESSBLOCKHOUSENUMBER,ADDRESSPOSTALCODE,ADDRESSSTREETNAME,PHOTOURL,LANDXADDRESSPOINT,LANDYADDRESSPOINT,EST_ORIGINAL_COMPLETION_DATE,STATUS,HUP_COMPLETION_DATE,ADDRESS_MYENV,Type,LatLng,ICON_NAME,APPROXIMATE_GFA,INFO_ON_CO_LOCATORS,ADDRESSBUILDINGNAME,AWARDED_DATE,IMPLEMENTATION_DATE
0,Telok Blangah Rise Blk 36 (Telok Blangah Rise ...,HUP Standard Upgrading,36,90036,Telok Blangah Rise,http://www.nea.gov.sg/images/default-source/Ha...,26753.38,28355.97,29/10/1975,Existing,20/10/2010,"Blk 36, Telok Blangah Rise, Singapore 090036",Point,"1.27271579716508,103.82211725658",HC icons_Opt 8.jpg,,,,,
1,Tiong Bahru Market,HUP Rebuilding,30,168898,Seng Poh Road,http://www.nea.gov.sg/images/default-source/Ha...,27892.07,29724.05,,Existing,10/2/2006,"30, Seng Poh Road, Singapore 168898",Point,"1.28508825756819,103.832348785334",HC icons_Opt 8.jpg,10630.72,,,,
2,Ang Mo Kio Ave 6 Blk 724 (Blk 724 Ang Mo Kio M...,HUP Reconfiguration,724,560724,Ang Mo Kio Ave 6,http://www.nea.gov.sg/images/default-source/Ha...,29469.07,39356.14,31/1/1980,Existing,2/1/2008,"Blk 724, Ang Mo Kio Ave 6, Singapore 560724",Point,"1.37219743270045,103.846519219268",HC icons_Opt 8.jpg,,,,,


In [23]:
hawkers.to_csv(f"{DATA_DIR_RAW}/hawkers/sg_hawkers_geo.csv", encoding='utf-8-sig', index=False)

In [24]:
hawkers_processed = deepcopy(hawkers)

hawkers_processed.columns = hawkers_processed.columns.str.upper()

under_construction_drop_idx = hawkers_processed[hawkers_processed.STATUS == 'Under Construction'].index
hawkers_processed.drop(under_construction_drop_idx, axis=0, inplace=True)

hawkers_processed.drop(['DESCRIPTION', 'PHOTOURL', 'LANDXADDRESSPOINT', 'LANDYADDRESSPOINT', 'HUP_COMPLETION_DATE', 
                        'ADDRESS_MYENV', 'TYPE', 'ICON_NAME', 'APPROXIMATE_GFA', 'INFO_ON_CO_LOCATORS', 'ADDRESSBUILDINGNAME', 
                        'AWARDED_DATE', 'IMPLEMENTATION_DATE', 'STATUS'], axis=1, inplace=True)

hawkers_processed[['LATITUDE', 'LONGITUDE']] = hawkers_processed.LATLNG.str.split(',', expand=True)

hawkers_processed = pd.DataFrame({'HAWKER_ID': list(range(1, len(hawkers_processed)+1)), 'NAME': hawkers_processed.NAME, 
                                  'BLK_NO': hawkers_processed.ADDRESSBLOCKHOUSENUMBER, 'STREET_NAME': hawkers_processed.ADDRESSSTREETNAME, 
                                  'POSTAL': hawkers_processed.ADDRESSPOSTALCODE, 'COMPLETION_DATE': hawkers_processed.EST_ORIGINAL_COMPLETION_DATE, 
                                  'LATITUDE': hawkers_processed.LATITUDE, 'LONGITUDE': hawkers_processed.LONGITUDE})

hawkers_processed.NAME = hawkers_processed.NAME.astype(str).str.upper()
hawkers_processed.STREET_NAME = hawkers_processed.STREET_NAME.astype(str).str.upper()

print(hawkers_processed.shape, '\n', hawkers_processed.columns)
display(hawkers_processed.head(3))

(114, 8) 
 Index(['HAWKER_ID', 'NAME', 'BLK_NO', 'STREET_NAME', 'POSTAL',
       'COMPLETION_DATE', 'LATITUDE', 'LONGITUDE'],
      dtype='object')


Unnamed: 0,HAWKER_ID,NAME,BLK_NO,STREET_NAME,POSTAL,COMPLETION_DATE,LATITUDE,LONGITUDE
0,1,TELOK BLANGAH RISE BLK 36 (TELOK BLANGAH RISE ...,36,TELOK BLANGAH RISE,90036,29/10/1975,1.27271579716508,103.82211725658
1,2,TIONG BAHRU MARKET,30,SENG POH ROAD,168898,,1.28508825756819,103.832348785334
2,3,ANG MO KIO AVE 6 BLK 724 (BLK 724 ANG MO KIO M...,724,ANG MO KIO AVE 6,560724,31/1/1980,1.37219743270045,103.846519219268


In [25]:
hawkers_processed.to_csv(f"{DATA_DIR_PROCESSED}/hawkers/sg_hawkers_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve Retail Pharmacies

In [26]:
om_theme = OneMapThemeApi(EMAIL, PASSWORD, TOKEN_FILE)
om_theme

<__main__.OneMapThemeApi at 0x16dc533b2c8>

In [None]:
res = om_theme.queryThemeLayer('registered_pharmacy')
res

In [28]:
pharm = pd.DataFrame(res['SrchResults'][1:])

print(pharm.shape, '\n', pharm.columns)
display(pharm.head(3))

(269, 10) 
 Index(['NAME', 'DESCRIPTION', 'ADDRESSBLOCKHOUSENUMBER', 'ADDRESSFLOORNUMBER',
       'ADDRESSPOSTALCODE', 'ADDRESSSTREETNAME', 'ADDRESSUNITNUMBER', 'Type',
       'LatLng', 'ICON_NAME'],
      dtype='object')


Unnamed: 0,NAME,DESCRIPTION,ADDRESSBLOCKHOUSENUMBER,ADDRESSFLOORNUMBER,ADDRESSPOSTALCODE,ADDRESSSTREETNAME,ADDRESSUNITNUMBER,Type,LatLng,ICON_NAME
0,Guardian Pharmacy (Chinatown Point 4),Chinatown Point 4,133,B1,59413,NEW BRIDGE ROAD,34,Point,"1.28499883313915,103.844697087192",Pharmacies_new.jpg
1,Guardian Pharmacy (City Link),City Link,1,B1,39393,RAFFLES LINK,67,Point,"1.29277773939561,103.854173508666",Pharmacies_new.jpg
2,Guardian Pharmacy (City Square Mall),City Square Mall,180,B1,208539,KITCHENER ROAD,18/19,Point,"1.31139350343283,103.856487115197",Pharmacies_new.jpg


In [29]:
pharm.to_csv(f"{DATA_DIR_RAW}/pharmacies/sg_pharmacies_geo.csv", encoding='utf-8-sig', index=False)

In [30]:
pharm_processed = deepcopy(pharm)

pharm_processed.columns = pharm_processed.columns.str.upper()

pharm_processed[['LATITUDE', 'LONGITUDE']] = pharm_processed.LATLNG.str.split(',', expand=True)

pharm_processed.drop(['TYPE', 'ICON_NAME', 'LATLNG'], axis=1, inplace=True)

pharm_processed.NAME = pharm_processed.NAME.str.upper()
pharm_processed.DESCRIPTION = pharm_processed.DESCRIPTION.str.upper()

pharm_processed = pd.DataFrame({'PHARMACY_ID': list(range(1, len(pharm_processed)+1)), 'NAME': pharm_processed.NAME, 
                                'DESCRIPTION': pharm_processed.DESCRIPTION, 'BLK_NO': pharm_processed.ADDRESSBLOCKHOUSENUMBER, 
                                'FLOOR': pharm_processed.ADDRESSFLOORNUMBER, 'POSTAL': pharm_processed.ADDRESSPOSTALCODE, 
                                'STREET_NAME': pharm_processed.ADDRESSSTREETNAME, 'UNIT_NO': pharm_processed.ADDRESSUNITNUMBER, 
                                'LATITUDE': pharm_processed.LATITUDE, 'LONGITUDE': pharm_processed.LONGITUDE})

print(pharm_processed.shape, '\n', pharm_processed.columns)
display(pharm_processed.head(3))

(269, 10) 
 Index(['PHARMACY_ID', 'NAME', 'DESCRIPTION', 'BLK_NO', 'FLOOR', 'POSTAL',
       'STREET_NAME', 'UNIT_NO', 'LATITUDE', 'LONGITUDE'],
      dtype='object')


Unnamed: 0,PHARMACY_ID,NAME,DESCRIPTION,BLK_NO,FLOOR,POSTAL,STREET_NAME,UNIT_NO,LATITUDE,LONGITUDE
0,1,GUARDIAN PHARMACY (CHINATOWN POINT 4),CHINATOWN POINT 4,133,B1,59413,NEW BRIDGE ROAD,34,1.28499883313915,103.844697087192
1,2,GUARDIAN PHARMACY (CITY LINK),CITY LINK,1,B1,39393,RAFFLES LINK,67,1.29277773939561,103.854173508666
2,3,GUARDIAN PHARMACY (CITY SQUARE MALL),CITY SQUARE MALL,180,B1,208539,KITCHENER ROAD,18/19,1.31139350343283,103.856487115197


In [31]:
pharm_processed.to_csv(f"{DATA_DIR_PROCESSED}/pharmacies/sg_pharmacies_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve National Parks

In [6]:
om_theme = OneMapThemeApi(EMAIL, PASSWORD, TOKEN_FILE)
om_theme

<__main__.OneMapThemeApi at 0x14c4f2d14c8>

In [None]:
res = om_theme.queryThemeLayer('nationalparks')
res

In [8]:
parks = pd.DataFrame(res['SrchResults'][1:])

print(parks.shape, '\n', parks.columns)
display(parks.head(3))

(352, 8) 
 Index(['NAME', 'DESCRIPTION', 'LANDXADDRESSPOINT', 'LANDYADDRESSPOINT', 'Type',
       'LatLng', 'ICON_NAME', 'HYPERLINK'],
      dtype='object')


Unnamed: 0,NAME,DESCRIPTION,LANDXADDRESSPOINT,LANDYADDRESSPOINT,Type,LatLng,ICON_NAME,HYPERLINK
0,Telok Ayer Green,"Bounded by Amoy Street, Boon Tat Street and Te...",29594.3027,29323.4141,Point,"1.28146500698614,103.847644002876",parks.gif,
1,Mayflower Crescent Playground,At the junction of Mayflower Crescent and Mayf...,28695.5957,39413.7,Point,"1.3727180131601,103.839569003072",parks.gif,
2,Sunrise Drive Playground 1,Located along Sunrise Drive,30676.6113,41137.35,Point,"1.38830597203414,103.857369999308",parks.gif,


In [9]:
parks.to_csv(f"{DATA_DIR_RAW}/parks/sg_parks_geo.csv", encoding='utf-8-sig', index=False)

In [11]:
parks_processed = deepcopy(parks)

parks_processed.columns = parks_processed.columns.str.upper()

parks_processed[['LATITUDE', 'LONGITUDE']] = parks_processed.LATLNG.str.split(',', expand=True)

parks_processed.drop(['LANDXADDRESSPOINT', 'LANDYADDRESSPOINT', 'TYPE', 'ICON_NAME', 'LATLNG', 'HYPERLINK'], axis=1, inplace=True)

parks_processed.NAME = parks_processed.NAME.str.upper()
parks_processed.DESCRIPTION = parks_processed.DESCRIPTION.str.upper()

parks_processed = pd.DataFrame({'PARK_ID': list(range(1, len(parks_processed)+1)), 'NAME': parks_processed.NAME, 
                                'DESCRIPTION': parks_processed.DESCRIPTION, 'LATITUDE': parks_processed.LATITUDE, 
                                'LONGITUDE': parks_processed.LONGITUDE})

print(parks_processed.shape, '\n', parks_processed.columns)
display(parks_processed.head(3))

(352, 5) 
 Index(['PARK_ID', 'NAME', 'DESCRIPTION', 'LATITUDE', 'LONGITUDE'], dtype='object')


Unnamed: 0,PARK_ID,NAME,DESCRIPTION,LATITUDE,LONGITUDE
0,1,TELOK AYER GREEN,"BOUNDED BY AMOY STREET, BOON TAT STREET AND TE...",1.28146500698614,103.847644002876
1,2,MAYFLOWER CRESCENT PLAYGROUND,AT THE JUNCTION OF MAYFLOWER CRESCENT AND MAYF...,1.3727180131601,103.839569003072
2,3,SUNRISE DRIVE PLAYGROUND 1,LOCATED ALONG SUNRISE DRIVE,1.38830597203414,103.857369999308


In [12]:
parks_processed.to_csv(f"{DATA_DIR_PROCESSED}/parks/sg_parks_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve Libraries

In [13]:
om_theme = OneMapThemeApi(EMAIL, PASSWORD, TOKEN_FILE)
om_theme

<__main__.OneMapThemeApi at 0x14c4f3bcb08>

In [None]:
res = om_theme.queryThemeLayer('libraries')
res

In [15]:
libraries = pd.DataFrame(res['SrchResults'][1:])

print(libraries.shape, '\n', libraries.columns)
display(libraries.head(3))

(30, 15) 
 Index(['NAME', 'DESCRIPTION', 'ADDRESSBLOCKHOUSENUMBER', 'ADDRESSPOSTALCODE',
       'ADDRESSSTREETNAME', 'HYPERLINK', 'PHOTOURL', 'LANDXADDRESSPOINT',
       'LANDYADDRESSPOINT', 'Type', 'LatLng', 'ICON_NAME',
       'ADDRESSFLOORNUMBER', 'ADDRESSUNITNUMBER', 'ADDRESSBUILDINGNAME'],
      dtype='object')


Unnamed: 0,NAME,DESCRIPTION,ADDRESSBLOCKHOUSENUMBER,ADDRESSPOSTALCODE,ADDRESSSTREETNAME,HYPERLINK,PHOTOURL,LANDXADDRESSPOINT,LANDYADDRESSPOINT,Type,LatLng,ICON_NAME,ADDRESSFLOORNUMBER,ADDRESSUNITNUMBER,ADDRESSBUILDINGNAME
0,Ang Mo Kio Public Library,AMPL,4300,569842,Ang Mo Kio Avenue 6,https://www.nlb.gov.sg/main/visit-us/our-libra...,https://www.nlb.gov.sg/files/images/galleries/...,29364.62,39642.82,Point,"1.37479006912036,103.845580674824",library.gif,,,
1,Bedok Public Library,BEPL,11,469662,Bedok North St. 1,https://www.nlb.gov.sg/main/visit-us/our-libra...,https://www.nlb.gov.sg/files/images/galleries/...,38947.87,34357.48,Point,"1.32698939805286,103.931691125839",library.gif,2.0,3.0,
2,Bishan Public Library,BIPL,5,579841,Bishan Place,https://www.nlb.gov.sg/main/visit-us/our-libra...,https://www.nlb.gov.sg/files/images/galleries/...,29727.41,36885.15,Point,"1.3498506667736,103.848840445413",library.gif,1.0,1.0,


In [16]:
libraries.to_csv(f"{DATA_DIR_RAW}/libraries/sg_libraries_geo.csv", encoding='utf-8-sig', index=False)

In [19]:
libraries_processed = deepcopy(libraries)

libraries_processed.columns = libraries_processed.columns.str.upper()

libraries_processed[['LATITUDE', 'LONGITUDE']] = libraries_processed.LATLNG.str.split(',', expand=True)

libraries_processed.drop(['HYPERLINK', 'PHOTOURL', 'LANDXADDRESSPOINT', 'LANDYADDRESSPOINT', 'TYPE', 'ICON_NAME', 'LATLNG', 
                          'ADDRESSBUILDINGNAME'], axis=1, inplace=True)

libraries_processed.NAME = libraries_processed.NAME.str.upper()
libraries_processed.DESCRIPTION = libraries_processed.DESCRIPTION.str.upper()
libraries_processed.ADDRESSSTREETNAME = libraries_processed.ADDRESSSTREETNAME.str.upper()

libraries_processed = pd.DataFrame({'LIBRARY_ID': list(range(1, len(libraries_processed)+1)), 'NAME': libraries_processed.NAME, 
                                'DESCRIPTION': libraries_processed.DESCRIPTION, 'BLK_NO': libraries_processed.ADDRESSBLOCKHOUSENUMBER, 
                                'FLOOR': libraries_processed.ADDRESSFLOORNUMBER, 'POSTAL': libraries_processed.ADDRESSPOSTALCODE, 
                                'STREET_NAME': libraries_processed.ADDRESSSTREETNAME, 'UNIT_NO': libraries_processed.ADDRESSUNITNUMBER, 
                                'LATITUDE': libraries_processed.LATITUDE, 'LONGITUDE': libraries_processed.LONGITUDE})

print(libraries_processed.shape, '\n', libraries_processed.columns)
display(libraries_processed.head(3))

(30, 10) 
 Index(['LIBRARY_ID', 'NAME', 'DESCRIPTION', 'BLK_NO', 'FLOOR', 'POSTAL',
       'STREET_NAME', 'UNIT_NO', 'LATITUDE', 'LONGITUDE'],
      dtype='object')


Unnamed: 0,LIBRARY_ID,NAME,DESCRIPTION,BLK_NO,FLOOR,POSTAL,STREET_NAME,UNIT_NO,LATITUDE,LONGITUDE
0,1,ANG MO KIO PUBLIC LIBRARY,AMPL,4300,,569842,ANG MO KIO AVENUE 6,,1.37479006912036,103.845580674824
1,2,BEDOK PUBLIC LIBRARY,BEPL,11,2.0,469662,BEDOK NORTH ST. 1,3.0,1.32698939805286,103.931691125839
2,3,BISHAN PUBLIC LIBRARY,BIPL,5,1.0,579841,BISHAN PLACE,1.0,1.3498506667736,103.848840445413


In [20]:
libraries_processed.to_csv(f"{DATA_DIR_PROCESSED}/libraries/sg_libraries_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve ATMs

In [32]:
om_svc = OneMapEssentialServiceApi(EMAIL, PASSWORD, TOKEN_FILE)
om_svc

<__main__.OneMapEssentialServiceApi at 0x16dc53dd9c8>

In [None]:
res = om_svc.queryEssentialService('retrieveNearbyAtm')
res

In [34]:
atm = pd.DataFrame(res['SearchResults'])

print(atm.shape, '\n', atm.columns)
display(atm.head(3))

(516, 8) 
 Index(['NAME', 'ADDRESS', 'LATITUDE', 'LONGITUDE', 'POSTAL_CODE', 'TYPE', 'X',
       'Y'],
      dtype='object')


Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,POSTAL_CODE,TYPE,X,Y
0,STANDARD CHARTERED ATM,391 ORCHARD ROAD #B2-39 TAKASHIMAYA SHOPPING C...,1.302424172,103.8346761,238872,STANDARD CHARTERED,28151.08249,31640.966
1,STANDARD CHARTERED ATM,"200 VICTORIA STREET B1-K15 BUGIS JUCTION, SING...",1.299016774,103.8554824,188021,STANDARD CHARTERED,30466.63059,31264.20428
2,STANDARD CHARTERED ATM,"1 HARBOURFRONT WALK #B2-01 VIVOCITY, SINGAPORE...",1.264409139,103.8221717,98585,STANDARD CHARTERED,26759.43891,27437.46278


In [35]:
atm.to_csv(f"{DATA_DIR_RAW}/atm/sg_atm_geo.csv", encoding='utf-8-sig', index=False)

In [36]:
atm_processed = deepcopy(atm)

atm_processed.drop(['X', 'Y'], axis=1, inplace=True)

atm_processed = pd.DataFrame({'ATM_ID': list(range(1, len(atm_processed)+1)), 'NAME': atm_processed.NAME, 
                              'TYPE': atm_processed.TYPE, 'ADDRESS': atm_processed.ADDRESS, 'POSTAL': atm_processed.POSTAL_CODE, 
                              'LATITUDE': atm_processed.LATITUDE, 'LONGITUDE': atm_processed.LONGITUDE})

print(atm_processed.shape, '\n', atm_processed.columns)
display(atm_processed.head(3))

(516, 7) 
 Index(['ATM_ID', 'NAME', 'TYPE', 'ADDRESS', 'POSTAL', 'LATITUDE', 'LONGITUDE'], dtype='object')


Unnamed: 0,ATM_ID,NAME,TYPE,ADDRESS,POSTAL,LATITUDE,LONGITUDE
0,1,STANDARD CHARTERED ATM,STANDARD CHARTERED,391 ORCHARD ROAD #B2-39 TAKASHIMAYA SHOPPING C...,238872,1.302424172,103.8346761
1,2,STANDARD CHARTERED ATM,STANDARD CHARTERED,"200 VICTORIA STREET B1-K15 BUGIS JUCTION, SING...",188021,1.299016774,103.8554824
2,3,STANDARD CHARTERED ATM,STANDARD CHARTERED,"1 HARBOURFRONT WALK #B2-01 VIVOCITY, SINGAPORE...",98585,1.264409139,103.8221717


In [37]:
atm_processed.to_csv(f"{DATA_DIR_PROCESSED}/atm/sg_atm_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve Convenience Stores

In [38]:
om_svc = OneMapEssentialServiceApi(EMAIL, PASSWORD, TOKEN_FILE)
om_svc

<__main__.OneMapEssentialServiceApi at 0x16dc5390988>

In [None]:
res = om_svc.queryEssentialService('retrieveNearbyStore')
res

In [40]:
stores = pd.DataFrame(res['SearchResults'])

print(stores.shape, '\n', stores.columns)
display(stores.head(3))

(567, 8) 
 Index(['NAME', 'ADDRESS', 'LATITUDE', 'LONGITUDE', 'POSTAL_CODE', 'TYPE', 'X',
       'Y'],
      dtype='object')


Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,POSTAL_CODE,TYPE,X,Y
0,KIM ENG MINI SUPERMARKET,"103 YISHUN RING ROAD , #01-79, SINGAPORE 760103",1.43136935,103.8288928,760103,NEW ECON MINIMART,27507.48255,45899.067610000006
1,FAIR MART SUPERSTORE,"297 COMPASSVALE STREET, #01-01, SINGAPORE 540297",1.395227471,103.8998345,540297,NEW ECON MINIMART,35402.34231,41902.78527
2,1588 LE PTE LTD,"158B RIVERVALE CRESCENT, #01-695, SINGAPORE 54...",1.38841338,103.9066762,542158,NEW ECON MINIMART,36163.75154,41149.33963


In [41]:
stores.to_csv(f"{DATA_DIR_RAW}/stores/sg_stores_geo.csv", encoding='utf-8-sig', index=False)

In [42]:
stores_processed = deepcopy(stores)

stores_processed.drop(['X', 'Y'], axis=1, inplace=True)

stores_processed = pd.DataFrame({'STORE_ID': list(range(1, len(stores_processed)+1)), 'NAME': stores_processed.NAME, 
                                 'TYPE': stores_processed.TYPE, 'ADDRESS': stores_processed.ADDRESS, 'POSTAL': stores_processed.POSTAL_CODE, 
                                 'LATITUDE': stores_processed.LATITUDE, 'LONGITUDE': stores_processed.LONGITUDE})

print(stores_processed.shape, '\n', stores_processed.columns)
display(stores_processed.head(3))

(567, 7) 
 Index(['STORE_ID', 'NAME', 'TYPE', 'ADDRESS', 'POSTAL', 'LATITUDE',
       'LONGITUDE'],
      dtype='object')


Unnamed: 0,STORE_ID,NAME,TYPE,ADDRESS,POSTAL,LATITUDE,LONGITUDE
0,1,KIM ENG MINI SUPERMARKET,NEW ECON MINIMART,"103 YISHUN RING ROAD , #01-79, SINGAPORE 760103",760103,1.43136935,103.8288928
1,2,FAIR MART SUPERSTORE,NEW ECON MINIMART,"297 COMPASSVALE STREET, #01-01, SINGAPORE 540297",540297,1.395227471,103.8998345
2,3,1588 LE PTE LTD,NEW ECON MINIMART,"158B RIVERVALE CRESCENT, #01-695, SINGAPORE 54...",542158,1.38841338,103.9066762


In [43]:
stores_processed.to_csv(f"{DATA_DIR_PROCESSED}/stores/sg_stores_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve Post Offices

In [44]:
om_svc = OneMapEssentialServiceApi(EMAIL, PASSWORD, TOKEN_FILE)
om_svc

<__main__.OneMapEssentialServiceApi at 0x16dc54cc048>

In [None]:
res = om_svc.queryEssentialService('retrieveNearbyPostOffice')
res

In [46]:
po = pd.DataFrame(res['SearchResults'])

print(po.shape, '\n', po.columns)
display(po.head(3))

(24, 7) 
 Index(['NAME', 'ADDRESS', 'LATITUDE', 'LONGITUDE', 'POSTAL_CODE', 'X', 'Y'], dtype='object')


Unnamed: 0,NAME,ADDRESS,LATITUDE,LONGITUDE,POSTAL_CODE,X,Y
0,ALEXANDRA,"110 ALEXANDRA ROAD #01-01, SINGAPORE 159921",1.291487202,103.819827,159921,26498.5011,30431.61625
1,ANG MO KIO CENTRAL,"727 ANG MO KIO AVE 6 #01-4246, SINGAPORE 560727",1.372655163,103.8460633,560727,29418.32948,39406.75318
2,BEDOK CENTRAL,"218 BEDOK NORTH ST. 1 #01-49, SINGAPORE 460218",1.327492955,103.9342494,460218,39232.58188,34413.17234


In [47]:
po.to_csv(f"{DATA_DIR_RAW}/post_offices/sg_post_offices_geo.csv", encoding='utf-8-sig', index=False)

In [48]:
po_processed = deepcopy(po)

po_processed.drop(['X', 'Y'], axis=1, inplace=True)

po_processed = pd.DataFrame({'PO_ID': list(range(1, len(po_processed)+1)), 'NAME': po_processed.NAME, 
                             'ADDRESS': po_processed.ADDRESS, 'POSTAL': po_processed.POSTAL_CODE, 
                             'LATITUDE': po_processed.LATITUDE, 'LONGITUDE': po_processed.LONGITUDE})

print(po_processed.shape, '\n', po_processed.columns)
display(po_processed.head(3))

(24, 6) 
 Index(['PO_ID', 'NAME', 'ADDRESS', 'POSTAL', 'LATITUDE', 'LONGITUDE'], dtype='object')


Unnamed: 0,PO_ID,NAME,ADDRESS,POSTAL,LATITUDE,LONGITUDE
0,1,ALEXANDRA,"110 ALEXANDRA ROAD #01-01, SINGAPORE 159921",159921,1.291487202,103.819827
1,2,ANG MO KIO CENTRAL,"727 ANG MO KIO AVE 6 #01-4246, SINGAPORE 560727",560727,1.372655163,103.8460633
2,3,BEDOK CENTRAL,"218 BEDOK NORTH ST. 1 #01-49, SINGAPORE 460218",460218,1.327492955,103.9342494


In [49]:
po_processed.to_csv(f"{DATA_DIR_PROCESSED}/post_offices/sg_post_offices_geo.csv", encoding='utf-8-sig', index=False)

### Retrieve Bus Stops

In [5]:
LTA_DATA_MALL_KEY = pd.read_csv('C:/Users/Hari/Desktop/CS3244/lta_datamall_token.txt', header=None).iloc[0][0]

In [6]:
bus_stops_list = []

for i in range(1, 5500//500):
    res = requests.get(f"http://datamall2.mytransport.sg/ltaodataservice/BusStops?$skip={i*500}", headers=_headers(AccountKey=LTA_DATA_MALL_KEY))
    bus_stops_list.extend(res.json()['value'])

bus_stops = pd.DataFrame(bus_stops_list)

print(bus_stops.shape, '\n', bus_stops.columns)
display(bus_stops.head(3))

(4571, 5) 
 Index(['BusStopCode', 'RoadName', 'Description', 'Latitude', 'Longitude'], dtype='object')


Unnamed: 0,BusStopCode,RoadName,Description,Latitude,Longitude
0,14051,Henderson Rd,Aft Telok Blangah Hts,1.275847,103.815157
1,14059,Henderson Rd,Bef Telok Blangah Hts,1.276025,103.815613
2,14061,Keppel Rd,Opp Former Railway Stn,1.272313,103.83862


In [7]:
bus_stops.to_csv(f"{DATA_DIR_RAW}/bus_stops/sg_bus_stops_geo.csv", encoding='utf-8-sig', index=False)

In [8]:
bus_stops_processed = deepcopy(bus_stops)

bus_stops_processed.columns = bus_stops_processed.columns.str.upper()

bus_stops_processed.rename({'BUSSTOPCODE': 'CODE', 'ROADNAME': 'STREET_NAME'}, axis=1, inplace=True)

bus_stops_processed.STREET_NAME = bus_stops_processed.STREET_NAME.str.upper()
bus_stops_processed.DESCRIPTION = bus_stops_processed.DESCRIPTION.str.upper()

print(bus_stops_processed.shape, '\n', bus_stops_processed.columns)
display(bus_stops_processed.head(3))

(4571, 5) 
 Index(['CODE', 'STREET_NAME', 'DESCRIPTION', 'LATITUDE', 'LONGITUDE'], dtype='object')


Unnamed: 0,CODE,STREET_NAME,DESCRIPTION,LATITUDE,LONGITUDE
0,14051,HENDERSON RD,AFT TELOK BLANGAH HTS,1.275847,103.815157
1,14059,HENDERSON RD,BEF TELOK BLANGAH HTS,1.276025,103.815613
2,14061,KEPPEL RD,OPP FORMER RAILWAY STN,1.272313,103.83862


In [9]:
bus_stops_processed.to_csv(f"{DATA_DIR_PROCESSED}/bus_stops/sg_bus_stops_geo.csv", encoding='utf-8-sig', index=False)

### Geocode Transacted Properties

In [5]:
df = pd.read_csv('./data_processed/resale_flat_prices/resale_flat_prices_all.csv')

print(df.shape, '\n', df.columns)
display(df.head(3))

(880848, 11) 
 Index(['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range',
       'floor_area_sqm', 'flat_model', 'lease_commence_date', 'resale_price',
       'remaining_lease'],
      dtype='object')


  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,remaining_lease
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,9000.0,
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977,6000.0,
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,8000.0,


In [6]:
building = df[~df.duplicated(['block', 'street_name'])][['block', 'street_name']]
building['address'] = 'BLOCK ' + building.block + ' ' + building.street_name

print(building.shape, '\n', building.columns)
display(building.head(3))

(9602, 3) 
 Index(['block', 'street_name', 'address'], dtype='object')


Unnamed: 0,block,street_name,address
0,309,ANG MO KIO AVE 1,BLOCK 309 ANG MO KIO AVE 1
4,216,ANG MO KIO AVE 1,BLOCK 216 ANG MO KIO AVE 1
5,211,ANG MO KIO AVE 3,BLOCK 211 ANG MO KIO AVE 3


In [7]:
GOOGLE_MAPS_KEY = pd.read_csv('C:/Users/Hari/Desktop/CS3244/google_maps_token.txt', header=None).iloc[0][0]

In [8]:
gmaps = googlemaps.Client(key = GOOGLE_MAPS_KEY)
gmaps

<googlemaps.client.Client at 0x1adf95de988>

In [None]:
building_latitudes = []
building_longitudes = []
missed_addresses = []

for i, address in enumerate(building.address):
    print(f"Trying address {i+1}, {address}")
    res = gmaps.geocode(address)
    
    if (i%40 == 0):
        time.sleep(2)
    
    try:
        lat = res[0]['geometry']['location']['lat']
        lng = res[0]['geometry']['location']['lng']
        building_latitudes.append(lat)
        building_longitudes.append(lng)
        print(f"{address} success!")
    except Exception as e:
        print(e)
        print(f"{address} failed!")
        missed_addresses.append(address)
        building_latitudes.append(None)
        building_longitudes.append(None)

print(f"Missed Addresses: {missed_addresses}")

In [24]:
building['latitude'] = building_latitudes
building['longitude'] = building_longitudes

print(building.shape, '\n', building.columns)
display(building.head(3))

(9602, 5) 
 Index(['block', 'street_name', 'address', 'latitude', 'longitude'], dtype='object')


Unnamed: 0,block,street_name,address,latitude,longitude
0,309,ANG MO KIO AVE 1,BLOCK 309 ANG MO KIO AVE 1,1.364329,103.84411
4,216,ANG MO KIO AVE 1,BLOCK 216 ANG MO KIO AVE 1,1.366207,103.841418
5,211,ANG MO KIO AVE 3,BLOCK 211 ANG MO KIO AVE 3,1.36921,103.8416


In [27]:
print(len(missed_addresses))
print(building['latitude'].isna().sum())
print(building['longitude'].isna().sum())

149
149
149


In [28]:
building.to_csv(f"{DATA_DIR_PROCESSED}/hdb_buildings/resale_hdb_buildings_geo.csv", encoding='utf-8-sig', index=False)