In [72]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [73]:
# CRITICAL: every layer in this notebook is reprojected to this CRS before use.
# EPSG:3414 is Singapore's projected coordinate system (SVY21 / Singapore TM), whose
# coordinates are in metres.
# We MUST use this. Do not use EPSG:4326 (lat/lon): its coordinates are in degrees, so
# lengths and areas computed on it are not metric.
CRS_PROJ = "EPSG:3414"

In [74]:
# Directory holding the raw input files, given relative to the working directory.
# Keep the trailing slash: file names are appended directly to this string when loading.
raw_path = "../data/data_raw/"

- From the HDB report (refer to HDB_Key_Statistics_2025.pdf in data/data_raw), the total HDB population is 3,190,590 (3.2 million).

In [75]:
# Total HDB population, taken from HDB_Key_Statistics_2025.pdf in data/data_raw.
hdb_population = 3_190_590

In [76]:
def _read_raw(filename):
    """Read one raw layer, located by appending ``filename`` to ``raw_path``."""
    return gpd.read_file(f"{raw_path}{filename}")


# Administrative boundaries
planning_area = _read_raw("MasterPlan2019PlanningAreaBoundaryNoSea.geojson")
subzone = _read_raw("MasterPlan2019SubzoneBoundaryNoSeaGEOJSON.geojson")

# Facility layers
sport_facilities = _read_raw("SportSGSportFacilitiesGEOJSON.geojson")
parks = _read_raw("NParksParksandNatureReserves.geojson")
park_connector = _read_raw("ParkConnectorLoop.geojson")
cycling_paths = _read_raw("CyclingPathNetwork.geojson")

# HDB layer
hdb = _read_raw("HDBExistingBuilding.geojson")

In [77]:
# Reproject every loaded layer to the shared projected CRS (CRS_PROJ).
# Each name is rebound to its reprojected copy; the order on both sides must match.
(
    planning_area,
    subzone,
    sport_facilities,
    park_connector,
    parks,
    cycling_paths,
    hdb,
) = [
    layer.to_crs(CRS_PROJ)
    for layer in (
        planning_area,
        subzone,
        sport_facilities,
        park_connector,
        parks,
        cycling_paths,
        hdb,
    )
]

In [78]:
# Preview the first rows of the reprojected HDB layer.
# NOTE(review): the saved output shows LINESTRING geometries that are all named "Path"
# rather than building polygons — confirm this file is the expected HDB building dataset.
hdb.head()

Unnamed: 0,name,geometry
0,Path,"LINESTRING (24481.043 47869.63, 24480.427 4786..."
1,Path,"LINESTRING (18753.211 37893.756, 18764.786 379..."
2,Path,"LINESTRING (41887.998 37282.122, 41889.532 372..."
3,Path,"LINESTRING (18696.214 41806.326, 18691.537 418..."
4,Path,"LINESTRING (26218.761 29036.217, 26222.34 2904..."


- Assume that HDB residents are uniformly distributed across HDB blocks, i.e. every block houses the same number of residents.

In [94]:
# Number of rows in the HDB layer, treated here as the number of HDB buildings
hdb_num = hdb.shape[0]

# NOTE(review): hard-coded unit count with no source cited in this cell — confirm provenance.
hdb_unit_num = 1_153_080

# Average residents per block / per unit, spreading the total population evenly
hdb_population_per_block = hdb_population / hdb_num
hdb_population_per_unit = hdb_population / hdb_unit_num

# One line per figure, in the same order as the values were derived
print(
    f"Number of HDB buildings: {hdb_num}",
    f"Rough estimation of population per HDB block: {hdb_population_per_block}",
    f"Number of HDB units: {hdb_unit_num}",
    f"Rough estimation of population per HDB unit: {hdb_population_per_unit}",
    sep="\n",
)

Number of HDB buildings: 13160
Rough estimation of population per HDB block: 242.44604863221883
Number of HDB units: 1153080
Rough estimation of population per HDB unit: 2.7670152981579768


In [95]:
# Add population column to HDB geodataframe (per-block estimate).
# The same scalar is assigned to every row, so each row of `hdb` receives an identical
# share of the total HDB population (the uniform-distribution assumption).
# Note: this adds the column to `hdb` in place.
hdb['population'] = hdb_population_per_block
hdb.head()

Unnamed: 0,name,geometry,population
0,Path,"LINESTRING (24481.043 47869.63, 24480.427 4786...",242.446049
1,Path,"LINESTRING (18753.211 37893.756, 18764.786 379...",242.446049
2,Path,"LINESTRING (41887.998 37282.122, 41889.532 372...",242.446049
3,Path,"LINESTRING (18696.214 41806.326, 18691.537 418...",242.446049
4,Path,"LINESTRING (26218.761 29036.217, 26222.34 2904...",242.446049


In [96]:
# Preview the first rows of the reprojected subzone boundary layer.
subzone.head()

Unnamed: 0,OBJECTID,SUBZONE_NO,SUBZONE_N,SUBZONE_C,CA_IND,PLN_AREA_N,PLN_AREA_C,REGION_N,REGION_C,INC_CRC,FMEL_UPD_D,SHAPE.AREA,SHAPE.LEN,geometry
0,676,12,DEPOT ROAD,BMSZ12,N,BUKIT MERAH,BM,CENTRAL REGION,CR,C22DED671DE2A940,20191223152313,442297.509556,4281.667289,"POLYGON ((25910.344 29425.34, 25880.344 29441...."
1,677,2,BUKIT MERAH,BMSZ02,N,BUKIT MERAH,BM,CENTRAL REGION,CR,085EF219A5A1AEAD,20191223152313,411722.830243,3074.963234,"POLYGON ((26750.092 29216.098, 26750.091 29224..."
2,678,3,CHINATOWN,OTSZ03,Y,OUTRAM,OT,CENTRAL REGION,CR,EF2B9A91AF49E025,20191223152313,587222.679344,4297.599898,"POLYGON ((29161.201 29723.071, 29147.201 29734..."
3,679,4,PHILLIP,DTSZ04,Y,DOWNTOWN CORE,DT,CENTRAL REGION,CR,615D4EDDEF809F8E,20191223152313,39437.93527,871.554888,"POLYGON ((29814.107 29616.894, 29814.868 29619..."
4,680,5,RAFFLES PLACE,DTSZ05,Y,DOWNTOWN CORE,DT,CENTRAL REGION,CR,72107B11807074F4,20191223152313,188767.489706,1872.752161,"POLYGON ((30137.768 29843.194, 30118.735 29880..."


In [97]:
# Preview the first rows of the reprojected planning-area layer.
# NOTE(review): the saved output already shows area names in `Name`, although the cell that
# derives `Name` from `Description` comes after this one — this preview may reflect state
# left over from an earlier run; confirm with Restart Kernel -> Run All.
planning_area.head()

Unnamed: 0,Name,Description,geometry
0,BEDOK,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((38991.258 31986.565 0, 38991.259 3..."
1,BOON LAY,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((15436.019 34495.789 0, 15391.952 3..."
2,BUKIT BATOK,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((20294.455 39114.528 0, 20334.318 3..."
3,BUKIT MERAH,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((26920.022 26969.57 0, 26920.169 26..."
4,BUKIT PANJANG,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((21448.724 41356.741 0, 21508.535 4..."


In [98]:
# Pull the planning-area name out of the HTML table stored in `Description`.
# The cell being targeted looks like: <th>PLN_AREA_N</th> <td>BEDOK</td>
name_pattern = r'<th>PLN_AREA_N</th> <td>(.*?)</td>'

# expand=False yields a Series holding the single capture group; rows whose
# Description does not match the pattern end up with a missing value in `Name`.
planning_area['Name'] = planning_area['Description'].str.extract(name_pattern, expand=False)

# Preview the frame to check the overwritten `Name` column
planning_area.head()

Unnamed: 0,Name,Description,geometry
0,BEDOK,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((38991.258 31986.565 0, 38991.259 3..."
1,BOON LAY,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((15436.019 34495.789 0, 15391.952 3..."
2,BUKIT BATOK,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((20294.455 39114.528 0, 20334.318 3..."
3,BUKIT MERAH,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((26920.022 26969.57 0, 26920.169 26..."
4,BUKIT PANJANG,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((21448.724 41356.741 0, 21508.535 4..."


In [99]:
# Render the planning-area geometries on an interactive map for visual inspection.
planning_area.explore()

In [101]:
# Total row count across the four facility layers.
# NOTE(review): this counts rows (features); for the line layers a row may be a path
# segment rather than a distinct facility — confirm that is the intended measure.
facility_layers = (sport_facilities, parks, park_connector, cycling_paths)
facilities_num = sum(len(layer) for layer in facility_layers)
facilities_num

5637