In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import osmnx as ox
import pandas as pd
from shapely import geometry
import libpysal

In [3]:
# --- Configuration ---------------------------------------------------------
CRS_PROJ = "EPSG:3414"  # Singapore SVY21 (projected CRS used for all layers)
DISTANCE_THRESHOLD = 1_500  # maximum walking distance: 1.5 km (~20 mins)

# --- Step 1: fetch the walking network for Singapore -----------------------
# Querying the whole country can be slow. While testing, point place_name at
# a smaller area instead, e.g. "Tampines, Singapore".
place_name, network_type = "Singapore", "walk"
G = ox.graph_from_place(place_name, network_type=network_type)

# Distance calculations need metre units, so keep a copy of the graph
# projected into CRS_PROJ alongside the unprojected original.
G_proj = ox.project_graph(G, to_crs=CRS_PROJ)

In [7]:
# Directory holding the raw input files, relative to the notebook.
# Keep the trailing slash: the loading cell builds each file path by
# appending the file name directly to this string.
raw_path = "../data/data_raw/"

In [13]:
# Hard-coded HDB population and unit-count totals (source not recorded in
# this notebook); they feed the rough per-block / per-unit estimates.
hdb_population = 3_190_590
hdb_unit_num = 1_153_080

# Read all raw GeoJSON layers in one pass. The target names below line up
# one-to-one, in order, with the file paths handed to gpd.read_file.
(
    planning_area,
    subzone,
    sport_facilities,
    parks,
    park_connector,
    cycling_paths,
    hdb,
) = map(
    gpd.read_file,
    (
        f"{raw_path}MasterPlan2019PlanningAreaBoundaryNoSea.geojson",
        f"{raw_path}MasterPlan2019SubzoneBoundaryNoSeaGEOJSON.geojson",
        f"{raw_path}SportSGSportFacilitiesGEOJSON.geojson",
        f"{raw_path}NParksParksandNatureReserves.geojson",
        f"{raw_path}ParkConnectorLoop.geojson",
        f"{raw_path}CyclingPathNetwork.geojson",
        f"{raw_path}HDBExistingBuilding.geojson",
    ),
)

In [14]:
# Reproject every loaded layer into the shared CRS (CRS_PROJ) with a single
# unpacking assignment; targets and sources are listed in the same order.
(
    planning_area,
    subzone,
    sport_facilities,
    park_connector,
    parks,
    cycling_paths,
    hdb,
) = [
    layer.to_crs(CRS_PROJ)
    for layer in (
        planning_area,
        subzone,
        sport_facilities,
        park_connector,
        parks,
        cycling_paths,
        hdb,
    )
]

In [15]:
# Rough population estimates: spread the HDB population total evenly over
# the rows of the HDB layer (treated as buildings) and over the unit count.
hdb_num = hdb.shape[0]
hdb_population_per_block = hdb_population / hdb_num
hdb_population_per_unit = hdb_population / hdb_unit_num

# One print call, one line per figure.
print(
    f"Number of HDB buildings: {hdb_num}",
    f"Rough estimation of population per HDB block: {hdb_population_per_block}",
    f"Number of HDB units: {hdb_unit_num}",
    f"Rough estimation of population per HDB unit: {hdb_population_per_unit}",
    sep="\n",
)

Number of HDB buildings: 13160
Rough estimation of population per HDB block: 242.44604863221883
Number of HDB units: 1153080
Rough estimation of population per HDB unit: 2.7670152981579768


In [None]:
def facility_preprocessing(gdf_facilities):
    """
    Build the supply-side table from the SportSG facilities layer.

    The input is reprojected to CRS_PROJ, each geometry is replaced by its
    centroid, a sequential ``facility_id`` is attached, and a ``capacity``
    column is guaranteed to exist (filled with 1 when the input has none).

    Returns the frame produced by ``to_crs`` with those columns set.
    """
    # All further edits are made on the frame returned by to_crs.
    facilities = gdf_facilities.to_crs(CRS_PROJ)

    # Collapse every footprint (polygon Z in the source data) to its centroid.
    facilities['geometry'] = facilities.geometry.centroid

    # Row-order identifier: 0, 1, ..., n-1.
    facilities['facility_id'] = range(facilities.shape[0])

    # Supply weight. Floor area is not available, so every facility counts
    # as 1 unless the caller already supplied a capacity column.
    has_capacity = 'capacity' in facilities.columns
    if not has_capacity:
        facilities['capacity'] = 1

    return facilities

In [18]:
def population_preprocessing(gdf_pop):
    """
    Build the demand-side table from the HDB population layer.

    The input is reprojected to CRS_PROJ and three columns are added:
    ``centroid`` (centroid of each geometry), ``pop_id`` (sequential
    identifier) and ``search_buffer`` (the centroid buffered by
    DISTANCE_THRESHOLD).

    Returns the frame produced by ``to_crs`` with those columns set.
    """
    # All further edits are made on the frame returned by to_crs.
    blocks = gdf_pop.to_crs(CRS_PROJ)

    # Representative point for each row.
    centroids = blocks.geometry.centroid
    blocks['centroid'] = centroids

    # Row-order identifier: 0, 1, ..., n-1.
    blocks['pop_id'] = range(blocks.shape[0])

    # Circular catchment around each centroid, used by the later
    # "Euclidean Filter" step.
    blocks['search_buffer'] = blocks['centroid'].buffer(DISTANCE_THRESHOLD)

    return blocks