In [1]:
import geopandas as gpd
import h3pandas
from shapely.geometry import Point, Polygon
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

### 01.1 Load into DuckDB

In [2]:
import duckdb

# --- Database connection -------------------------------------------------
# Open (or create on first run) an on-disk DuckDB file so imported tables
# survive kernel restarts. NOTE: because the file persists, tables written by
# earlier runs stay in it until they are explicitly dropped or replaced.
db_path = "sport_equity.db"
con = duckdb.connect(db_path)

# --- Spatial extension ---------------------------------------------------
# Needed for the ST_* functions (ST_Read, ST_Transform, ...) used by the
# import and analysis cells below.
SPATIAL_EXTENSION = "spatial"
print("Installing Spatial Extension...")
con.install_extension(SPATIAL_EXTENSION)
con.load_extension(SPATIAL_EXTENSION)

Installing Spatial Extension...


In [3]:
import os

# Absolute path of the raw-data folder (resolved relative to the current
# working directory); interpolated into the SQL import statements below.
raw_path = os.path.abspath(os.path.join("..", "data", "data_raw"))
raw_path

'/home/amber/Equity_Analysis/data/data_raw'

In [4]:
# ==========================================
# PART A: BATCH IMPORT DEMOGRAPHICS (CSVs)
# ==========================================
print("Importing Top Sports & Exercise CSVs...")
# All "Top Sports & Exercise*.csv" files are loaded in one statement:
#   * the wildcard is deliberately broad so the exact dash character in the
#     file names does not matter;
#   * filename=true adds a `filename` column recording which file each row
#     came from;
#   * union_by_name=true aligns columns by name across files whose schemas
#     do not match.
top_sports_glob = f"{raw_path}/Top Sports & Exercise*.csv"
con.sql(f"""
    CREATE OR REPLACE TABLE demographics_sports AS 
    SELECT * 
    FROM read_csv_auto('{top_sports_glob}', filename=true, union_by_name=true);
""")

# Barriers and motivations are one CSV each, loaded as-is into their own table.
print("Importing Barriers & Motivations...")
SINGLE_CSV_IMPORT_SQL = """
    CREATE OR REPLACE TABLE {table} AS 
    SELECT * FROM read_csv_auto('{raw_path}/{csv_name}');
"""
single_csv_tables = {
    "barriers": "BarrierstoParticipationinSportPhysicalActivity2022.csv",
    "motivations": "MotivationstoParticipateinSportPhysicalActivity2022.csv",
}
for table_name, csv_name in single_csv_tables.items():
    con.sql(
        SINGLE_CSV_IMPORT_SQL.format(
            table=table_name, raw_path=raw_path, csv_name=csv_name
        )
    )

Importing Top Sports & Exercise CSVs...
Importing Barriers & Motivations...


In [5]:
# ==========================================
# PART B: SPATIAL LAYERS (GeoJSON & KML)
# ==========================================
print("Importing Geospatial Layers (Facilities, MRT, Parks)...")

# Every layer is imported the same way, so the layers are described as data
# and loaded in one loop instead of six copy-pasted statements.
# Each entry: (progress label, target table, file name inside raw_path).
SPATIAL_LAYERS = [
    ("Sports Facilities", "facilities", "SportSGSportFacilitiesGEOJSON.geojson"),        # supply
    # NOTE(review): the file name suggests one point per station *exit*,
    # not per station -- confirm before interpreting counts from this table.
    ("MRT Stations", "mrt_stations", "LTAMRTStationExitGEOJSON.geojson"),                # connectivity
    ("Parks", "parks", "NParksParksandNatureReserves.geojson"),                          # nature
    ("Park Connector Loop", "park_connector", "ParkConnectorLoop.geojson"),
    ("Cycling Infrastructure", "cycling_infra", "CyclingPathNetwork.geojson"),
    ("Existing HDB Buildings", "hdb_buildings", "HDBExistingBuilding.geojson"),
]

for label, table_name, file_name in SPATIAL_LAYERS:
    print(f"Importing {label}")
    # Reproject from EPSG:4326 to EPSG:3414 so the analysis cells work on
    # projected geometries. The final `true` is ST_Transform's always_xy
    # flag, which treats the source coordinates as (x, y) = (lon, lat).
    # Table names come from the hard-coded list above, so interpolating them
    # into the SQL text is safe.
    con.sql(f"""
        CREATE OR REPLACE TABLE {table_name} AS
        SELECT
            * EXCLUDE (geom),
            ST_Transform(geom, 'EPSG:4326', 'EPSG:3414', true) AS geom
        FROM ST_Read('{raw_path}/{file_name}');
    """)


Importing Geospatial Layers (Facilities, MRT, Parks)...
Importing Sports Facilities
Importing MRT Stations
Importing Parks
Importing Parks
Importing Park Connector Loop
Importing Cycling Infrastructure
Importing Park Connector Loop
Importing Cycling Infrastructure
Importing Existing HDB Buildings
Importing Existing HDB Buildings


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [6]:
# ==========================================
# PART C: TRANSPORT NETWORK (JSON / GeoJSON)
# ==========================================
print("Importing Bus Transport Network...")

# One statement shape serves every transport file; only the target table, the
# reader function and the file name vary.
# NOTE(review): unlike the other spatial layers in this notebook, the two
# GeoJSON layers here are stored without an ST_Transform to EPSG:3414 --
# confirm whether that is intended before mixing them with the other layers.
TRANSPORT_IMPORT_SQL = """
    CREATE OR REPLACE TABLE {table} AS 
    SELECT * FROM {reader}('{path}');
"""

transport_sources = (
    # GeoJSON routes & stops (spatial reader)
    ("transport_routes", "ST_Read", "routes.geojson"),
    ("transport_stops", "ST_Read", "stops.geojson"),
    # Pure JSON files (services / first-last)
    ("transport_services", "read_json_auto", "services.json"),
    ("transport_firstlast", "read_json_auto", "firstlast.json"),
)

for table_name, reader_fn, file_name in transport_sources:
    con.sql(
        TRANSPORT_IMPORT_SQL.format(
            table=table_name, reader=reader_fn, path=f"{raw_path}/{file_name}"
        )
    )


Importing Bus Transport Network...


In [7]:
# ==========================================
# PART D: PLANNING AREA LAYERS
# ==========================================

print("Importing PLANNING AREA LAYERS ")

# Both boundary layers are read from GeoJSON and reprojected from EPSG:4326 to
# EPSG:3414 (always_xy = true), replacing the original geom column.
BOUNDARY_IMPORT_SQL = """
    CREATE OR REPLACE TABLE {table} AS 
    SELECT 
        * EXCLUDE (geom),
        ST_Transform(geom, 'EPSG:4326', 'EPSG:3414', true) AS geom 
    FROM ST_Read('{path}');
"""

# (progress message, target table, file name inside raw_path)
boundary_layers = (
    (
        "Importing Master Plan 2019 Planning Area",
        "planning_areas",
        "MasterPlan2019PlanningAreaBoundaryNoSea.geojson",
    ),
    (
        "Importing Master Plan 2019 Subzone Planning Area",
        "planning_areas_subzone",
        "MasterPlan2019SubzoneBoundaryNoSeaGEOJSON.geojson",
    ),
)

for message, table_name, file_name in boundary_layers:
    print(message)
    con.sql(
        BOUNDARY_IMPORT_SQL.format(table=table_name, path=f"{raw_path}/{file_name}")
    )


Importing PLANNING AREA LAYERS 
Importing Master Plan 2019 Planning Area
Importing Master Plan 2019 Subzone Planning Area
Importing Master Plan 2019 Subzone Planning Area


In [8]:
# ==========================================
# PART E: VERIFICATION
# ==========================================
# List every table currently in the database. Because the database file is
# persistent, this can include tables left over from earlier runs.
print("\n--- IMPORT COMPLETE. TABLE SUMMARY ---")
con.sql("SHOW TABLES").show()

# Preview only the first 3 rows of the combined demographics table; the
# query previously had no LIMIT and printed the whole table.
print("\n--- PREVIEW: Demographics Data ---")
con.sql('SELECT filename, jogging, "yoga" FROM demographics_sports LIMIT 3').show()



--- IMPORT COMPLETE. TABLE SUMMARY ---
┌────────────────────────────────┐
│              name              │
│            varchar             │
├────────────────────────────────┤
│ barriers                       │
│ cycling_infra                  │
│ demographics_sports            │
│ facilities                     │
│ hdb_buildings                  │
│ motivations                    │
│ mrt_stations                   │
│ park_connector                 │
│ parks                          │
│ planning_area                  │
│ planning_area_stats            │
│ planning_area_stats_aggregated │
│ planning_area_subzone          │
│ planning_areas                 │
│ planning_areas_subzone         │
│ transport_firstlast            │
│ transport_routes               │
│ transport_services             │
│ transport_stops                │
├────────────────────────────────┤
│            19 rows             │
└────────────────────────────────┘


--- PREVIEW: Demographics Data ---
┌────────────

In [9]:
# Full catalogue listing (database, schema, table, column names, ...);
# the relation is left as the cell's last expression so it is displayed.
all_tables = con.sql('SHOW ALL TABLES')
all_tables

┌──────────────┬─────────┬────────────────────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────┐
│   database   │ schema  │              name              │                                                                                                                                                                          column_names                                                                                                                          

In [10]:

# Inspect the column names and types of an imported table.
# The same DESCRIBE can be pointed at the other tables created above,
# e.g. cycling_infra, demographics_sports or planning_areas.
facilities_schema = con.sql('DESCRIBE facilities')
facilities_schema


┌─────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│ column_name │ column_type │  null   │   key   │ default │  extra  │
│   varchar   │   varchar   │ varchar │ varchar │ varchar │ varchar │
├─────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ Name        │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Description │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ geom        │ GEOMETRY    │ YES     │ NULL    │ NULL    │ NULL    │
└─────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘

In [11]:
# Check the coordinate values of the subzone layer after reprojection.
# This queries `planning_areas_subzone`, the table this notebook creates;
# the previous name `planning_area_subzone` is not created anywhere in this
# notebook and only resolved because an older table of that name was still
# present in the persistent database file.
con.sql("SELECT ST_AsText(geom) FROM planning_areas_subzone LIMIT 1").show()

┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [12]:
# Print one HDB building geometry as WKT to eyeball its coordinate values.
hdb_wkt_sample = con.sql("SELECT ST_AsText(geom) FROM hdb_buildings LIMIT 1")
hdb_wkt_sample.show()

┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [13]:
# Print one MRT geometry as WKT; after the EPSG:3414 reprojection the
# coordinates should be projected values rather than lon/lat degrees.
mrt_wkt_sample = con.sql("SELECT ST_AsText(geom) FROM mrt_stations LIMIT 1")
mrt_wkt_sample.show()

┌─────────────────────────────────────────────────┐
│                 st_astext(geom)                 │
│                     varchar                     │
├─────────────────────────────────────────────────┤
│ POINT Z (36438.82750006078 35234.52679993625 0) │
└─────────────────────────────────────────────────┘



### 01.2 Spatial Join

In [14]:
# Peek at the first five facility rows before joining them to planning areas.
facilities_preview = con.sql("SELECT * FROM facilities LIMIT 5")
facilities_preview.show()

┌─────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [15]:
# Column names and types of the planning-area boundary table.
planning_areas_schema = con.sql('DESCRIBE planning_areas')
planning_areas_schema


┌─────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│ column_name │ column_type │  null   │   key   │ default │  extra  │
│   varchar   │   varchar   │ varchar │ varchar │ varchar │ varchar │
├─────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ Name        │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Description │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ geom        │ GEOMETRY    │ YES     │ NULL    │ NULL    │ NULL    │
└─────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘

In [16]:
# NOTE(review): this repeats the DESCRIBE of planning_areas from the cell
# directly above -- possibly meant for planning_areas_subzone; confirm or
# remove this cell.
planning_areas_schema = con.sql('DESCRIBE planning_areas')
planning_areas_schema


┌─────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│ column_name │ column_type │  null   │   key   │ default │  extra  │
│   varchar   │   varchar   │ varchar │ varchar │ varchar │ varchar │
├─────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ Name        │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ Description │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ geom        │ GEOMETRY    │ YES     │ NULL    │ NULL    │ NULL    │
└─────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘

In [17]:
# Show the Name / Description attributes of the first five planning areas;
# the stats query further down extracts PLN_AREA_N from Description.
planning_areas_preview = con.sql("SELECT Name, Description FROM planning_areas LIMIT 5")
planning_areas_preview.show()

┌─────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│  Name   │                                                                                                                                                                                                                                                                    Description                                                                                                                                                                             

In [18]:
# First five rows of the subzone boundary table, all columns.
subzones_preview = con.sql("SELECT * FROM planning_areas_subzone LIMIT 5")
subzones_preview.show()

┌──────────┬────────────┬───────────────┬───────────┬─────────┬───────────────┬────────────┬────────────────┬──────────┬──────────────────┬────────────────┬──────────────────┬──────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [19]:
# Build one row of amenity statistics per planning area.
# Each metric is a correlated subquery that spatially matches one layer against
# the planning-area polygon (ST_Intersects):
#   * facility_count   -- number of facilities rows intersecting the area
#   * mrt_count        -- number of mrt_stations rows intersecting the area
#                         (NOTE(review): that table is loaded from the
#                         station-exit file, so this presumably counts exits
#                         rather than stations -- confirm)
#   * cycling_length_m -- length of cycling_infra geometry clipped to the area
#   * pcn_length_m     -- length of park_connector geometry clipped to the area
#   * park_area_sqm    -- area of parks geometry clipped to the area
# COALESCE turns "no intersecting feature" (NULL sum) into 0.
PLANNING_AREA_STATS_SQL = """
    CREATE OR REPLACE TABLE planning_area_stats AS
    SELECT 
        -- Extract the real Planning Area Name from the HTML Description
        regexp_extract(pa.Description, '<th>PLN_AREA_N</th> <td>(.*?)</td>', 1) AS planning_area,
        pa.geom,
        
        -- 1. Count Facilities (Points)
        (SELECT COUNT(*) 
         FROM facilities f 
         WHERE ST_Intersects(f.geom, pa.geom)) AS facility_count,
        
        -- 2. Count MRT (Points)
        (SELECT COUNT(*) 
         FROM mrt_stations m 
         WHERE ST_Intersects(m.geom, pa.geom)) AS mrt_count,
        
        -- 3. Sum Cycling Path Length (Lines)
        COALESCE((SELECT SUM(ST_Length(ST_Intersection(c.geom, pa.geom))) 
         FROM cycling_infra c 
         WHERE ST_Intersects(c.geom, pa.geom)), 0) AS cycling_length_m,

        -- 4. Sum Park Connector Length (Lines)
        COALESCE((SELECT SUM(ST_Length(ST_Intersection(pc.geom, pa.geom))) 
         FROM park_connector pc 
         WHERE ST_Intersects(pc.geom, pa.geom)), 0) AS pcn_length_m,

        -- 5. Sum Park Area (Polygons)
        COALESCE((SELECT SUM(ST_Area(ST_Intersection(p.geom, pa.geom))) 
         FROM parks p 
         WHERE ST_Intersects(p.geom, pa.geom)), 0) AS park_area_sqm

    FROM planning_areas pa
    ORDER BY facility_count DESC;
"""
con.sql(PLANNING_AREA_STATS_SQL)

# Preview the first ten rows, leaving out the bulky geometry column.
stats_preview = con.sql("SELECT * EXCLUDE(geom) FROM planning_area_stats LIMIT 10")
stats_preview.show()

┌───────────────┬────────────────┬───────────┬────────────────────┬────────────────────┬────────────────────┐
│ planning_area │ facility_count │ mrt_count │  cycling_length_m  │    pcn_length_m    │   park_area_sqm    │
│    varchar    │     int64      │   int64   │       double       │       double       │       double       │
├───────────────┼────────────────┼───────────┼────────────────────┼────────────────────┼────────────────────┤
│ KALLANG       │              5 │        20 │                0.0 │  14963.40330593513 │  61034.01616422864 │
│ BUKIT BATOK   │              2 │        10 │  569.2657637800825 │ 11185.807331188686 │   948557.294471612 │
│ CLEMENTI      │              2 │         4 │ 1658.8311444070696 │ 11788.067531161541 │ 169064.60473211418 │
│ BUKIT MERAH   │              2 │        20 │                0.0 │   6634.98104599742 │  1320529.310831947 │
│ ANG MO KIO    │              2 │        19 │  7817.181259770624 │  5626.439442779658 │  866927.8954175821 │
│ BEDOK   

In [20]:
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import MinMaxScaler

# # 1. Fetch data (converting geometry to WKB for easy GeoPandas consumption)
# # We use ST_AsWKB to ensure we get a standard binary format that GeoPandas can read
# df_stats = con.sql("""
#     SELECT 
#         planning_area, 
#         facility_count, 
#         mrt_count,
#         cycling_length_m, 
#         park_area_sqm, 
#         pcn_length_m,
#         ST_AsWKB(geom) as geom 
#     FROM planning_area_stats
# """).df()

# # 2. Normalize columns
# scaler = MinMaxScaler()
# cols_to_normalize = ['facility_count', 'mrt_count', 'cycling_length_m', 'pcn_length_m', 'park_area_sqm']
# df_stats[cols_to_normalize] = scaler.fit_transform(df_stats[cols_to_normalize])

# # 3. Convert to GeoDataFrame
# # DuckDB returns bytearray, but shapely expects bytes
# df_stats['geometry'] = gpd.GeoSeries.from_wkb(df_stats['geom'].apply(bytes))
# gdf_stats = gpd.GeoDataFrame(df_stats, geometry='geometry')

# # 4. Plotting
# fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# # Helper function to plot
# def plot_map(ax, column, title, cmap):
#     gdf_stats.plot(column=column, ax=ax, legend=True, cmap=cmap, legend_kwds={'shrink': 0.5})
#     ax.set_title(title)
#     ax.set_axis_off()

# plot_map(axes[0,0], 'facility_count', 'Normalized Sports Facilities', 'OrRd')
# plot_map(axes[0,1], 'cycling_length_m', 'Normalized Cycling Path Length', 'Blues')
# plot_map(axes[1,0], 'park_area_sqm', 'Normalized Park Area', 'Greens')
# plot_map(axes[1,1], 'pcn_length_m', 'Normalized Park Connector Length', 'Purples')

# plt.suptitle("Spatial Distribution of Sports & Recreational Amenities (Normalized)", fontsize=16)
# plt.tight_layout()
# plt.show()

In [21]:
# Smooth each metric over adjacent planning areas and combine into one index.
#   1. Min-max normalise every raw metric across all planning areas:
#      (value - min) / (max - min).
#   2. Self-join on ST_Touches to find each area's neighbours.
#   3. Replace each area's value with the mean of its neighbours' normalised
#      values; an area with no touching neighbour keeps its own value.
#   4. index_score = sum of the five smoothed metrics.
#
# index_score is produced by the same CREATE TABLE statement (rather than a
# follow-up ALTER TABLE + UPDATE), so the table is built in a single step.
# A metric that is identical for every area normalises to NULL (the
# NULLIF(max - min, 0) guard); such a component is counted as 0 so it cannot
# turn the whole index_score into NULL.
con.sql("""
    CREATE OR REPLACE TABLE planning_area_stats_aggregated AS
    WITH stats_bounds AS (
        SELECT
            MIN(facility_count) AS min_fc, MAX(facility_count) AS max_fc,
            MIN(mrt_count) AS min_mrt, MAX(mrt_count) AS max_mrt,
            MIN(cycling_length_m) AS min_cycle, MAX(cycling_length_m) AS max_cycle,
            MIN(pcn_length_m) AS min_pcn, MAX(pcn_length_m) AS max_pcn,
            MIN(park_area_sqm) AS min_park, MAX(park_area_sqm) AS max_park
        FROM planning_area_stats
    ),
    normalized AS (
        SELECT
            pa.planning_area,
            pa.geom,
            -- Normalize (Value - Min) / (Max - Min)
            (pa.facility_count - sb.min_fc) / NULLIF(sb.max_fc - sb.min_fc, 0) AS norm_facility,
            (pa.mrt_count - sb.min_mrt) / NULLIF(sb.max_mrt - sb.min_mrt, 0) AS norm_mrt,
            (pa.cycling_length_m - sb.min_cycle) / NULLIF(sb.max_cycle - sb.min_cycle, 0) AS norm_cycle,
            (pa.pcn_length_m - sb.min_pcn) / NULLIF(sb.max_pcn - sb.min_pcn, 0) AS norm_pcn,
            (pa.park_area_sqm - sb.min_park) / NULLIF(sb.max_park - sb.min_park, 0) AS norm_park
        FROM planning_area_stats pa, stats_bounds sb
    ),
    neighbour_avg AS (
        SELECT
            a.planning_area,
            a.geom,
            -- Average over touching polygons; if there are none, AVG is NULL
            -- and the area's own normalised value is used instead.
            COALESCE(AVG(b.norm_facility), a.norm_facility) AS agg_facility,
            COALESCE(AVG(b.norm_mrt), a.norm_mrt) AS agg_mrt,
            COALESCE(AVG(b.norm_cycle), a.norm_cycle) AS agg_cycle,
            COALESCE(AVG(b.norm_pcn), a.norm_pcn) AS agg_pcn,
            COALESCE(AVG(b.norm_park), a.norm_park) AS agg_park
        FROM normalized a
        LEFT JOIN normalized b
            ON ST_Touches(a.geom, b.geom)
        GROUP BY a.planning_area, a.geom, a.norm_facility, a.norm_mrt, a.norm_cycle, a.norm_pcn, a.norm_park
    )
    SELECT
        *,
        -- Index score: sum of the aggregated normalised values.
        COALESCE(agg_facility, 0) + COALESCE(agg_mrt, 0) + COALESCE(agg_cycle, 0)
            + COALESCE(agg_pcn, 0) + COALESCE(agg_park, 0) AS index_score
    FROM neighbour_avg;
""")

# Show the five lowest-scoring planning areas (geometry column omitted).
con.sql("SELECT * EXCLUDE(geom) FROM planning_area_stats_aggregated ORDER BY index_score ASC LIMIT 5").show()

┌──────────────────┬──────────────┬──────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬──────────────────────┐
│  planning_area   │ agg_facility │       agg_mrt        │       agg_cycle       │        agg_pcn        │       agg_park        │     index_score      │
│     varchar      │    double    │        double        │        double         │        double         │        double         │        double        │
├──────────────────┼──────────────┼──────────────────────┼───────────────────────┼───────────────────────┼───────────────────────┼──────────────────────┤
│ SOUTHERN ISLANDS │          0.0 │                  0.0 │                   0.0 │                   0.0 │                   0.0 │                  0.0 │
│ TUAS             │          0.0 │ 0.023809523809523808 │                   0.0 │                   0.0 │                   0.0 │ 0.023809523809523808 │
│ LIM CHU KANG     │          0.0 │ 0.031746031746031744 │                  

In [29]:
# Pull the raw per-area statistics together with the composite index_score
# into pandas. Geometry is exported as WKB so GeoPandas can parse it.
STATS_WITH_INDEX_SQL = """
    SELECT 
        t1.planning_area, 
        t1.facility_count, 
        t1.mrt_count,
        t1.cycling_length_m, 
        t1.park_area_sqm, 
        t1.pcn_length_m,
        t2.index_score,
        ST_AsWKB(t1.geom) as geom 
    FROM planning_area_stats t1
    JOIN planning_area_stats_aggregated t2 
      ON t1.planning_area = t2.planning_area
"""
df_stats = con.sql(STATS_WITH_INDEX_SQL).df()

# Each WKB value is converted to `bytes` before parsing (presumably because
# DuckDB hands the blobs back as bytearray -- verify if this step is changed).
wkb_values = df_stats['geom'].apply(bytes)
df_stats['geometry'] = gpd.GeoSeries.from_wkb(wkb_values)

# Wrap as a GeoDataFrame and declare the CRS the geometries were stored in
# (EPSG:3414, the CRS the import cells reprojected to). Nothing is reprojected
# here; the frame stays in EPSG:3414.
gdf_stats = gpd.GeoDataFrame(df_stats, geometry='geometry').set_crs(
    "EPSG:3414", allow_override=True
)

gdf_stats.head()

Unnamed: 0,planning_area,facility_count,mrt_count,cycling_length_m,park_area_sqm,pcn_length_m,index_score,geom,geometry
0,KALLANG,5,20,0.0,61034.02,14963.403306,0.555882,"[1, 235, 3, 0, 0, 1, 0, 0, 0, 192, 1, 0, 0, 99...","POLYGON Z ((31277.373 34723.293 0, 31305.947 3..."
1,BUKIT BATOK,2,10,569.265764,948557.3,11185.807331,0.787303,"[1, 235, 3, 0, 0, 1, 0, 0, 0, 20, 1, 0, 0, 230...","POLYGON Z ((20294.455 39114.528 0, 20334.318 3..."
2,CLEMENTI,2,4,1658.831144,169064.6,11788.067531,0.982642,"[1, 235, 3, 0, 0, 1, 0, 0, 0, 71, 1, 0, 0, 65,...","POLYGON Z ((20627.133 34823.182 0, 20628.049 3..."
3,BUKIT MERAH,2,20,0.0,1320529.0,6634.981046,0.626925,"[1, 235, 3, 0, 0, 1, 0, 0, 0, 115, 9, 0, 0, 20...","POLYGON Z ((26920.022 26969.57 0, 26920.169 26..."
4,ANG MO KIO,2,19,7817.18126,866927.9,5626.439443,1.254181,"[1, 235, 3, 0, 0, 1, 0, 0, 0, 223, 1, 0, 0, 11...","POLYGON Z ((30658.5 42047.527 0, 30670.406 420..."


In [30]:
import leafmap

# Interactive map centred on Singapore, given as [latitude, longitude].
singapore_center = [1.3521, 103.8198]
m = leafmap.Map(center=singapore_center, zoom=11)

# Choropleth of the composite index: planning areas are classified by
# index_score with the "Quantiles" scheme and shaded with the "Reds" colormap.
choropleth_options = {
    "column": "index_score",
    "scheme": "Quantiles",
    "cmap": "Reds",
    "legend_title": "Index",
}
m.add_data(gdf_stats, **choropleth_options)
m

Map(center=[1.3521, 103.8198], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…