# Vancouver Neighborhood Accessibility Index

**Author:** Jerry Yang  
**Date:** 2025-07-10

This notebook builds a **Neighborhood Accessibility Index** for the City of Vancouver using public datasets:
- **Schools** (points)
- **Rapid transit stations** (points)
- **Bikeways** (lines)
- **Parks** (polygons)
- **Local area boundaries** (neighbourhood polygons)


In [None]:

!pip install geopandas shapely pyproj scikit-learn h3pandas h3 leafmap folium matplotlib mapclassify -q

#Environment setup


In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point, Polygon
from sklearn.preprocessing import MinMaxScaler
import h3pandas
import leafmap
from pathlib import Path

# Folder that receives the exported GeoJSON; created here so the export
# step at the end of the notebook never fails on a missing directory.
OUT_DIR = Path("outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# City of Vancouver Open Data portal: one GeoJSON download URL per dataset.
VAN_LOCAL_AREAS_URL = "https://opendata.vancouver.ca/api/records/1.0/download?dataset=local-area-boundary&format=geojson"
VAN_PARKS_URL = "https://opendata.vancouver.ca/api/records/1.0/download?dataset=parks&format=geojson"
VAN_BIKEWAYS_URL = "https://opendata.vancouver.ca/api/records/1.0/download?dataset=bikeways&format=geojson"
VAN_RAPID_STN_URL = "https://opendata.vancouver.ca/api/records/1.0/download?dataset=rapid-transit-stations&format=geojson"
VAN_SCHOOLS_URL = "https://opendata.vancouver.ca/api/records/1.0/download?dataset=schools&format=geojson"

# Coordinate reference systems

# Projected CRS in metres (NAD83 / UTM zone 10N), used for the length and
# area measurements.
# NOTE(review): despite the constant's name, UTM is a conformal projection,
# not an equal-area one; the name is kept because later cells reference it.
CRS_EQUAL_AREA = "EPSG:26910"

# WGS84 longitude/latitude, for both H3 polygons and the web-based map.
# Defined here (not only in the mapping cell) so the export cell still works
# when the mapping cell is skipped.
CRS_LATLON = "EPSG:4326"

# Backward-compatible alias for the original, misspelled constant name.
CRS_LATLO = CRS_LATLON

#Load Datasets

In [None]:
def read_geojson(url):
    """Load the GeoJSON found at *url* into a GeoDataFrame.

    Args:
        url: Location of the GeoJSON, handed unchanged to ``gpd.read_file``.

    Returns:
        The object returned by ``gpd.read_file`` for that location.
    """
    return gpd.read_file(url)

# Download each layer; the trailing comment gives the expected geometry type.
neighborhoods = read_geojson(VAN_LOCAL_AREAS_URL)  # polygons
parks = read_geojson(VAN_PARKS_URL)  # polygons
bike_paths = read_geojson(VAN_BIKEWAYS_URL)  # lines
stations = read_geojson(VAN_RAPID_STN_URL)  # points
schools = read_geojson(VAN_SCHOOLS_URL)  # points

# Show the first two rows of every layer as a quick sanity check.
for layer_preview in [neighborhoods, parks, bike_paths, stations, schools]:
    display(layer_preview.head(2))

#CRS targeting

In [None]:
# Reproject every layer into the shared working CRS so that all later
# measurements are taken in the same coordinate system.
targets = [
    ("neighborhoods", neighborhoods),
    ("parks", parks),
    ("bike_paths", bike_paths),
    ("stations", stations),
    ("schools", schools),
]
proj = {
    layer_name: layer.to_crs(CRS_EQUAL_AREA)
    for layer_name, layer in targets
}

# Neighbourhoods get a clean 0..n-1 row index; the other layers are used as-is.
neighborhoods = proj["neighborhoods"].reset_index(drop=True)
parks, bike_paths, stations, schools = (
    proj[layer_name]
    for layer_name in ("parks", "bike_paths", "stations", "schools")
)

print("Working CRS:", neighborhoods.crs)

#Computing vectorized indicators

In [None]:
# Stable integer id per neighbourhood, taken from the current row index.
neighborhoods['neighbor_id'] = neighborhoods.index

# Only the id and the polygon are needed for the spatial operations below.
hood_keys = neighborhoods[['neighbor_id', 'geometry']]

# Points within polygons: tag each school / station with the neighbourhood
# that contains it, then count the points per neighbourhood.
schools_in = gpd.sjoin(schools, hood_keys, how='inner', predicate='within')
stns_in = gpd.sjoin(stations, hood_keys, how='inner', predicate='within')

schools_cnt = schools_in.groupby('neighbor_id').size().rename('schools')
stns_cnt = stns_in.groupby('neighbor_id').size().rename('rapid_stations')

# Line clipping: cut bikeways at neighbourhood boundaries and sum the length
# of the pieces that fall inside each neighbourhood (in CRS units).
bike_clip = gpd.overlay(bike_paths, hood_keys, how='intersection')
bike_clip['seg_len_m'] = bike_clip.length
bike_len = bike_clip.groupby('neighbor_id')['seg_len_m'].sum().rename('bike_length_in_m')

# Polygon on polygon: cut parks at neighbourhood boundaries and sum the area
# of the pieces that fall inside each neighbourhood (in squared CRS units).
parks_clip = gpd.overlay(parks, hood_keys, how='intersection')
parks_clip['section_area_m2'] = parks_clip.area
park_area = parks_clip.groupby('neighbor_id')['section_area_m2'].sum().rename('park_area_m2')

# Join metrics.
# The concatenated table only has rows for neighbourhoods that appear in at
# least one metric, so reindex it to every neighbourhood id before filling:
# a neighbourhood with no features at all then gets 0 instead of NaN.
metric_cols = ['schools', 'rapid_stations', 'bike_length_in_m', 'park_area_m2']
metrics = (
    pd.concat([schools_cnt, stns_cnt, bike_len, park_area], axis=1)
    .reindex(neighborhoods['neighbor_id'].to_numpy())
    .fillna(0.0)
    .rename_axis('neighbor_id')
    .reset_index()
)

# Drop metric columns left behind by an earlier run of this cell; otherwise
# the merge would emit suffixed duplicates (schools_x / schools_y, ...).
neighborhoods = (
    neighborhoods
    .drop(columns=metric_cols, errors='ignore')
    .merge(metrics, on='neighbor_id', how='left')
)

neighborhoods[['neighbor_id', 'schools', 'rapid_stations', 'bike_length_in_m', 'park_area_m2']].head()

#Normalization, Smoothing, Composite Index

In [None]:


# Switch to the shorter column names used from here on. A column is renamed
# only when the old name exists and the new name is not already taken.
for old_name, new_name in (
    ("neighbor_id", "neigh_id"),
    ("bike_length_in_m", "bike_length_m"),
):
    if old_name in neighborhoods.columns and new_name not in neighborhoods.columns:
        neighborhoods = neighborhoods.rename(columns={old_name: new_name})

# Stop early, naming the first absent column, if an indicator is missing.
need_cols = ["schools", "rapid_stations", "bike_length_m", "park_area_m2"]
for c in need_cols:
    if c in neighborhoods.columns:
        continue
    raise KeyError(f"Missing column: {c}. Go back to Section 5 and make sure it's created.")

# Coerce every indicator to a numeric dtype; values that cannot be parsed
# and missing values both end up as 0.
for col in need_cols:
    neighborhoods[col] = pd.to_numeric(neighborhoods[col], errors="coerce").fillna(0)

# Min-max normalisation onto [0, 1], written to "<indicator>_norm".
# A constant column (zero range) is set to 0.0 to avoid dividing by zero.
for col in need_cols:
    values = neighborhoods[col]
    lowest = values.min()
    highest = values.max()
    spread = highest - lowest
    neighborhoods[col + "_norm"] = 0.0 if spread == 0 else (values - lowest) / spread

# Neighbour smoothing: each neighbourhood receives the mean normalised value
# of the polygons it touches; a neighbourhood that touches no other polygon
# keeps its own normalised value.
# NOTE(review): the neighbourhood's own value is not part of that mean.
smooth_cols = ["schools_smooth", "rapid_stations_smooth", "bike_length_m_smooth", "park_area_m2_smooth"]
norm_cols = ["schools_norm", "rapid_stations_norm", "bike_length_m_norm", "park_area_m2_norm"]

# One accumulator list per output column, filled in row order.
smoothed = {column_name: [] for column_name in smooth_cols}

for i in neighborhoods.index:
    # Boolean mask over all rows: True where a polygon touches row i's polygon.
    adjacent = neighborhoods.geometry.touches(neighborhoods.loc[i, "geometry"])

    # Row i is never counted as its own neighbour.
    adjacent.loc[i] = False

    neighbours = neighborhoods.loc[adjacent, :]
    isolated = neighbours.shape[0] == 0

    for norm_col, smooth_col in zip(norm_cols, smooth_cols):
        if isolated:
            # No neighbours: fall back to the row's own normalised value.
            value = neighborhoods.loc[i, norm_col]
        else:
            # Otherwise: average of the neighbours' normalised values.
            value = neighbours[norm_col].mean()
        smoothed[smooth_col].append(value)

# Store the smoothed columns on the frame.
for smooth_col in smooth_cols:
    neighborhoods[smooth_col] = smoothed[smooth_col]




# Composite index: weighted sum of the four smoothed indicators.
# The weights are a personal preference: transit counts more than the
# other factors.
w_schools = 0.25
w_transit = 0.35
w_bike = 0.20
w_parks = 0.20

schools_term = w_schools * neighborhoods["schools_smooth"]
transit_term = w_transit * neighborhoods["rapid_stations_smooth"]
bike_term = w_bike * neighborhoods["bike_length_m_smooth"]
parks_term = w_parks * neighborhoods["park_area_m2_smooth"]
neighborhoods["access_index"] = schools_term + transit_term + bike_term + parks_term

# Summary table: id, index, then raw / normalised / smoothed indicators.
# Columns that do not exist on the frame are left out.
id_column = "neigh_id" if "neigh_id" in neighborhoods.columns else "neighbor_id"
indicator_names = ["schools", "rapid_stations", "bike_length_m", "park_area_m2"]
candidate_columns = (
    [id_column, "access_index"]
    + indicator_names
    + [indicator + "_norm" for indicator in indicator_names]
    + [indicator + "_smooth" for indicator in indicator_names]
)
finalized_column = [
    column for column in candidate_columns if column in neighborhoods.columns
]

display(neighborhoods[finalized_column].head(20))


#Mapping

In [None]:
# Web tiles use WGS84 longitude/latitude, so work on a reprojected copy.
CRS_LATLON = "EPSG:4326"
neighborhoods_ll = neighborhoods.to_crs(CRS_LATLON).copy()

# Column that drives the choropleth colouring.
index_col = "access_index"

# Popup content: id, index and raw indicators, limited to columns that exist.
desired_fields = ["neigh_id", index_col, "schools", "rapid_stations", "bike_length_m", "park_area_m2"]
popup_fields = [
    field for field in desired_fields if field in neighborhoods_ll.columns
]

print("Styling column:", index_col)
print("Popup fields:", popup_fields)

# Base map over Vancouver, without the draw and measure controls.
m = leafmap.Map(
    center=[49.25, -123.12],
    zoom=11,
    draw_control=False,
    measure_control=False,
)
m.add_basemap("CartoDB.Positron")

# Choropleth layer settings (quantile scheme, k=5, GnBu colour map).
choropleth_options = dict(
    column=index_col,
    scheme="Quantiles",
    k=5,
    cmap="GnBu",
    legend_title="Accessibility Index (Neighborhoods)",
    fields=popup_fields,
)
m.add_data(neighborhoods_ll, **choropleth_options)

# The bare expression renders the map as the cell's output.
m


#Exporting

In [None]:
# Write the scored neighbourhoods to disk as GeoJSON in lon/lat coordinates.
out_fp = OUT_DIR / "vancouver_neighborhood_access_index.geojson"

export_frame = neighborhoods.to_crs(CRS_LATLON)
export_frame.to_file(out_fp, driver="GeoJSON")

print("Exported:", out_fp.resolve())