# 02 Prepare OSM Green Space

**Project:** NORI  
**Author:** Yuseof J  
**Date:** 10/12/25  

### **Purpose**
Load the raw OSM shapefiles, select features of interest, filter for only NYC Tracts, and output data as a GeoPackage file. 

*NOTE: I keep the OSM data relatively broad/unfiltered here, including feature classes for multiple types of green spaces. This is done simply to leave room for exploration and added functionality after the first project sprint. For now, the main feature of interest for modelling purposes will be parks (i.e. publicly accessible spaces defined specifically by `fclass == 'park'`).*

### **Inputs**
- `data/raw/osm_ny_shp/gis_osm_landuse_a_free_1.shp`
- `data/raw/osm_ny_shp/gis_osm_natural_free_1.shp`
- `data/raw/osm_ny_shp/gis_osm_natural_a_free_1.shp`
- `data/raw/osm_ny_shp/gis_osm_pois_free_1.shp`
- `data/raw/osm_ny_shp/gis_osm_pois_a_free_1.shp`
- `data/processed/nyc_tracts.gpkg` (layer `tracts`)

### **Outputs**
- `data/processed/greenspaces_nyc.gpkg`
  
--------------------------------------------------------------------------

### 0. Imports and Setup

In [29]:
# package imports
import os
import pandas as pd
import geopandas as gpd
from pathlib import Path

# input locations (relative to the project root)
path_osm_ny_dir = 'data/raw/osm_ny_shp/'
path_nyc_tracts = 'data/processed/nyc_tracts.gpkg'

# individual OSM shapefile layers stored inside the raw OSM directory
path_landuse_polygons = f'{path_osm_ny_dir}gis_osm_landuse_a_free_1.shp'
path_natural_points = f'{path_osm_ny_dir}gis_osm_natural_free_1.shp'
path_natural_polygons = f'{path_osm_ny_dir}gis_osm_natural_a_free_1.shp'
path_pois_points = f'{path_osm_ny_dir}gis_osm_pois_free_1.shp'
path_pois_polygons = f'{path_osm_ny_dir}gis_osm_pois_a_free_1.shp'

# output location
path_output_greenspaces_nyc = 'data/processed/greenspaces_nyc.gpkg'

# coordinate reference system used for every NYC layer in this notebook
# (EPSG:2263 is specifically used for high-accuracy mapping of the nyc boroughs)
nyc_crs = 'EPSG:2263'

# ensure cwd is project root for file paths to function properly:
# walk upwards from the current directory until a folder containing "data" is found
start_dir = Path(os.getcwd())
for candidate_dir in (start_dir, *start_dir.parents):
    if (candidate_dir / "data").exists():
        project_root = candidate_dir
        break
else:
    # the search is bounded by the filesystem root; fail loudly instead of
    # looping forever (the parent of the root directory is the root itself)
    raise FileNotFoundError(
        f"could not find a 'data' directory in {start_dir} or any of its parents"
    )
os.chdir(project_root)  # relative data paths are resolved from here on

### 1. Load Data

In [2]:
# nyc census tracts (the "tracts" layer of the processed geopackage)
gdf_tracts_nyc = gpd.read_file(path_nyc_tracts, layer="tracts")

# landuse polygons (e.g. park, commercial, residential)
gdf_landuse_polygons = gpd.read_file(path_landuse_polygons)

# natural geographic features, read as a (points, polygons) pair
gdf_natural_points, gdf_natural_polygons = (
    gpd.read_file(shp_path)
    for shp_path in (path_natural_points, path_natural_polygons)
)

# points of interest, read as a (points, polygons) pair
gdf_pois_points, gdf_pois_polygons = (
    gpd.read_file(shp_path)
    for shp_path in (path_pois_points, path_pois_polygons)
)

### 2. EDA 

#### Landuse

In [3]:
# list the columns available in the landuse layer
gdf_landuse_polygons.columns.tolist()

['osm_id', 'code', 'fclass', 'name', 'geometry']

In [4]:
# which feature classes are present in the landuse layer
gdf_landuse_polygons.fclass.unique()

array(['forest', 'grass', 'scrub', 'nature_reserve', 'heath', 'park',
       'cemetery', 'retail', 'industrial', 'commercial', 'residential',
       'recreation_ground', 'quarry', 'farmland', 'meadow', 'military',
       'allotments', 'orchard', 'farmyard', 'vineyard'], dtype=object)

In [5]:
# preview the first rows of the landuse layer
gdf_landuse_polygons.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,3751891,7201,forest,Newark Island,"POLYGON ((-76.96075 43.2606, -76.96058 43.2618..."
1,3751892,7201,forest,Eagle Island,"POLYGON ((-76.95436 43.25909, -76.95417 43.259..."
2,3751970,7218,grass,,"POLYGON ((-76.73312 43.00334, -76.73272 43.003..."
3,3754207,7217,scrub,Old Man Island,"POLYGON ((-75.67568 44.57677, -75.67544 44.576..."
4,3775170,7210,nature_reserve,Sturgeon Bar,"POLYGON ((-83.18954 42.06989, -83.18944 42.070..."


#### Natural

##### -- Points -- 

In [6]:
# list the columns available in the natural points layer
gdf_natural_points.columns.tolist()

['osm_id', 'code', 'fclass', 'name', 'geometry']

In [7]:
# which feature classes are present in the natural points layer
gdf_natural_points.fclass.unique()

array(['tree', 'spring', 'peak', 'cliff', 'beach', 'cave_entrance'],
      dtype=object)

In [8]:
# preview the first rows of the natural points layer
gdf_natural_points.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,207694783,4121,tree,,POINT (-73.96385 40.66462)
1,213758147,4101,spring,,POINT (-74.41281 42.14883)
2,318765291,4121,tree,,POINT (-72.44996 40.91288)
3,355304959,4111,peak,Ampersand Mountain,POINT (-74.20273 44.23456)
4,356552434,4111,peak,Austin Hill,POINT (-73.40872 43.58451)


##### -- Polygons -- 

In [9]:
# list the columns available in the natural polygons layer
gdf_natural_polygons.columns.tolist()

['osm_id', 'code', 'fclass', 'name', 'geometry']

In [10]:
# which feature classes are present in the natural polygons layer
gdf_natural_polygons.fclass.unique()

array(['beach', 'cliff', 'spring'], dtype=object)

In [11]:
# preview the first rows of the natural polygons layer
gdf_natural_polygons.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,24218028,4141,beach,Beaver Island Beach,"POLYGON ((-78.95366 42.95913, -78.95364 42.959..."
1,29698309,4141,beach,,"POLYGON ((-73.16627 40.69844, -73.16622 40.698..."
2,29698337,4141,beach,,"POLYGON ((-73.17608 40.70089, -73.176 40.70102..."
3,35798818,4141,beach,Emerald Beach,"POLYGON ((-78.89053 42.88419, -78.89045 42.884..."
4,39088568,4141,beach,Gallagher Beach,"POLYGON ((-78.86079 42.84417, -78.86054 42.844..."


#### Points Of Interest

##### -- Points -- 

In [12]:
# list the columns available in the points-of-interest points layer
gdf_pois_points.columns.tolist()

['osm_id', 'code', 'fclass', 'name', 'geometry']

In [13]:
# which feature classes are present in the points-of-interest points layer
gdf_pois_points.fclass.unique()

array(['camera_surveillance', 'memorial', 'bench', 'park', 'guesthouse',
       'monument', 'attraction', 'school', 'viewpoint', 'beverages',
       'beauty_shop', 'tourist_info', 'museum', 'bar', 'lighthouse',
       'dentist', 'police', 'restaurant', 'fire_station', 'tower',
       'market_place', 'ruins', 'hotel', 'toilet', 'fast_food',
       'convenience', 'pub', 'pharmacy', 'bicycle_shop', 'clothes',
       'water_tower', 'drinking_water', 'cafe', 'shelter', 'bank',
       'doityourself', 'kindergarten', 'laundry', 'furniture_shop',
       'jeweller', 'theatre', 'hairdresser', 'garden_centre',
       'sports_shop', 'doctors', 'supermarket', 'clinic', 'post_office',
       'recycling_glass', 'car_dealership', 'library', 'college',
       'community_centre', 'pitch', 'playground', 'sports_centre',
       'picnic_site', 'prison', 'university', 'hospital', 'graveyard',
       'theme_park', 'battlefield', 'comms_tower', 'observation_tower',
       'post_box', 'artwork', 'town_hall', '

In [14]:
# preview the first rows of the points-of-interest points layer
gdf_pois_points.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,41308092,2907,camera_surveillance,,POINT (-73.90242 42.63231)
1,42105926,2907,camera_surveillance,,POINT (-73.72159 43.05355)
2,42429709,2724,memorial,,POINT (-73.91846 40.87302)
3,42442384,2724,memorial,,POINT (-73.93636 40.85382)
4,42451932,2724,memorial,,POINT (-73.92744 40.85717)


##### -- Polygons -- 

In [15]:
# list the columns available in the points-of-interest polygons layer
gdf_pois_polygons.columns.tolist()

['osm_id', 'code', 'fclass', 'name', 'geometry']

In [16]:
# which feature classes are present in the points-of-interest polygons layer
gdf_pois_polygons.fclass.unique()

array(['park', 'track', 'graveyard', 'playground', 'pitch', 'museum',
       'stadium', 'monument', 'attraction', 'sports_centre', 'college',
       'golf_course', 'zoo', 'theatre', 'swimming_pool', 'fountain',
       'mall', 'school', 'library', 'university', 'ruins', 'hotel',
       'wastewater_plant', 'post_office', 'camp_site', 'hospital',
       'dog_park', 'toilet', 'ice_rink', 'fort', 'prison', 'supermarket',
       'courthouse', 'battlefield', 'picnic_site', 'garden_centre',
       'cafe', 'public_building', 'doityourself', 'pharmacy',
       'department_store', 'bank', 'furniture_shop', 'doctors',
       'restaurant', 'mobile_phone_shop', 'community_centre', 'pub',
       'beverages', 'police', 'fire_station', 'memorial', 'shelter',
       'fast_food', 'market_place', 'town_hall', 'dentist', 'hairdresser',
       'cinema', 'travel_agent', 'bakery', 'convenience', 'bar',
       'arts_centre', 'sports_shop', 'jeweller', 'car_dealership',
       'motel', 'car_wash', 'gift_shop', 

In [17]:
# preview the first rows of the points-of-interest polygons layer
gdf_pois_polygons.head()

Unnamed: 0,osm_id,code,fclass,name,geometry
0,5029111,2204,park,Battery Park,"POLYGON ((-74.01767 40.70371, -74.01767 40.703..."
1,8739863,2258,track,,"POLYGON ((-74.83487 44.92755, -74.83484 44.927..."
2,9591742,2204,park,College Hill Park,"POLYGON ((-73.91845 41.71138, -73.91802 41.713..."
3,11836667,2015,graveyard,Saint Michael’s Cemetery,"POLYGON ((-73.90313 40.76573, -73.90273 40.766..."
4,12737363,2205,playground,,"POLYGON ((-74.53555 42.15626, -74.53524 42.156..."


### 3. Data Filtering / Cleaning

##### Filter OSM layers for green features

In [18]:
# landuse feature classes that are treated as green space
landuse_features_of_interest = [
    'forest', 'grass', 'scrub', 'nature_reserve',
    'heath', 'park', 'meadow', 'allotments',
]

# keep only the landuse polygons whose fclass is one of the classes listed above
is_green_landuse = gdf_landuse_polygons['fclass'].isin(landuse_features_of_interest)
gdf_landuse_polygons = gdf_landuse_polygons.loc[is_green_landuse]

In [19]:
# natural feature classes that are treated as green space (points and polygons)
natural_features_of_interest_points = ['tree', 'spring', 'beach']
natural_features_of_interest_polygons = ['spring', 'beach']

# boolean masks marking the rows to keep in each natural layer
keep_natural_points = gdf_natural_points['fclass'].isin(natural_features_of_interest_points)
keep_natural_polygons = gdf_natural_polygons['fclass'].isin(natural_features_of_interest_polygons)

gdf_natural_points = gdf_natural_points.loc[keep_natural_points]
gdf_natural_polygons = gdf_natural_polygons.loc[keep_natural_polygons]

In [20]:
# sanity check: every feature class in the pois polygon layer must also occur in the pois
# points layer, so that the single list below can be used to filter both pois gdf's
pois_polygon_classes = set(gdf_pois_polygons['fclass'].unique())
pois_point_classes = set(gdf_pois_points['fclass'].unique())
assert pois_polygon_classes <= pois_point_classes

# points-of-interest feature classes that are treated as green space
pois_features_of_interest = ['park', 'playground', 'picnic_site', 'camp_site']

# apply the same class filter to the points and the polygons layer
keep_pois_points = gdf_pois_points['fclass'].isin(pois_features_of_interest)
keep_pois_polygons = gdf_pois_polygons['fclass'].isin(pois_features_of_interest)

gdf_pois_points = gdf_pois_points.loc[keep_pois_points]
gdf_pois_polygons = gdf_pois_polygons.loc[keep_pois_polygons]

In [21]:
# stack the filtered point layers into one gdf, then reproject it to the nyc crs
green_point_layers = [gdf_natural_points, gdf_pois_points]
gdf_green_points = pd.concat(green_point_layers, ignore_index=True).to_crs(nyc_crs)

# stack the filtered polygon layers into one gdf, then reproject it to the nyc crs
green_polygon_layers = [gdf_landuse_polygons, gdf_natural_polygons, gdf_pois_polygons]
gdf_green_polygons = pd.concat(green_polygon_layers, ignore_index=True).to_crs(nyc_crs)

In [22]:
# preview the first rows of the nyc tracts layer
gdf_tracts_nyc.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,GEOIDFQ,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,36,85,24402,36085024402,1400000US36085024402,244.02,Census Tract 244.02,G5020,S,1823028,2065530,40.4997874,-74.2384712,"MULTIPOLYGON (((912287.455 121583.989, 912302...."
1,36,85,27705,36085027705,1400000US36085027705,277.05,Census Tract 277.05,G5020,S,531529,0,40.5882479,-74.156982,"MULTIPOLYGON (((938978.947 152923.44, 939459.2..."
2,36,85,12806,36085012806,1400000US36085012806,128.06,Census Tract 128.06,G5020,S,1319470,580167,40.557671,-74.1076715,"MULTIPOLYGON (((950892.186 143465.985, 951155...."
3,36,47,24400,36047024400,1400000US36047024400,244.0,Census Tract 244,G5020,S,155278,0,40.6217475,-73.9862364,"MULTIPOLYGON (((987168.126 166387.055, 987335...."
4,36,47,23000,36047023000,1400000US36047023000,230.0,Census Tract 230,G5020,S,150941,0,40.637816,-73.9842809,"MULTIPOLYGON (((987443.171 173158.468, 988054...."


##### Keep only the features located in NYC (dropping the rest of the NY State features)

In [36]:
# select only polygons which intersect with nyc tracts
# union_all() merges the individual tract geometries into one combined geometry, so every
# polygon is tested against a single shape instead of against each tract
# NOTE: the merged geometry is a plain shapely object that carries no CRS, so geopandas cannot
# notice a CRS mismatch in intersects(); the tracts are therefore brought into the CRS of the
# polygon layer before they are merged
nyc_boundary = gdf_tracts_nyc.to_crs(gdf_green_polygons.crs).geometry.union_all()

# only polygons that fall completely outside of nyc tracts are excluded
gdf_green_nyc_polygons = gdf_green_polygons[
    gdf_green_polygons.intersects(nyc_boundary)
].copy()

In [37]:
# select only points which intersect the nyc tracts (same approach as for the polygons,
# applied to the point layer)
# NOTE: union_all() returns a plain shapely object that carries no CRS, so geopandas cannot
# notice a CRS mismatch in intersects(); the tracts are therefore brought into the CRS of the
# point layer before they are merged
nyc_boundary = gdf_tracts_nyc.to_crs(gdf_green_points.crs).geometry.union_all()

gdf_green_nyc_points = gdf_green_points[
    gdf_green_points.intersects(nyc_boundary)
].copy()

### 4. Save Data

In [38]:
# write both results into the same geopackage, one layer each (polygons first, then points)
output_layers = {
    "green_polygons": gdf_green_nyc_polygons,
    "green_points": gdf_green_nyc_points,
}
for layer_name, layer_gdf in output_layers.items():
    layer_gdf.to_file(path_output_greenspaces_nyc, layer=layer_name)