In [2]:
import os
import requests
import subprocess

#from pyrosm import OSM
import geopandas as gpd
import pandas as pd

from pathlib import Path


In [3]:
## check if osmium is installed

# Probe for the osmium command-line tool and report its version.
try:
    result = subprocess.run(['osmium', '--version'], check=True, capture_output=True, text=True)
    print(f"Osmium version: {result.stdout.strip()}")
except FileNotFoundError:
    # subprocess raises FileNotFoundError (not CalledProcessError) when the
    # executable cannot be found, i.e. when osmium is not installed / not on PATH.
    print("Osmium executable not found on PATH. Install it from https://osmcode.org/osmium-tool/")
except subprocess.CalledProcessError as e:
    # osmium was found but exited with a non-zero status.
    print(f"Error running Osmium: {e}")

Osmium version: osmium version 1.16.0
libosmium version 2.20.0
Supported PBF compression types: none zlib lz4

Copyright (C) 2013-2023  Jochen Topf <jochen@topf.org>
License: GNU GENERAL PUBLIC LICENSE Version 3 <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.


What this does:

1. Download OSM data from Geofabrik
2. Extract highways using osmium
3. Convert pbf to geojson.gz and add tags using ogr2ogr
4. Convert geojson.gz to pmtiles using tippecanoe



In [4]:

#### Downloading OSM data from Geofabrik

# Date stamp embedded in the input and output file names used below
# (e.g. "germany-250805.osm.pbf"); presumably YYMMDD, matching Geofabrik's naming.
set_date = "250805" 


# #https://download.geofabrik.de/europe/germany/berlin-250401.osm.pbf
# #	germany-250405.osm.pbf

# def download_geofabrik_pbf(filename,base_url = "https://download.geofabrik.de/europe/"):
#     folder_download = "osm_geofabrik_pbf"
#     os.makedirs(folder_download, exist_ok=True)
    
#     #filename = "germany-250401.osm.pbf"
#     file_path = os.path.join(folder_download, filename)
#     file_url = base_url + filename
    
#     if os.path.exists(file_path):
#         print(f"File already exists: {file_path}, skipping download.")
#     else:
#         print(f"Downloading: {file_url}")
#         response = requests.get(file_url, stream=True, timeout=60)
#         if response.status_code == 200:
#             with open(file_path, "wb") as f:
#                 for chunk in response.iter_content(chunk_size=1024):
#                     f.write(chunk)
#             print(f"Downloaded: {file_path}")
#         else:
#             print(f"Failed to download {file_url} (Status code: {response.status_code})")


# osmium must be installed on your system to run this filtering step:
# https://osmcode.org/osmium-tool/
# On my Windows 11 machine I installed it via OSGeo4W: https://trac.osgeo.org/osgeo4w/

def run_osmium(filename):
    """Extract road ways from a Geofabrik extract with ``osmium tags-filter``.

    Reads ``../utils/osm_geofabrik_pbf/<filename>`` and writes
    ``processed_osm_files/processed_hw5030_germany_<set_date>.pbf``, where
    ``set_date`` is the module-level date stamp. The step is skipped when the
    output file already exists.

    Problems (missing input, osmium exiting with an error) are reported with a
    printed message instead of an exception. When osmium fails, any partially
    written output file is removed so that a later call does not mistake it
    for a finished result and skip the processing.

    Args:
        filename: File name of the source ``.osm.pbf`` inside the download folder.

    Returns:
        None.
    """
    folder_download = "../utils/osm_geofabrik_pbf"
    folder_processed = "processed_osm_files"
    os.makedirs(folder_processed, exist_ok=True)

    input_pbf = os.path.join(folder_download, filename)
    filtered_pbf = os.path.join(folder_processed, f"processed_hw5030_germany_{set_date}.pbf")

    if os.path.exists(filtered_pbf):
        print(f"Processed file already exists: {filtered_pbf}, skipping processing.")
        return

    # Fail early with a clear message instead of an opaque osmium exit status.
    if not os.path.exists(input_pbf):
        print(f"❌ Input file not found: {input_pbf}")
        return

    filter_command = [
        "osmium", "tags-filter",
        input_pbf,

        # Roads that could potentially carry a 50 or 30 km/h speed limit.
        # Broader variant (also motorways, living streets, tracks):
        # "w/highway=motorway,trunk,primary,secondary,tertiary,motorway_link,trunk_link,primary_link,secondary_link,tertiary_link,unclassified,residential,service,living_street,track,road",
        "w/highway=trunk,primary,secondary,tertiary,trunk_link,primary_link,secondary_link,tertiary_link,unclassified,residential,service,road",

        "-o", filtered_pbf
    ]
    print("🔹 Running: ", " ".join(filter_command))

    try:
        # Capture the output so osmium's own error text can be shown on failure.
        subprocess.run(filter_command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print("❌ Error running Osmium:", e)
        if e.stderr:
            print("STDERR:", e.stderr)
        # The output did not exist before this run (checked above), so whatever
        # is there now is incomplete; remove it so the next call retries.
        if os.path.exists(filtered_pbf):
            os.remove(filtered_pbf)
        return

    print("✅ Osmium processing complete! Files saved in 'processed_osm_files/'")




# Name of the Geofabrik extract to process, built from the `set_date` stamp.
filename = f"germany-{set_date}.osm.pbf"

# Download step is disabled; the extract is expected to be on disk already.
#download_geofabrik_pbf(filename )

# Filter the extract (skipped when the processed file already exists).
run_osmium(filename)


🔹 Running:  osmium tags-filter ../utils/osm_geofabrik_pbf/germany-250805.osm.pbf w/highway=trunk,primary,secondary,tertiary,trunk_link,primary_link,secondary_link,tertiary_link,unclassified,residential,service,road -o processed_osm_files/processed_hw5030_germany_250805.pbf
✅ Osmium processing complete! Files saved in 'processed_osm_files/'


In [5]:
### To Parquet (only works with a special env; not needed for tippecanoe)

In [1]:
"""
NOTE: This script depends on GDAL being compiled with Apache Arrow/Parquet support.
Not all environments support this, so please read below before running.

There are two environments commonly used with this script:

1. ✅ `gdal_parquet_env` (Conda):
   - Created using `conda` with GDAL, pyarrow, and parquet support from conda-forge
   - Supports exporting to `.parquet` using ogr2ogr
   - Required packages include: gdal, pyarrow, libgdal-arrow-parquet

2. 🚫 `venv` (venv + pip):
   - Uses system GDAL or pip-installed bindings
   - **Does NOT support Parquet**, because GDAL was not built with Arrow/Parquet support
   - Running `ogr2ogr` with Parquet output will silently fail or show "Parquet" missing from `ogr2ogr --formats`

👉 To check if your environment supports Parquet:
    `ogr2ogr --formats | grep -i parquet`

💡 Recommendation:
    Use `gdal_parquet_env` or create a new Conda environment like:
    `conda create -n parquet_env -c conda-forge gdal pyarrow`

"""


'\nNOTE: This script depends on GDAL being compiled with Apache Arrow/Parquet support.\nNot all environments support this, so please read below before running.\n\nThere are two environments commonly used with this script:\n\n1. ✅ `gdal_parquet_env` (Conda):\n   - Created using `conda` with GDAL, pyarrow, and parquet support from conda-forge\n   - Supports exporting to `.parquet` using ogr2ogr\n   - Required packages include: gdal, pyarrow, libgdal-arrow-parquet\n\n2. 🚫 `venv` (venv + pip):\n   - Uses system GDAL or pip-installed bindings\n   - **Does NOT support Parquet**, because GDAL was not built with Arrow/Parquet support\n   - Running `ogr2ogr` with Parquet output will silently fail or show "Parquet" missing from `ogr2ogr --formats`\n\n👉 To check if your environment supports Parquet:\n    `ogr2ogr --formats | grep -i parquet`\n\n💡 Recommendation:\n    Use `gdal_parquet_env` or create a new Conda environment like:\n    `conda create -n parquet_env -c conda-forge gdal pyarrow`\n\n'

In [1]:
import subprocess
from pathlib import Path
import os

def ogr2ogr_parquet(input_file, output_parquet, layer="lines", select_fields=None, osmconf_path=None):
    """Convert a layer of an OGR-readable file (e.g. an OSM ``.pbf``) to Parquet via ``ogr2ogr``.

    Args:
        input_file: Path of the source dataset; resolved to an absolute path.
        output_parquet: Path of the Parquet file to write; resolved to an absolute path.
        layer: Name of the source layer to convert. A falsy value omits the
            layer argument from the command.
        select_fields: Optional field names to keep, either an iterable of
            names or a ready-made comma-separated string. Passed to ogr2ogr as
            ``-select``. ``None`` or an empty value keeps all fields.
        osmconf_path: Optional path to an ``osmconf.ini``; handed to ogr2ogr
            through the ``OSM_CONFIG_FILE`` environment variable.

    Raises:
        subprocess.CalledProcessError: If ogr2ogr exits with a non-zero status
            (its stderr/stdout are printed before re-raising).
    """
    ogr2ogr_path = "ogr2ogr"  # Use system-installed ogr2ogr on WSL

    input_file = Path(input_file).resolve()
    output_parquet = Path(output_parquet).resolve()

    cmd = [ogr2ogr_path, "-f", "Parquet"]

    # Honour the field selection (previously accepted but ignored). Options are
    # placed before the dataset names, matching ogr2ogr's documented usage.
    if select_fields:
        if isinstance(select_fields, str):
            field_list = select_fields
        else:
            field_list = ",".join(select_fields)
        cmd += ["-select", field_list]

    cmd += [str(output_parquet), str(input_file)]

    if layer:
        cmd.append(layer)

    # Prepare environment with optional OSM config path
    env = os.environ.copy()
    if osmconf_path:
        env["OSM_CONFIG_FILE"] = str(Path(osmconf_path).resolve())

    print("Running:", " ".join(cmd))
    try:
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            env=env
        )
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print("❌ ogr2ogr failed:")
        print("STDERR:", e.stderr)
        print("STDOUT:", e.stdout)
        raise


In [2]:
# Date stamp embedded in the processed file names below.
set_date = "250805" 

# Folder holding the filtered .pbf, the osmconf file and the Parquet output.
folder= "processed_osm_files"
# os.makedirs(folder, exist_ok=True)

input_pbf = Path(f"{folder}/processed_hw5030_germany_{set_date}.pbf")
output_file = Path(f"{folder}/processed_hw5030_germany_{set_date}.parquet")
osmconf_file = Path(f"{folder}/osmconf_hw_ms.ini")

# Convert the default "lines" layer of the filtered highways extract to Parquet.
ogr2ogr_parquet(
    input_pbf,
    output_file,
    osmconf_path=osmconf_file
)

Running: ogr2ogr -f Parquet /home/simon/mapillary_trafficsigns/use_cases/schools_tempo30_campaign/processed_osm_files/processed_hw5030_germany_250805.parquet /home/simon/mapillary_trafficsigns/use_cases/schools_tempo30_campaign/processed_osm_files/processed_hw5030_germany_250805.pbf lines
0...10...20...30...40...50...60...70...80...90...100 - done in 00:00:54.



In [5]:
#------------------

In [4]:
def run_osmium(filename):
    """Extract schools and kindergartens from a Geofabrik extract with ``osmium tags-filter``.

    Reads ``../utils/osm_geofabrik_pbf/<filename>`` and writes
    ``processed_osm_files/processed_schools_germany_<set_date>.pbf``, where
    ``set_date`` is the module-level date stamp. The step is skipped when the
    output file already exists.

    Problems (missing input, osmium exiting with an error) are reported with a
    printed message instead of an exception. When osmium fails, any partially
    written output file is removed so that a later call does not mistake it
    for a finished result and skip the processing.

    NOTE: this notebook also defines a highways variant under the same name
    earlier on; running this cell replaces that definition in the kernel.

    Args:
        filename: File name of the source ``.osm.pbf`` inside the download folder.

    Returns:
        None.
    """
    folder_download = "../utils/osm_geofabrik_pbf"
    folder_processed = "processed_osm_files"
    os.makedirs(folder_processed, exist_ok=True)

    input_pbf = os.path.join(folder_download, filename)
    filtered_pbf = os.path.join(folder_processed, f"processed_schools_germany_{set_date}.pbf")

    if os.path.exists(filtered_pbf):
        print(f"Processed file already exists: {filtered_pbf}, skipping processing.")
        return

    # Fail early with a clear message instead of an opaque osmium exit status.
    if not os.path.exists(input_pbf):
        print(f"❌ Input file not found: {input_pbf}")
        return

    filter_command = [
        "osmium", "tags-filter",
        input_pbf,

        # Nodes, ways and relations tagged as a school or kindergarten.
        "nwr/amenity=school,kindergarten",

        "-o", filtered_pbf
    ]
    print("🔹 Running: ", " ".join(filter_command))

    try:
        # Capture the output so osmium's own error text can be shown on failure.
        subprocess.run(filter_command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print("❌ Error running Osmium:", e)
        if e.stderr:
            print("STDERR:", e.stderr)
        # The output did not exist before this run (checked above), so whatever
        # is there now is incomplete; remove it so the next call retries.
        if os.path.exists(filtered_pbf):
            os.remove(filtered_pbf)
        return

    print("✅ Osmium processing complete! Files saved in 'processed_osm_files/'")




# Geofabrik extract to filter; `set_date` must already be defined in the kernel.
filename = f"germany-{set_date}.osm.pbf"

# Extract schools/kindergartens (skipped when the processed file already exists).
run_osmium(filename)

🔹 Running:  osmium tags-filter ../utils/osm_geofabrik_pbf/germany-250805.osm.pbf nwr/amenity=school,kindergarten -o processed_osm_files/processed_schools_germany_250805.pbf
✅ Osmium processing complete! Files saved in 'processed_osm_files/'


In [1]:
import subprocess
import os

def ogr2ogr_to_geopackage(input_pbf, output_gpkg, osmconf_path=None):
    """Convert an OSM ``.pbf`` file to a GeoPackage by running ``ogr2ogr``.

    Args:
        input_pbf: Path of the source ``.pbf`` file (str or path-like).
        output_gpkg: Path of the GeoPackage to write; overwritten if it exists
            (``-overwrite`` is always passed).
        osmconf_path: Optional path to an ``osmconf.ini``. When given it is
            passed as ``--config OSM_CONFIG_FILE <path>``; when ``None`` the
            option is left out and ogr2ogr uses its default configuration.

    Raises:
        subprocess.CalledProcessError: If ogr2ogr exits with a non-zero status
            (its stderr is printed before re-raising).
    """
    ogr2ogr_path = "ogr2ogr"

    cmd = [
        ogr2ogr_path,
        "-overwrite",                 # Overwrite the output file if it exists
    ]

    # Only add the config option when a file was supplied; a None entry in the
    # argument list would break both the debug print and subprocess.run.
    if osmconf_path is not None:
        cmd += [
            "--config",               # Set a GDAL configuration option
            "OSM_CONFIG_FILE",        # The config parameter key
            str(osmconf_path),        # Path to the osmconf.ini
        ]

    cmd += [
        "-f", "GPKG",                 # Output format: GeoPackage
        str(output_gpkg),             # Output GeoPackage file
        str(input_pbf),               # Input .pbf file
    ]
    print("Running:", " ".join(cmd))  # Print the full command for debugging

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        print("GeoPackage created successfully:")
        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print("❌ ogr2ogr failed during conversion to GeoPackage:")
        print(e.stderr)
        raise

# Example usage
# NOTE: the date stamp "250805" is hard-coded in these paths rather than taken
# from `set_date`; keep them in sync when switching to another extract.
#input_pbf = "processed_osm_files/germany_osm_schools-25-05-09.pbf"
input_pbf = "processed_osm_files/processed_schools_germany_250805.pbf"

output_gpkg = "processed_osm_files/processed_schools_germany_250805.gpkg"
osmconf_path = "processed_osm_files/osmconf_schools.ini"

# Convert the filtered schools extract to a GeoPackage using the schools osmconf.
ogr2ogr_to_geopackage(input_pbf, output_gpkg, osmconf_path)

Running: ogr2ogr -overwrite --config OSM_CONFIG_FILE processed_osm_files/osmconf_schools.ini -f GPKG processed_osm_files/processed_schools_germany_250805.gpkg processed_osm_files/processed_schools_germany_250805.pbf
GeoPackage created successfully:
0...10...20...30...40...50...60...70...80...90...100 - done.



In [2]:
import geopandas as gpd
import fiona
import pandas as pd

def save_filtered_gpkg_layers_as_fgb(input_gpkg, output_fgb, selected_layers=None, allowed_amenities=None):
    """Merge layers of a GeoPackage into one FlatGeobuf file, optionally filtered by amenity.

    Each selected layer is read, filtered on its ``amenity`` column (when the
    column exists and ``allowed_amenities`` is non-empty), tagged with a
    ``source_layer`` column holding the layer name, and the results are
    concatenated and written to ``output_fgb``.

    Args:
        input_gpkg: Path of the GeoPackage to read.
        output_fgb: Path of the FlatGeobuf file to write.
        selected_layers: Layer names to include; ``None`` means all layers.
            Names that do not exist in the GeoPackage are reported and skipped.
        allowed_amenities: Values of ``amenity`` to keep; ``None`` or empty
            disables the filter.

    Raises:
        ValueError: If none of the requested layers exist in the GeoPackage.
    """
    # List all layers in the GeoPackage
    available_layers = fiona.listlayers(input_gpkg)
    print(f"📦 All available layers: {available_layers}")

    if selected_layers is None:
        selected_layers = available_layers

    # Filter to only existing layers
    layers_to_use = [layer for layer in selected_layers if layer in available_layers]
    print(f"✅ Layers to include: {layers_to_use}")

    # Make skipped layer names visible instead of dropping them silently.
    missing_layers = [layer for layer in selected_layers if layer not in available_layers]
    if missing_layers:
        print(f"⚠️ Requested layers not found (skipped): {missing_layers}")

    # pd.concat would fail on an empty list with an unhelpful message; raise the
    # same exception type with an explanation instead.
    if not layers_to_use:
        raise ValueError(
            f"None of the requested layers {list(selected_layers)} exist in "
            f"{input_gpkg}; available layers: {available_layers}"
        )

    all_gdfs = []

    for layer in layers_to_use:
        print(f"🔹 Reading layer: {layer}")
        gdf = gpd.read_file(input_gpkg, layer=layer)

        # Filter based on 'amenity'
        if 'amenity' in gdf.columns and allowed_amenities:
            before = len(gdf)
            gdf = gdf[gdf['amenity'].isin(allowed_amenities)]
            after = len(gdf)
            print(f"   🔸 Filtered {before} → {after} features by amenity")

        # assign() returns a new frame, so the column is not written into a
        # filtered slice (which can trigger pandas' SettingWithCopyWarning).
        all_gdfs.append(gdf.assign(source_layer=layer))

    # Combine into one GeoDataFrame
    combined = pd.concat(all_gdfs, ignore_index=True)
    combined = gpd.GeoDataFrame(combined, geometry="geometry")

    # Write to FlatGeobuf
    combined.to_file(output_fgb, driver="FlatGeobuf")
    print(f"✅ FlatGeobuf file written: {output_fgb}")


In [3]:
# Date stamp embedded in the GeoPackage / FlatGeobuf file names below.
set_date = "250805" 


# Merge the "points" and "multipolygons" layers, keep only schools and
# kindergartens, and write the result as a single FlatGeobuf file.
save_filtered_gpkg_layers_as_fgb(
    input_gpkg=f"processed_osm_files/processed_schools_germany_{set_date}.gpkg",
    output_fgb=f"processed_osm_files/processed_schools_germany_{set_date}.fgb",
    selected_layers=["points", "multipolygons"],
    allowed_amenities=["school", "kindergarten"]
)

📦 All available layers: ['points', 'lines', 'multipolygons', 'other_relations', 'multilinestrings']
✅ Layers to include: ['points', 'multipolygons']
🔹 Reading layer: points
   🔸 Filtered 34418 → 18926 features by amenity
🔹 Reading layer: multipolygons
   🔸 Filtered 63501 → 62556 features by amenity
✅ FlatGeobuf file written: processed_osm_files/processed_schools_germany_250805.fgb
