In [2]:
import os
import requests
import subprocess

#from pyrosm import OSM
import geopandas as gpd
import pandas as pd

from pathlib import Path


In [3]:
## check if osmium is installed

# Verify that the `osmium` command-line tool can be started and report its version.
try:
    result = subprocess.run(['osmium', '--version'], check=True, capture_output=True, text=True)
    print(f"Osmium version: {result.stdout.strip()}")
except FileNotFoundError:
    # subprocess raises FileNotFoundError (not CalledProcessError) when the
    # executable cannot be found at all, i.e. exactly the "not installed" case
    # this cell is meant to detect.
    print("Osmium executable not found: install osmium-tool and make sure it is on PATH "
          "(https://osmcode.org/osmium-tool/).")
except subprocess.CalledProcessError as e:
    # osmium started but exited with a non-zero status; include its stderr for diagnosis.
    print(f"Error running Osmium: {e}")
    if e.stderr:
        print(e.stderr.strip())

Osmium version: osmium version 1.16.0
libosmium version 2.20.0
Supported PBF compression types: none zlib lz4

Copyright (C) 2013-2023  Jochen Topf <jochen@topf.org>
License: GNU GENERAL PUBLIC LICENSE Version 3 <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.


What this notebook does:

1. Download OSM data from Geofabrik
2. Extract highways using osmium
3. Convert pbf to geojson.gz and add tags using ogr2ogr
4. Convert geojson.gz to pmtiles using tippecanoe



In [4]:

#### Downloading OSM data from Geofabrik

# Date tag of the Geofabrik snapshot to process. It is interpolated into the
# download file name and into the names of the processed output files.
# NOTE(review): presumably YYMMDD, matching Geofabrik's dated file names — confirm.
set_date = "250805" 


#https://download.geofabrik.de/europe/germany/berlin-250401.osm.pbf
#	germany-250405.osm.pbf

def download_geofabrik_pbf(filename, base_url="https://download.geofabrik.de/europe/"):
    """Download ``filename`` from Geofabrik into ``osm_geofabrik_pbf/``.

    The download is skipped when the target file already exists. Data is
    streamed into a temporary ``<name>.part`` file that is renamed to the final
    name only after the transfer finished, so an interrupted download can never
    be mistaken for a complete file on the next run.

    Args:
        filename: Name of the remote file, e.g. ``"germany-250805.osm.pbf"``.
        base_url: URL prefix the file name is appended to.

    Returns:
        None. Progress and failures (non-200 status) are reported via ``print``.

    Raises:
        requests.RequestException: On connection problems or timeouts.
        OSError: If the file cannot be written.
    """
    folder_download = "osm_geofabrik_pbf"
    os.makedirs(folder_download, exist_ok=True)

    file_path = os.path.join(folder_download, filename)
    file_url = base_url + filename

    if os.path.exists(file_path):
        print(f"File already exists: {file_path}, skipping download.")
        return

    print(f"Downloading: {file_url}")
    partial_path = file_path + ".part"

    # The context manager releases the streamed connection on every exit path.
    with requests.get(file_url, stream=True, timeout=60) as response:
        if response.status_code != 200:
            print(f"Failed to download {file_url} (Status code: {response.status_code})")
            return

        try:
            with open(partial_path, "wb") as f:
                # 1 MiB chunks: far fewer Python-level iterations than 1 KiB
                # chunks for extracts that are several GB in size.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        except BaseException:
            # BaseException also covers KeyboardInterrupt (kernel interrupt);
            # drop the incomplete file before propagating the error.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    # Publish the finished download under its final name.
    os.replace(partial_path, file_path)
    print(f"Downloaded: {file_path}")


# osmium needs to be installed on your system in order to run this code/filtering
# https://osmcode.org/osmium-tool/
# for my win11 machine i used https://trac.osgeo.org/osgeo4w/

def run_osmium(filename, tag_filters=None, output_prefix="processed_cycleways_germany"):
    """Filter a downloaded extract with ``osmium tags-filter``.

    Reads ``osm_geofabrik_pbf/<filename>`` and writes
    ``processed_osm_files/<output_prefix>_<set_date>.pbf``, where ``set_date``
    is the module-level date tag. Nothing is done when the output file already
    exists.

    Args:
        filename: Name of the input ``.osm.pbf`` inside ``osm_geofabrik_pbf/``.
        tag_filters: Optional list of osmium filter expressions. Defaults to the
            cycling-related way filters listed below.
        output_prefix: Prefix of the output file name.

    Returns:
        None. Success and osmium failures are reported via ``print``.

    Raises:
        FileNotFoundError: If the ``osmium`` executable cannot be found.
    """
    folder_download = "osm_geofabrik_pbf"
    folder_processed = "processed_osm_files"
    os.makedirs(folder_processed, exist_ok=True)

    input_pbf = os.path.join(folder_download, filename)
    filtered_pbf = os.path.join(folder_processed, f"{output_prefix}_{set_date}.pbf")

    if os.path.exists(filtered_pbf):
        print(f"Processed file already exists: {filtered_pbf}, skipping processing.")
        return

    if tag_filters is None:
        tag_filters = [
            # Roads and paths that can potentially be used by bicycles
            "w/highway=cycleway,path,footway,residential,unclassified,living_street,road,pedestrian",

            # Ways explicitly marked for bicycles
            "w/bicycle=yes",
            "w/bicycle=designated",

            # Additional cycling-infrastructure tags
            # ("w/cycleway:protected" is deliberately not included)
            "w/cycleway",
            "w/cycleway:left",
            "w/cycleway:right",
            "w/cycleway:both",
            "w/cycleway:lane",
            "w/cycleway:track",
            "w/cycleway:opposite",
            "w/cycleway:opposite_lane",
            "w/cycleway:opposite_track",
            "w/cycleway:shared_lane",

            # Miscellaneous
            "w/sidewalk:left:bicycle",
            "w/sidewalk:right:bicycle",
            "w/sidewalk:both:bicycle",
            "w/bicycle:forward",
            "w/bicycle:backward",
        ]

    filter_command = ["osmium", "tags-filter", input_pbf, *tag_filters, "-o", filtered_pbf]
    print("🔹 Running: ", " ".join(filter_command))

    try:
        subprocess.run(filter_command, check=True)
    except subprocess.CalledProcessError as e:
        # A failed run may leave an incomplete output file behind; remove it so
        # the exists-check above does not treat it as a finished result next time.
        if os.path.exists(filtered_pbf):
            os.remove(filtered_pbf)
        print("❌ Error running Osmium:", e)
        return

    print("✅ Osmium processing complete! Files saved in 'processed_osm_files/'")




# File name of the Germany extract for the configured snapshot date.
filename = f"germany-{set_date}.osm.pbf"

# Fetch the extract into osm_geofabrik_pbf/ (skipped when the file is already on disk).
download_geofabrik_pbf(filename )

# Filter the extract with osmium; the result is written to processed_osm_files/.
run_osmium(filename)


Downloading: https://download.geofabrik.de/europe/germany-250805.osm.pbf
Downloaded: osm_geofabrik_pbf/germany-250805.osm.pbf
🔹 Running:  osmium tags-filter osm_geofabrik_pbf/germany-250805.osm.pbf w/highway=cycleway,path,footway,residential,unclassified,living_street,road,pedestrian w/bicycle=yes w/bicycle=designated w/cycleway w/cycleway:left w/cycleway:right w/cycleway:both w/cycleway:lane w/cycleway:track w/cycleway:opposite w/cycleway:opposite_lane w/cycleway:opposite_track w/cycleway:shared_lane w/sidewalk:left:bicycle w/sidewalk:right:bicycle w/sidewalk:both:bicycle w/bicycle:forward w/bicycle:backward -o processed_osm_files/processed_cycleways_germany_250805.pbf
✅ Osmium processing complete! Files saved in 'processed_osm_files/'


In [None]:
### To Parquet (only works in a special environment; not needed for tippecanoe)

In [None]:
"""
NOTE: This script depends on GDAL being compiled with Apache Arrow/Parquet support.
Not all environments support this, so please read below before running.

There are two environments commonly used with this script:

1. ✅ `gdal_parquet_env` (Conda):
   - Created using `conda` with GDAL, pyarrow, and parquet support from conda-forge
   - Supports exporting to `.parquet` using ogr2ogr
   - Required packages include: gdal, pyarrow, libgdal-arrow-parquet

2. 🚫 `venv` (venv + pip):
   - Uses system GDAL or pip-installed bindings
   - **Does NOT support Parquet**, because GDAL was not built with Arrow/Parquet support
   - Running `ogr2ogr` with Parquet output will silently fail or show "Parquet" missing from `ogr2ogr --formats`

👉 To check if your environment supports Parquet:
    `ogr2ogr --formats | grep -i parquet`

💡 Recommendation:
    Use `gdal_parquet_env` or create a new Conda environment like:
    `conda create -n parquet_env -c conda-forge gdal pyarrow`

"""


'\nNOTE: This script depends on GDAL being compiled with Apache Arrow/Parquet support.\nNot all environments support this, so please read below before running.\n\nThere are two environments commonly used with this script:\n\n1. ✅ `gdal_parquet_env` (Conda):\n   - Created using `conda` with GDAL, pyarrow, and parquet support from conda-forge\n   - Supports exporting to `.parquet` using ogr2ogr\n   - Required packages include: gdal, pyarrow, libgdal-arrow-parquet\n\n2. 🚫 `venv` (venv + pip):\n   - Uses system GDAL or pip-installed bindings\n   - **Does NOT support Parquet**, because GDAL was not built with Arrow/Parquet support\n   - Running `ogr2ogr` with Parquet output will silently fail or show "Parquet" missing from `ogr2ogr --formats`\n\n👉 To check if your environment supports Parquet:\n    `ogr2ogr --formats | grep -i parquet`\n\n💡 Recommendation:\n    Use `gdal_parquet_env` or create a new Conda environment like:\n    `conda create -n parquet_env -c conda-forge gdal pyarrow`\n\n'

In [1]:
import subprocess
from pathlib import Path
import os

def ogr2ogr_parquet(input_file, output_parquet, layer="lines", select_fields=None, osmconf_path=None):
    """Convert one layer of ``input_file`` to Parquet by running ``ogr2ogr``.

    Args:
        input_file: Path of the source dataset (e.g. a filtered ``.pbf``).
        output_parquet: Path of the Parquet file to write.
        layer: Name of the source layer to export; a falsy value passes no
            layer name to ogr2ogr.
        select_fields: Optional iterable of field names. When given, it is
            passed to ogr2ogr as ``-select f1,f2,...``; when omitted, no
            ``-select`` option is added.
        osmconf_path: Optional path to an OSM driver configuration file; it is
            exported to the subprocess as ``OSM_CONFIG_FILE``.

    Returns:
        None. The command line and ogr2ogr's output are printed.

    Raises:
        subprocess.CalledProcessError: If ogr2ogr exits with a non-zero status
            (its stderr/stdout are printed before re-raising).
    """
    ogr2ogr_path = "ogr2ogr"  # Use system-installed ogr2ogr on WSL

    # Absolute paths make the printed command reproducible from any directory.
    input_file = Path(input_file).resolve()
    output_parquet = Path(output_parquet).resolve()

    cmd = [ogr2ogr_path, "-f", "Parquet"]

    # Options go before the destination/source positional arguments.
    if select_fields:
        cmd += ["-select", ",".join(select_fields)]

    cmd += [str(output_parquet), str(input_file)]

    if layer:
        cmd.append(layer)

    # Prepare environment with optional OSM config path
    env = os.environ.copy()
    if osmconf_path:
        env["OSM_CONFIG_FILE"] = str(Path(osmconf_path).resolve())

    print("Running:", " ".join(cmd))
    try:
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            env=env
        )
    except subprocess.CalledProcessError as e:
        print("❌ ogr2ogr failed:")
        print("STDERR:", e.stderr)
        print("STDOUT:", e.stdout)
        raise

    print(result.stdout)
    # Anything written to stderr on a successful run would otherwise be lost,
    # because the output is captured.
    if result.stderr:
        print("STDERR:", result.stderr)


In [4]:
# Date tag used to locate the filtered extract; it has to be the same value that
# was in effect when the processed_cycleways_germany_<date>.pbf file was written.
set_date = "250805" 

# Folder that holds the osmium-filtered extracts.
folder= "processed_osm_files"
# os.makedirs(folder, exist_ok=True)

# Source extract, Parquet destination and OSM driver configuration file.
input_pbf = Path(f"{folder}/processed_cycleways_germany_{set_date}.pbf")
output_file = Path(f"{folder}/processed_cycleways_germany_{set_date}.parquet")
osmconf_file = Path(f"{folder}/osmconf_cycleways.ini")

# Export the default "lines" layer; osmconf_file is handed to the subprocess
# through the OSM_CONFIG_FILE environment variable.
ogr2ogr_parquet(
    input_pbf,
    output_file,
    osmconf_path=osmconf_file
)

Running: ogr2ogr -f Parquet /home/simon/mapillary_trafficsigns/use_cases/cycleway_complete_campaign/processed_osm_files/processed_cycleways_germany_250805.parquet /home/simon/mapillary_trafficsigns/use_cases/cycleway_complete_campaign/processed_osm_files/processed_cycleways_germany_250805.pbf lines
0...10...20...30...40...50...60...70...80...90...100 - done in 00:00:57.



In [5]:
#------------------

In [7]:
def run_osmium(filename, tag_filters=None, output_prefix="processed_motorways_germany"):
    """Filter a downloaded extract with ``osmium tags-filter`` (motorways by default).

    Reads ``osm_geofabrik_pbf/<filename>`` and writes
    ``processed_osm_files/<output_prefix>_<set_date>.pbf``, where ``set_date``
    is the module-level date tag. Nothing is done when the output file already
    exists.

    Args:
        filename: Name of the input ``.osm.pbf`` inside ``osm_geofabrik_pbf/``.
        tag_filters: Optional list of osmium filter expressions. Defaults to
            ``["w/highway=motorway"]``.
        output_prefix: Prefix of the output file name.

    Returns:
        None. Success and osmium failures are reported via ``print``.

    Raises:
        FileNotFoundError: If the ``osmium`` executable cannot be found.
    """
    folder_download = "osm_geofabrik_pbf"
    folder_processed = "processed_osm_files"
    os.makedirs(folder_processed, exist_ok=True)

    input_pbf = os.path.join(folder_download, filename)
    filtered_pbf = os.path.join(folder_processed, f"{output_prefix}_{set_date}.pbf")

    if os.path.exists(filtered_pbf):
        print(f"Processed file already exists: {filtered_pbf}, skipping processing.")
        return

    if tag_filters is None:
        # Roads: keep only ways tagged as motorways
        tag_filters = ["w/highway=motorway"]

    filter_command = ["osmium", "tags-filter", input_pbf, *tag_filters, "-o", filtered_pbf]
    print("🔹 Running: ", " ".join(filter_command))

    try:
        subprocess.run(filter_command, check=True)
    except subprocess.CalledProcessError as e:
        # A failed run may leave an incomplete output file behind; remove it so
        # the exists-check above does not treat it as a finished result next time.
        if os.path.exists(filtered_pbf):
            os.remove(filtered_pbf)
        print("❌ Error running Osmium:", e)
        return

    print("✅ Osmium processing complete! Files saved in 'processed_osm_files/'")




# File name of the Germany extract for the configured snapshot date.
filename = f"germany-{set_date}.osm.pbf"

# Run the motorway filter defined in this cell on that extract.
run_osmium(filename)

🔹 Running:  osmium tags-filter osm_geofabrik_pbf/germany-250805.osm.pbf w/highway=motorway -o processed_osm_files/processed_motorways_germany_250805.pbf
✅ Osmium processing complete! Files saved in 'processed_osm_files/'


In [8]:
# Date tag used to locate the filtered extract; it has to be the same value that
# was in effect when the processed_motorways_germany_<date>.pbf file was written.
set_date = "250805" 

# Folder that holds the osmium-filtered extracts.
folder= "processed_osm_files"
# os.makedirs(folder, exist_ok=True)

# Source extract, Parquet destination and OSM driver configuration file.
input_pbf = Path(f"{folder}/processed_motorways_germany_{set_date}.pbf")
output_file = Path(f"{folder}/processed_motorways_germany_{set_date}.parquet")
osmconf_file = Path(f"{folder}/osmconf_motorways.ini")

# Export the default "lines" layer; osmconf_file is handed to the subprocess
# through the OSM_CONFIG_FILE environment variable.
ogr2ogr_parquet(
    input_pbf,
    output_file,
    osmconf_path=osmconf_file
)

Running: ogr2ogr -f Parquet /home/simon/mapillary_trafficsigns/use_cases/cycleway_complete_campaign/processed_osm_files/processed_motorways_germany_250805.parquet /home/simon/mapillary_trafficsigns/use_cases/cycleway_complete_campaign/processed_osm_files/processed_motorways_germany_250805.pbf lines
0...10...20...30...40...50...60...70...80...90...100 - done.

