In [3]:
import os
import requests
import subprocess

#from pyrosm import OSM
import geopandas as gpd
import pandas as pd

In [None]:
## Check that the osmium command-line tool is installed and runnable.
# `osmium --version` is run as a subprocess; its stdout is printed on success.

try:
    result = subprocess.run(['osmium', '--version'], check=True, capture_output=True, text=True)
    print(f"Osmium version: {result.stdout.strip()}")
except FileNotFoundError:
    # Raised by subprocess when the executable cannot be found at all,
    # i.e. exactly the "not installed" case this cell is meant to detect.
    print("Osmium executable not found on PATH. Install osmium-tool: https://osmcode.org/osmium-tool/")
except subprocess.CalledProcessError as e:
    # The executable exists but exited with a non-zero status.
    print(f"Error running Osmium: {e}")

What this notebook does:

1. Download OSM data from geofabrik
2. Extract all highways using osmium
3. Convert the filtered PBF to GeoParquet using ogr2ogr

OUTPUT: Filtered OSM road network (highways) as GeoParquet

In [4]:

#### Downloading OSM data from Geofabrik

# Snapshot date stamp in YYMMDD form. It is interpolated into the name of the
# Geofabrik file to download and into the names of the processed output files.
set_date = "250509" # 2025-05-09


# Examples of dated snapshot names/URLs on Geofabrik:
#   https://download.geofabrik.de/europe/germany/berlin-250401.osm.pbf
#   germany-250405.osm.pbf

def download_geofabrik_pbf(filename, base_url="https://download.geofabrik.de/europe/"):
    """Download a Geofabrik extract into ``osm_geofabrik_pbf/`` unless it is already there.

    The data is streamed to a temporary ``<name>.part`` file and only moved to
    its final name once the transfer has finished, so an interrupted download
    can never be mistaken for a complete file on the next run.

    Args:
        filename: File name appended to ``base_url`` and used as the local
            name inside ``osm_geofabrik_pbf/`` (e.g. ``"germany-250509.osm.pbf"``).
        base_url: URL prefix the file name is appended to.

    Returns:
        None. Progress and a non-200 HTTP status are reported via ``print``.

    Raises:
        Any exception raised by ``requests`` or by file I/O during the
        transfer is propagated after the temporary file has been removed.
    """
    folder_download = "osm_geofabrik_pbf"
    os.makedirs(folder_download, exist_ok=True)

    file_path = os.path.join(folder_download, filename)
    file_url = base_url + filename

    if os.path.exists(file_path):
        print(f"File already exists: {file_path}, skipping download.")
        return

    # Allow file names that contain a sub-path (e.g. "germany/berlin-...").
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    print(f"Downloading: {file_url}")
    partial_path = file_path + ".part"

    # The context manager makes sure the connection is released in every case.
    with requests.get(file_url, stream=True, timeout=60) as response:
        if response.status_code != 200:
            print(f"Failed to download {file_url} (Status code: {response.status_code})")
            return

        try:
            with open(partial_path, "wb") as f:
                # 1 MiB chunks: far fewer Python-level iterations than 1 KiB
                # for a multi-gigabyte extract.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        except BaseException:
            # Do not leave a truncated file behind (also on Ctrl+C).
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    # Publish the finished download under its final name in one step.
    os.replace(partial_path, file_path)
    print(f"Downloaded: {file_path}")


# osmium-tool must be installed on your system for the filtering step below.
# https://osmcode.org/osmium-tool/
# On my Windows 11 machine I installed it via OSGeo4W: https://trac.osgeo.org/osgeo4w/

def run_osmium(filename):
    """Filter a downloaded PBF down to ways carrying a ``highway`` tag using osmium.

    Reads ``osm_geofabrik_pbf/<filename>`` and writes
    ``processed_osm_files/processed_highways_germany_<set_date>.pbf``.
    Note that the output name is built from the module-level ``set_date`` and
    a fixed ``germany`` label, not from ``filename``.

    The step is skipped when the output file already exists. Problems (missing
    input, osmium not installed, osmium exiting with an error) are reported via
    ``print`` rather than raised.

    Args:
        filename: Name of the PBF file inside ``osm_geofabrik_pbf/``.

    Returns:
        None.
    """
    folder_download = "osm_geofabrik_pbf"
    folder_processed = "processed_osm_files"
    os.makedirs(folder_processed, exist_ok=True)

    input_pbf = os.path.join(folder_download, filename)
    filtered_pbf = os.path.join(folder_processed, f"processed_highways_germany_{set_date}.pbf")

    if os.path.exists(filtered_pbf):
        print(f"Processed file already exists: {filtered_pbf}, skipping processing.")
        return

    # Fail early with a clear message, e.g. when the download step did not succeed.
    if not os.path.exists(input_pbf):
        print(f"❌ Input file not found: {input_pbf}")
        return

    filter_command = [
        "osmium", "tags-filter",
        input_pbf,
        "w/highway",  # keep ways that have a highway tag
        "-o", filtered_pbf
    ]
    print("🔹 Running: ", " ".join(filter_command))

    try:
        subprocess.run(filter_command, check=True)
    except FileNotFoundError:
        # The osmium executable itself could not be started.
        print("❌ Error running Osmium: executable 'osmium' not found on PATH")
        return
    except subprocess.CalledProcessError as e:
        # Remove any partial output so the next run does not skip this step.
        if os.path.exists(filtered_pbf):
            os.remove(filtered_pbf)
        print("❌ Error running Osmium:", e)
        return

    print("✅ Osmium processing complete! Files saved in 'processed_osm_files/'")




# Name of the Germany-wide snapshot for the configured date.
filename = f"germany-{set_date}.osm.pbf"

# Step 1: fetch the raw extract. Step 2: filter it with osmium.
download_geofabrik_pbf(filename)
run_osmium(filename)


File already exists: osm_geofabrik_pbf\germany-250509.osm.pbf, skipping download.
🔹 Running:  osmium tags-filter osm_geofabrik_pbf\germany-250509.osm.pbf w/highway -o processed_osm_files\processed_highways_germany_250509.pbf
✅ Osmium processing complete! Files saved in 'processed_osm_files/'


In [6]:
### Takes ~3 min for all of Germany
# ogr2ogr must be installed on your system to run this (here the copy bundled with QGIS is used)


import subprocess
from pathlib import Path

def ogr2ogr_parquet(input_pbf, output_parquet, layer="lines", ogr2ogr_path=None):
    """Convert one layer of a PBF file to Parquet by calling ``ogr2ogr``.

    Args:
        input_pbf: Path to the source ``.pbf`` file.
        output_parquet: Path of the Parquet file to write.
        layer: Name of the source layer passed to ogr2ogr (default ``"lines"``).
        ogr2ogr_path: Optional path or command name of the ogr2ogr executable.
            When omitted, the QGIS 3.40.6 install location is tried first and
            then whatever ``ogr2ogr`` is found on PATH.

    Returns:
        None. The command line and ogr2ogr's stdout are printed.

    Raises:
        FileNotFoundError: No usable ogr2ogr executable could be located.
        subprocess.CalledProcessError: ogr2ogr exited with a non-zero status
            (its stderr is printed before re-raising).
    """
    # Local import: keeps this block self-contained without touching the import cell.
    import shutil

    if ogr2ogr_path is not None:
        candidates = [ogr2ogr_path]
    else:
        candidates = [
            Path(r"C:\Program Files\QGIS 3.40.6\bin\ogr2ogr.exe"),
            "ogr2ogr",
        ]

    # shutil.which accepts both explicit paths and bare command names.
    executable = None
    for candidate in candidates:
        executable = shutil.which(str(candidate))
        if executable:
            break
    if not executable:
        tried = ", ".join(str(candidate) for candidate in candidates)
        raise FileNotFoundError(
            f"ogr2ogr executable not found (tried: {tried}). "
            "Install GDAL/QGIS or pass ogr2ogr_path explicitly."
        )

    # Convert to absolute paths
    input_pbf = str(Path(input_pbf).resolve())
    output_parquet = str(Path(output_parquet).resolve())

    # ogr2ogr expects the destination before the source.
    cmd = [
        executable,
        "-f", "Parquet",
        output_parquet,
        input_pbf,
        layer,
    ]

    print("Running:", " ".join(cmd))
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)

        print(result.stdout)
    except subprocess.CalledProcessError as e:
        print("❌ ogr2ogr failed:")
        print(e.stderr)
        raise


# Usage: convert the filtered Germany extract for the configured date.
# (For a Berlin-only run, use processed_osm_files/processed_highways_berlin_250401.pbf
#  and the matching .parquet name instead.)
input_pbf = Path("processed_osm_files") / f"processed_highways_germany_{set_date}.pbf"
output_file = input_pbf.with_suffix(".parquet")  # same folder and stem, .parquet extension

ogr2ogr_parquet(input_pbf, output_file)

Running: C:\Program Files\QGIS 3.40.6\bin\ogr2ogr.exe -f Parquet C:\Users\simon\Nextcloud\Analysen\mapillary_coverage\processed_osm_files\processed_highways_germany_250509.parquet C:\Users\simon\Nextcloud\Analysen\mapillary_coverage\processed_osm_files\processed_highways_germany_250509.pbf lines
0...10...20...30...40...50...60...70...80...90...100 - done.

