In [2]:

# ------------------------------------------
# Was tut dieses Skript?
# Dieses Skript berechnet die Mapillary-Abdeckung für OSM-Straßen in Deutschland.
# Es schneidet die Straßen-Geometrien mit Mapillary-Pufferpolygonen (pano und regular),
# berechnet die Länge der abgedeckten Straßenabschnitte und das Verhältnis der Abdeckung.
# Die Ergebnisse werden als GeoDataFrames gespeichert und können für weitere Analysen oder Visualisierungen genutzt werden.
# ------------------------------------------


In [8]:
import json
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shapely.geometry import box
from shapely.ops import unary_union
from tqdm import tqdm

from config import TILES_CONFIG, PROCESSING_CONFIG, MAPILLARY_CONFIG


In [16]:
### Report the data timestamps recorded in the metadata files of the inputs

# metadata file name -> key inside that file holding the data timestamp
files = {
    "osm_metadata.json": "osm_data_from",
    "ml_metadata.json": "ml_data_from",
}

for fname, primary_key in files.items():
    p = Path(fname)
    if p.exists():
        meta = json.loads(p.read_text(encoding="utf-8"))
        # Files that lack the expected key are skipped without a message.
        if primary_key in meta:
            print(f"{p.name} -> {primary_key}: {meta[primary_key]}")
    else:
        print(f"{p} not found")



osm_metadata.json -> osm_data_from: 2025-10-17T20:21:35Z
ml_metadata.json -> ml_data_from: 2025-10-18T00:00:00Z


### get mapillary_coverage

In [18]:
# Load the latest buffered Mapillary coverage polygons.
# (A dated snapshot was previously read from
#  "ml_output/mapillary_coverage_germany_buffered10m_dissolved_23-01-01_until_25-10-07.parquet".)
coverage_path = PROCESSING_CONFIG["ml_output_folder"] + "/mapillary_coverage_germany_buffered_latest.parquet"

# Read and reproject to EPSG:25832 in one step; the highways are reprojected to the same CRS later.
mapillary_coverage = gpd.read_parquet(coverage_path).to_crs(25832)

# Split multi-part geometries into one row per part, with a fresh 0..n-1 index.
mapillary_coverage_ready = mapillary_coverage.explode(index_parts=False, ignore_index=True)
mapillary_coverage_ready

Unnamed: 0,tile_x,tile_y,is_pano,geometry
0,8459,5481,False,"POLYGON ((280456.168 5660080.228, 280456.566 5..."
1,8459,5481,False,"POLYGON ((281899.417 5659048.905, 281900.331 5..."
2,8459,5481,False,"POLYGON ((281753.108 5659058.591, 281754.102 5..."
3,8459,5481,False,"POLYGON ((281675.69 5659376.219, 281676.341 56..."
4,8460,5481,False,"POLYGON ((282105.951 5659027.847, 282105.45 56..."
...,...,...,...,...
163496,8875,5465,False,"POLYGON ((920442.317 5697275.545, 920442.591 5..."
163497,8875,5465,False,"POLYGON ((920419.395 5697557.442, 920419.455 5..."
163498,8875,5465,False,"POLYGON ((920409.498 5697675.212, 920409.337 5..."
163499,8875,5466,False,"POLYGON ((919460.581 5694604.443, 919459.652 5..."


### get the highways of interest

In [20]:
# Reading takes ~20s.
# (A dated snapshot was previously read from
#  "processed_osm_files/processed_highways_germany_251006.parquet".)
highways_path = "processed_osm_files/processed_highways_latest.parquet"

# `highway` is loaded as well so that specific highway types can easily be
# filtered later on, e.g. to drop all the footways.
highway_columns = ["osm_id", "highway", "geometry"]

osm_highways = gpd.read_parquet(highways_path, columns=highway_columns)

# For reference, osm_highways.highway.value_counts()[:10] gave:
# highway
# service         3829325
# track           3385832
# footway         2182596
# residential     1982153
# path            1727703
# secondary        497335
# tertiary         468466
# unclassified     413081
# steps            305526
# primary          238184
# Name: count, dtype: int64

osm_highways

Unnamed: 0,osm_id,highway,geometry
0,92,residential,"LINESTRING (13.73698 51.01663, 13.73752 51.016..."
1,93,residential,"LINESTRING (13.73939 51.01669, 13.73967 51.016..."
2,99,tertiary,"LINESTRING (11.36639 48.17411, 11.36643 48.174..."
3,100,secondary,"LINESTRING (11.33037 48.1801, 11.33032 48.1801..."
4,103,residential,"LINESTRING (13.74127 51.01615, 13.74149 51.016..."
...,...,...,...
15611991,1442900517,motorway_link,"LINESTRING (9.63244 48.6076, 9.63221 48.60774)"
15611992,1442900670,residential,"LINESTRING (6.45443 51.61332, 6.45455 51.61357)"
15611993,1442900689,construction,"LINESTRING (6.46109 51.6129, 6.461 51.61261)"
15611994,1442900690,construction,"LINESTRING (6.46002 51.61225, 6.46035 51.61302..."


In [21]:
# alternativ direkt das pbf laden -> langsam!
# set_date = "251006" 

# gdf = gpd.read_file(f"processed_osm_files/processed_highways_germany_{set_date}.pbf", layer="lines")


In [22]:
# Takes ~30s: reproject the highways to EPSG:25832, the CRS the coverage polygons were converted to.
osm_highways = osm_highways.to_crs(epsg=25832)


## If RAM fills up, reproject in batches instead, along these lines:

# target = "EPSG:25832"
#
# batches = np.array_split(np.arange(len(osm_highways)), 20)  # 20 batches; adjust as needed
# reprojected = [osm_highways.iloc[batch].to_crs(target) for batch in batches]
#
# osm_highways = pd.concat(reprojected, ignore_index=True)
# print(osm_highways.crs)


In [23]:
## Store every road's full length before clipping; the coverage ratio is later
## computed against this column.
## `GeoDataFrame.length` is the per-row length of the active geometry column.
osm_highways["length_m_before_clip"] = osm_highways.length
osm_highways

Unnamed: 0,osm_id,highway,geometry,length_m_before_clip
0,92,residential,"LINESTRING (832194.125 5662359.794, 832231.203...",386.707524
1,93,residential,"LINESTRING (832362.761 5662377.712, 832384.518...",89.421440
2,99,tertiary,"LINESTRING (675921.969 5338360.059, 675924.648...",36.439158
3,100,secondary,"LINESTRING (673224.184 5338944.301, 673220.116...",98.561214
4,103,residential,"LINESTRING (832498.092 5662325.944, 832513.585...",97.989768
...,...,...,...,...
15611991,1442900517,motorway_link,"LINESTRING (546620.593 5384029.571, 546603.371...",23.043078
15611992,1442900670,residential,"LINESTRING (323756.784 5721101.287, 323766.12 ...",29.754574
15611993,1442900689,construction,"LINESTRING (324216.189 5721039.466, 324208.664...",32.927506
15611994,1442900690,construction,"LINESTRING (324139.844 5720969.481, 324165.516...",195.172623


In [None]:
## The previous implementation wrapped every road in a one-row GeoDataFrame and
## called gpd.clip on it; that needed ~3h on the author's machine for all
## German highways:
## Processing roads: 100%|██████████| 15588346/15588346 [3:01:24<00:00, 1432.13it/s]
## The helper below works on the raw geometries instead and assembles each
## result frame once, so it also copes with a layer that yields no matches
## (pd.concat of an empty list used to raise ValueError).


def spatial_filter(gdf, geom):
    """Return the rows of ``gdf`` whose geometry intersects ``geom``.

    The lookup goes through the spatial index of ``gdf``. Kept for interactive
    use and backward compatibility; the clipping helper queries the index
    directly to avoid building a GeoDataFrame slice per road.
    """
    idx = gdf.sindex.query(geom, predicate="intersects")
    return gdf.iloc[idx]


def clip_network_to_buffers(network, buffers, desc="Processing roads"):
    """Clip each road in ``network`` to the buffer polygons it intersects.

    For every road the intersecting polygons of ``buffers`` are looked up via
    the spatial index, dissolved into a single geometry and intersected with
    the road. This mirrors what ``gpd.clip(road, polygons)`` does (mask is
    dissolved, then intersected) without the per-row GeoDataFrame overhead.

    Parameters
    ----------
    network : GeoDataFrame
        Road geometries plus attribute columns. It is not modified.
    buffers : GeoDataFrame
        Polygon layer to clip against; must use the same CRS as ``network``.
    desc : str
        Label of the tqdm progress bar.

    Returns
    -------
    GeoDataFrame
        One row (all columns of ``network``, fresh 0..n-1 index, original
        order) per road that intersects at least one buffer polygon, with the
        geometry replaced by the covered part. Empty if nothing intersects.

    Raises
    ------
    ValueError
        If ``network`` and ``buffers`` are in different CRSs, because the
        intersection would then be meaningless.
    """
    if network.crs != buffers.crs:
        raise ValueError(
            f"CRS mismatch between network ({network.crs}) and buffers ({buffers.crs})"
        )

    # Nothing to clip against (or nothing to clip): skip the full pass over the network.
    if network.empty or buffers.empty:
        return network.iloc[:0].reset_index(drop=True)

    geom_col = network.geometry.name
    road_geoms = network.geometry.to_numpy()      # object array of shapely geometries
    buffer_geoms = buffers.geometry.to_numpy()    # positions match those returned by the index
    buffer_index = buffers.sindex

    kept_positions = []   # positional indices of roads with any coverage
    clipped_geoms = []    # covered part of each kept road, same order

    for pos, road in enumerate(tqdm(road_geoms, total=len(road_geoms), desc=desc)):
        if road is None or road.is_empty:
            continue

        hits = buffer_index.query(road, predicate="intersects")
        if len(hits) == 0:
            continue

        # Dissolve overlapping buffers first so that shared stretches are not counted twice.
        if len(hits) == 1:
            coverage = buffer_geoms[hits[0]]
        else:
            coverage = unary_union(buffer_geoms[hits].tolist())

        kept_positions.append(pos)
        clipped_geoms.append(road.intersection(coverage))

    clipped = network.iloc[kept_positions].reset_index(drop=True)
    if kept_positions:
        clipped[geom_col] = gpd.GeoSeries(clipped_geoms, crs=network.crs)
    return clipped


# assign net and ml_buffers
# No defensive copy of the network: it is only read below, and copying all rows would double its RAM footprint.
network = osm_highways
mapillary_buffer_layer = mapillary_coverage_ready

# Explicit comparisons (instead of a bare boolean mask) so that rows with a missing flag end up in neither layer.
mapillary_buffer_layer_pano = mapillary_buffer_layer[mapillary_buffer_layer.is_pano == True].copy()
mapillary_buffer_layer_regular = mapillary_buffer_layer[mapillary_buffer_layer.is_pano == False].copy()

# do for pano and non-pano
clipped_gdf_pano = clip_network_to_buffers(
    network, mapillary_buffer_layer_pano, desc="Processing roads (pano)"
)
clipped_gdf_regular = clip_network_to_buffers(
    network, mapillary_buffer_layer_regular, desc="Processing roads (regular)"
)


Processing roads:   0%|          | 2369/15611996 [00:04<8:43:04, 497.37it/s]

In [12]:
# For both coverage layers: measure what is left of each road after clipping and
# relate it to the road's length before clipping.
for clipped_layer in (clipped_gdf_pano, clipped_gdf_regular):
    clipped_layer["length_m_after_clip"] = clipped_layer.geometry.length
    clipped_layer["mp_coverage_ratio"] = (
        clipped_layer["length_m_after_clip"] / clipped_layer["length_m_before_clip"]
    )


In [None]:
# clipped_gdf_pano=clipped_gdf_pano[["osm_id", "mp_coverage_ratio", "length_m_before_clip", "length_m_after_clip", "geometry"]].copy()
# clipped_gdf_pano.to_parquet("germany_osm-highways_25-10-06_mp_pano_coverage_25-10-07_ratio.parquet")

# clipped_gdf_regular=clipped_gdf_regular[["osm_id", "mp_coverage_ratio", "length_m_before_clip", "length_m_after_clip", "geometry"]].copy()
# clipped_gdf_regular.to_parquet("germany_osm-highways_25-10-06_mp_regular_coverage_25-10-07_ratio.parquet")

In [None]:
# Reduce both layers to the columns that are exported and write each one as a
# Parquet file into the configured output folder.
export_columns = [
    "osm_id",
    "highway",
    "mp_coverage_ratio",
    "length_m_before_clip",
    "length_m_after_clip",
    "geometry",
]
output_folder = PROCESSING_CONFIG["output_folder"]

clipped_gdf_pano = clipped_gdf_pano[export_columns].copy()
clipped_gdf_pano.to_parquet(output_folder + "/germany_osm-highways_mp_pano_coverage_latest.parquet")

clipped_gdf_regular = clipped_gdf_regular[export_columns].copy()
clipped_gdf_regular.to_parquet(output_folder + "/germany_osm-highways_mp_regular_coverage_latest.parquet")

In [16]:
# Display the clipped pano-coverage frame.
# NOTE(review): the saved output below has no `highway` column although the export
# cell selects it — presumably a stale output from an earlier run; re-run to refresh.
clipped_gdf_pano

Unnamed: 0,osm_id,mp_coverage_ratio,length_m_before_clip,length_m_after_clip,geometry
0,680,1.000000,111.315259,111.315259,"LINESTRING (688736.164 5344549.747, 688846.103..."
1,1978,1.000000,66.017066,66.017066,"LINESTRING (560704.881 5924993.391, 560746.381..."
2,1880866,0.021153,498.687025,10.548804,"LINESTRING (559385.712 5925758.496, 559385.235..."
3,1880870,0.020579,435.853088,8.969463,"LINESTRING (558952.398 5925821.928, 558951.364..."
4,1881164,0.044864,222.833428,9.997157,"LINESTRING (559031.861 5925802.316, 559032.015..."
...,...,...,...,...,...
764157,1435985709,1.000000,402.610883,402.610883,"LINESTRING (453982.855 5419593.705, 453980.487..."
764158,1435989972,0.155430,6.585792,1.023633,"LINESTRING (818889.529 5820677.255, 818889.789..."
764159,1435989986,0.216426,90.160526,19.513075,"LINESTRING (818884.51 5820658.399, 818886.468 ..."
764160,1435989994,0.060086,160.849663,9.664803,"LINESTRING (818942.501 5820873.788, 818944.088..."


In [17]:
# Display the clipped regular-coverage frame.
# NOTE(review): the saved output below has no `highway` column although the export
# cell selects it — presumably a stale output from an earlier run; re-run to refresh.
clipped_gdf_regular


Unnamed: 0,osm_id,mp_coverage_ratio,length_m_before_clip,length_m_after_clip,geometry
0,92,0.016390,386.707524,6.338077,"LINESTRING (832496.697 5662523.041, 832502.818..."
1,93,0.090189,89.421440,8.064868,"LINESTRING (832423.434 5662331.279, 832431.105..."
2,104,1.000000,501.935353,501.935353,"LINESTRING (832455.092 5663161.286, 832453.819..."
3,107,0.336806,40.495102,13.638997,"LINESTRING (832042.49 5662629.528, 832042.063 ..."
4,109,0.084293,139.461499,11.755695,"LINESTRING (832159.28 5662633.803, 832160.531 ..."
...,...,...,...,...,...
2983013,1435994704,1.000000,122.996334,122.996334,"LINESTRING (803191.898 5837764.345, 803260.773..."
2983014,1435994705,1.000000,25.014844,25.014844,"LINESTRING (803157.476 5837713.601, 803171.52 ..."
2983015,1435994706,1.000000,102.384943,102.384943,"LINESTRING (803022.363 5837542.141, 803088.908..."
2983016,1435994707,1.000000,252.525048,252.525048,"LINESTRING (802876.929 5837336.439, 802975.241..."
