In [8]:

import geopandas as gpd
import pandas as pd

import mercantile
from tqdm import tqdm
import json

from datetime import datetime, timezone
import requests

from vt2geojson.tools import vt_bytes_to_geojson

import os

from concurrent.futures import ThreadPoolExecutor, as_completed
#import threading


import gzip


from requests.exceptions import SSLError
import time



In [9]:
def load_tiles_from_json(bundesland_id, input_folder="prep/tile_cache"):
    """Load the cached tile list of one federal state.

    Reads ``<input_folder>/<bundesland_id>_tiles.json``, which must contain a
    JSON array of objects whose keys are passed as keyword arguments to
    ``mercantile.Tile``.

    Parameters
    ----------
    bundesland_id : str
        Identifier used as the file-name prefix (e.g. ``"DE-HB"``).
    input_folder : str
        Directory that holds the ``*_tiles.json`` files.

    Returns
    -------
    list
        One ``mercantile.Tile`` per entry, in file order.

    Raises
    ------
    FileNotFoundError
        If the tile file does not exist.
    """
    cache_file = os.path.join(input_folder, f"{bundesland_id}_tiles.json")
    with open(cache_file, "r") as handle:
        raw_entries = json.load(handle)

    tiles = []
    for entry in raw_entries:
        # Each entry is a mapping of Tile field names to values.
        tiles.append(mercantile.Tile(**entry))
    return tiles

In [10]:

def export_geodata(
    gdfs,
    output_folder="output",
    base_name="mapillary_map-feature-points",
    region="ger",
    save_parquet=True,
    save_geojson_gz=True,
    metadata_path=None,
    bundesland_id=None,
):
    """Write geodata to disk and record the export in a JSON metadata file.

    Parameters
    ----------
    gdfs : GeoDataFrame, DataFrame, list or tuple of frames, or None
        Data to export. A list/tuple is concatenated into one GeoDataFrame;
        ``None`` entries and empty frames in it are ignored. The CRS is taken
        from the first remaining frame, if it has one.
    output_folder : str
        Target directory; created if missing.
    base_name, region : str
        Output files are named ``{base_name}_{region}_latest.<ext>``.
    save_parquet : bool
        Write a ``.parquet`` file.
    save_geojson_gz : bool
        Write a gzip-compressed GeoJSON file (``.geojson.gz``).
    metadata_path : str or None
        Path of the metadata JSON; defaults to
        ``<output_folder>/ml-mf_metadata.json``.
    bundesland_id : str or None
        When given, the metadata file is updated: the state's entry in
        ``bundeslaender`` gets the current UTC timestamp, ``ml_data_from``
        becomes the smallest of those timestamps, and ``processed_date`` is
        set to the time of writing.

    Returns
    -------
    None

    Notes
    -----
    Nothing is written (no folder, no metadata) when there is no data. The
    metadata is updated only after all requested files were written, so a
    failed export does not mark the state as refreshed.
    """
    if gdfs is None:
        print("No data to export.")
        return

    if isinstance(gdfs, (list, tuple)):
        # pd.concat raises on an empty sequence, so filter first and bail out early.
        frames = [frame for frame in gdfs if frame is not None and not frame.empty]
        if not frames:
            print("No data to export.")
            return
        crs = getattr(frames[0], "crs", None)
        gdf = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True), crs=crs)
    else:
        if getattr(gdfs, "empty", False):
            print("No data to export.")
            return
        gdf = gdfs

    os.makedirs(output_folder, exist_ok=True)
    current_timestamp = datetime.now(timezone.utc).isoformat()

    if save_parquet:
        parquet_path = os.path.join(output_folder, f"{base_name}_{region}_latest.parquet")
        gdf.to_parquet(parquet_path, index=False)
        print(f"‚úî Parquet saved to: {parquet_path}")

    if save_geojson_gz:
        geojson_path = os.path.join(output_folder, f"{base_name}_{region}_latest.geojson")
        gz_path = geojson_path + ".gz"

        gdf.to_file(geojson_path, driver="GeoJSON")

        try:
            with open(geojson_path, "rb") as f_in, gzip.open(gz_path, "wb") as f_out:
                # Copy in fixed-size chunks: line-based iteration would load a
                # single-line GeoJSON into memory in one piece.
                for chunk in iter(lambda: f_in.read(1024 * 1024), b""):
                    f_out.write(chunk)
        finally:
            # The uncompressed file is only an intermediate; never leave it behind.
            os.remove(geojson_path)
        print(f"‚úî Gzipped GeoJSON saved to: {gz_path}")

    # Update metadata (ml-mf_metadata.json) only after the data files exist.
    if bundesland_id:
        if metadata_path is None:
            metadata_path = os.path.join(output_folder, "ml-mf_metadata.json")
        if os.path.exists(metadata_path):
            with open(metadata_path, "r", encoding="utf-8") as f:
                metadata = json.load(f)
        else:
            metadata = {
                "ml_data_from": None,
                "bundeslaender": {},
                "processed_date": None,
            }

        # Tolerate an existing file that lacks (or nulls) the per-state mapping.
        states = metadata.get("bundeslaender") or {}
        states[bundesland_id] = current_timestamp
        metadata["bundeslaender"] = states
        # ISO-8601 strings produced here sort chronologically, so min() is the oldest.
        metadata["ml_data_from"] = min(states.values())
        metadata["processed_date"] = datetime.now(timezone.utc).isoformat()

        with open(metadata_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2, ensure_ascii=False)


In [11]:
def process_bundesland(bundesland_id, region_name=None, input_folder="prep/tile_cache", output_folder="output", max_workers=3, limit_tiles=None, request_timeout=30, max_retries=2):
    """Download the Mapillary map-feature point tiles of one state and export them.

    The tile list is loaded via ``load_tiles_from_json``; every tile is fetched
    from ``tiles.mapillary.com`` in a thread pool, decoded with
    ``vt_bytes_to_geojson`` and turned into a GeoDataFrame. All tile frames are
    concatenated and written as Parquet through ``export_geodata``.

    Parameters
    ----------
    bundesland_id : str
        Identifier of the state; selects the tile file and names the output.
    region_name : str or None
        Currently unused; kept so existing callers keep working.
    input_folder : str
        Directory with the ``*_tiles.json`` files.
    output_folder : str
        Directory for the Parquet file and ``ml-mf_metadata.json``.
    max_workers : int
        Number of download threads.
    limit_tiles : int or None
        If truthy, only the first ``limit_tiles`` tiles are processed.
    request_timeout : float
        Timeout in seconds for each HTTP request (previously unbounded).
    max_retries : int
        How often a tile request is repeated after an ``SSLError`` before the
        tile is reported as failed.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError, KeyError
        If ``config.json`` is missing or has no ``ACCESS_TOKEN`` entry. The
        file is read once up front, so a broken configuration fails
        immediately instead of once per tile.
    """
    # Function-scope import: only needed to redact the URL-encoded token.
    from urllib.parse import quote

    print(f"‚ñ∂Ô∏è Starte Verarbeitung f√ºr {bundesland_id}...")

    metadata_path = os.path.join(output_folder, "ml-mf_metadata.json")

    tiles = load_tiles_from_json(bundesland_id, input_folder=input_folder)
    if limit_tiles:
        tiles = tiles[:limit_tiles]

    # Load the access token once instead of re-reading the file for every tile.
    with open("config.json") as f:
        access_token = json.load(f)["ACCESS_TOKEN"]

    tile_layer = 'point'  # alternative layer: 'traffic_sign'
    tile_coverage = 'mly_map_feature_point'  # alternative: "mly_map_feature_traffic_sign"
    ssl_pause_seconds = 300  # 5 minutes, matches the pause announced in the log message

    def redact(message):
        """Return ``str(message)`` with the access token (raw and URL-encoded) masked."""
        text = str(message)
        for secret in (access_token, quote(access_token, safe="")):
            if secret:
                text = text.replace(secret, "***")
        return text

    def fetch(url):
        """GET ``url``; on SSLError pause and retry so the tile is not lost."""
        attempt = 0
        while True:
            try:
                return requests.get(url, timeout=request_timeout)
            except SSLError as e:
                if attempt >= max_retries:
                    raise
                attempt += 1
                print(f"‚ö†Ô∏è SSLError: {redact(e)}. Pausiere f√ºr 5 Minuten...")
                time.sleep(ssl_pause_seconds)

    def process_tile(tile):
        """Fetch and decode one tile; return a GeoDataFrame or None if it has no usable data."""
        url = f"https://tiles.mapillary.com/maps/vtp/{tile_coverage}/2/{tile.z}/{tile.x}/{tile.y}?access_token={access_token}"
        response = fetch(url)
        # NOTE(review): non-200 responses are skipped silently, as before;
        # consider reporting them (e.g. 401/429) if empty results are unexpected.
        if response.status_code != 200:
            return None
        try:
            geojson = vt_bytes_to_geojson(response.content, tile.x, tile.y, tile.z, layer=tile_layer)
            features = geojson.get("features", [])
            if not features:
                return None
            gdf_tile = gpd.GeoDataFrame.from_features(features, crs="EPSG:4326")
            # Millisecond epoch values -> 'YYYY-MM-DD' (UTC), vectorized.
            for column in ('first_seen_at', 'last_seen_at'):
                gdf_tile[column] = pd.to_datetime(gdf_tile[column], unit="ms", utc=True).dt.strftime('%Y-%m-%d')
            gdf_tile['tile_x'] = tile.x
            gdf_tile['tile_y'] = tile.y
            return gdf_tile
        except Exception as e:
            print(f"‚ùå Fehler bei Tile {tile.x}/{tile.y}: {e}")
            return None

    tile_frames = []
    failed_tiles = 0
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(process_tile, tile): tile for tile in tiles}
        for future in tqdm(as_completed(futures), total=len(futures), desc=f"üß© {bundesland_id}"):
            try:
                result = future.result()
            except SSLError as e:
                # Retries inside the worker are exhausted; sleeping here would
                # only stall result collection, so just report the failure.
                failed_tiles += 1
                print(f"‚ö†Ô∏è SSLError: {redact(e)}")
            except Exception as e:
                failed_tiles += 1
                print(f"‚ö†Ô∏è Unbekannter Fehler: {redact(e)}")
            else:
                if result is not None:
                    tile_frames.append(result)

    if failed_tiles:
        print(f"‚ö†Ô∏è {failed_tiles} Tiles fehlgeschlagen.")

    if tile_frames:
        gdf_state = gpd.GeoDataFrame(pd.concat(tile_frames, ignore_index=True))
        export_geodata(
            gdfs=gdf_state,
            output_folder=output_folder,
            region=bundesland_id,
            save_parquet=True,
            save_geojson_gz=False,
            metadata_path=metadata_path,
            bundesland_id=bundesland_id,
        )
    else:
        print(f"‚ö†Ô∏è Keine Daten f√ºr {bundesland_id}.")


In [12]:
# Load the state boundaries (GeoJSON from the deutschlandGeoJSON repository) and display them.
bundeslaender_url = "https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/2_bundeslaender/1_sehr_hoch.geo.json"
bland = gpd.read_file(bundeslaender_url)
bland

Unnamed: 0,id,name,type,geometry
0,DE-BW,Baden-W√ºrttemberg,State,"MULTIPOLYGON (((8.70837 47.71556, 8.70918 47.7..."
1,DE-BY,Bayern,State,"POLYGON ((10.13386 50.55, 10.1398 50.54252, 10..."
2,DE-BE,Berlin,State,"POLYGON ((13.16181 52.59442, 13.174 52.59425, ..."
3,DE-BB,Brandenburg,State,"POLYGON ((13.87951 53.50107, 13.87927 53.49908..."
4,DE-HB,Bremen,State,"POLYGON ((8.98545 53.12822, 8.97316 53.12799, ..."
5,DE-HH,Hamburg,State,"POLYGON ((10.07162 53.71823, 10.0715 53.72192,..."
6,DE-HE,Hessen,State,"POLYGON ((9.49877 51.63152, 9.50474 51.62795, ..."
7,DE-MV,Mecklenburg-Vorpommern,State,"MULTIPOLYGON (((14.26472 53.71069, 14.26472 53..."
8,DE-NI,Niedersachsen,State,"MULTIPOLYGON (((6.86528 53.59597, 6.86528 53.5..."
9,DE-NW,Nordrhein-Westfalen,State,"POLYGON ((8.66628 52.52528, 8.67277 52.51795, ..."


In [13]:
# Show the single row at position 4 as a one-row frame.
bland.iloc[4:5]

Unnamed: 0,id,name,type,geometry
4,DE-HB,Bremen,State,"POLYGON ((8.98545 53.12822, 8.97316 53.12799, ..."


In [14]:


# Process every state for which a tile list exists in the tile cache.

bland = gpd.read_file("https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/2_bundeslaender/1_sehr_hoch.geo.json")

tile_cache_dir = "prep/tile_cache"
for b_id, name in zip(bland["id"], bland["name"]):
    # Only process states whose tile JSON file is present.
    if os.path.exists(os.path.join(tile_cache_dir, f"{b_id}_tiles.json")):
        process_bundesland(b_id, region_name=name)
    else:
        print(f"‚è© √úberspringe {b_id}, keine Tiles gefunden.")

‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-BW...


üß© DE-BW:   6%|‚ñå         | 860/14160 [02:02<22:10, 10.00it/s]  

‚ö†Ô∏è SSLError: HTTPSConnectionPool(host='tiles.mapillary.com', port=443): Max retries exceeded with url: /maps/vtp/mly_map_feature_point/2/14/8549/5669?access_token=***REDACTED*** (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_UNEXPECTED_MESSAGE] sslv3 alert unexpected message (_ssl.c:1000)'))). Pausiere f√ºr 5 Minuten...


üß© DE-BW: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14160/14160 [31:15<00:00,  7.55it/s]  


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-BW_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-BY...


üß© DE-BY: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 28376/28376 [58:05<00:00,  8.14it/s]  


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-BY_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-BE...


üß© DE-BE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 399/399 [04:49<00:00,  1.38it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-BE_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-BB...


üß© DE-BB: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13585/13585 [26:25<00:00,  8.57it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-BB_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-HB...


üß© DE-HB: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 145/145 [00:27<00:00,  5.25it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-HB_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-HH...


üß© DE-HH: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 361/361 [01:32<00:00,  3.92it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-HH_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-HE...


üß© DE-HE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8704/8704 [19:49<00:00,  7.32it/s]  


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-HE_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-MV...


üß© DE-MV: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12294/12294 [23:02<00:00,  8.89it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-MV_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-NI...


üß© DE-NI: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 22844/22844 [44:18<00:00,  8.59it/s]  


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-NI_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-NW...


üß© DE-NW: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15243/15243 [36:32<00:00,  6.95it/s] 


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-NW_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-RP...


üß© DE-RP: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8240/8240 [16:45<00:00,  8.20it/s] 


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-RP_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-SL...


üß© DE-SL: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1170/1170 [02:33<00:00,  7.61it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-SL_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-ST...


üß© DE-ST: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9077/9077 [17:40<00:00,  8.56it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-ST_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-SN...


üß© DE-SN: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8311/8311 [17:25<00:00,  7.95it/s]  


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-SN_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-SH...


üß© DE-SH: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9101/9101 [17:32<00:00,  8.64it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-SH_latest.parquet
‚ñ∂Ô∏è Starte Verarbeitung f√ºr DE-TH...


üß© DE-TH: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6819/6819 [13:46<00:00,  8.25it/s]


‚úî Parquet saved to: output/mapillary_map-feature-points_DE-TH_latest.parquet
