In [35]:

import pandas as pd
import geopandas as gpd
import shapely.geometry
import mercantile
from tqdm import tqdm
import os
import tempfile
import fiona

In [27]:
#change coordinate here!!!!!!
# Geometry copied from https://geojson.io
aoi_geom = {
    "coordinates": [
          [
            [
              135.11293857373914,
              34.75016415080408
            ],
            [
              135.11293857373914,
              34.60735967486704
            ],
            [
              135.3967426187233,
              34.60735967486704
            ],
            [
              135.3967426187233,
              34.75016415080408
            ],
            [
              135.11293857373914,
              34.75016415080408
            ]
          ]
        ],
        "type": "Polygon"
}
aoi_shape = shapely.geometry.shape(aoi_geom)
minx, miny, maxx, maxy = aoi_shape.bounds

output_fn = "example_building_footprints.geojson"


In [28]:
quad_keys = set()
for tile in list(mercantile.tiles(minx, miny, maxx, maxy, zooms=9)):
    quad_keys.add(int(mercantile.quadkey(tile)))
quad_keys = list(quad_keys)
print(f"The input area spans {len(quad_keys)} tiles: {quad_keys}")

The input area spans 1 tiles: [133002022]


In [29]:
df = pd.read_csv(
    "https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv"
)

idx = 0
combined_rows = []

with tempfile.TemporaryDirectory() as tmpdir:
    # Download the GeoJSON files for each tile that intersects the input geometry
    tmp_fns = []
    for quad_key in tqdm(quad_keys):
        rows = df[df["QuadKey"] == quad_key]
        if rows.shape[0] == 1:
            url = rows.iloc[0]["Url"]

            df2 = pd.read_json(url, lines=True)
            df2["geometry"] = df2["geometry"].apply(shapely.geometry.shape)

            gdf = gpd.GeoDataFrame(df2, crs=4326)
            fn = os.path.join(tmpdir, f"{quad_key}.geojson")
            tmp_fns.append(fn)
            if not os.path.exists(fn):
                gdf.to_file(fn, driver="GeoJSON")
        elif rows.shape[0] >= 1:
            row = rows.iloc[1] # Select the first row
            url = row["Url"]

            df2 = pd.read_json(url, lines=True)
            df2["geometry"] = df2["geometry"].apply(shapely.geometry.shape)

            gdf = gpd.GeoDataFrame(df2, crs=4326)
            fn = os.path.join(tmpdir, f"{quad_key}.geojson")
            tmp_fns.append(fn)
            if not os.path.exists(fn):
                gdf.to_file(fn, driver="GeoJSON")
        else:
            raise ValueError(f"QuadKey not found in dataset: {quad_key}")

    # Merge the GeoJSON files into a single file
    for fn in tmp_fns:
        with fiona.open(fn, "r") as f:
            for row in tqdm(f):
                row = dict(row)
                shape = shapely.geometry.shape(row["geometry"])

                if aoi_shape.contains(shape):
                    if "id" in row:
                        del row["id"]
                    row["properties"] = {"id": idx}
                    idx += 1
                    combined_rows.append(row)

100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [10:15<00:00, 615.55s/it]
100%|█████████████████████████████████████████████████████████████████████| 2217228/2217228 [03:26<00:00, 10746.57it/s]


In [41]:

schema = {"geometry": "Polygon", "properties": {"id": "int"}}

with fiona.open(output_fn, "w", driver="GeoJSON", schema=schema) as f:
    f.writerecords(combined_rows)
     

In [39]:
import json

# Define the output file name
output_fn = "output.geojson"

# Define the schema for the output file
schema = {"geometry": "Polygon", "properties": {"id": "int"}}

# Convert the rows to a list of GeoJSON features
features = [{"type": "Feature", "geometry": row["geometry"], "properties": {"id": row["id"]}} for row in combined_rows]

# Create a GeoJSON feature collection
feature_collection = {"type": "FeatureCollection", "features": features}

# Write the feature collection to a file
with open(output_fn, "w") as f:
    json.dump(feature_collection, f)

KeyError: 'id'