# Download data from EUBUCCO database

This notebook uses the EUBUCCO database of buildings (v0.1), retrieved via its API. The same data are available from https://eubucco.com and under DOI 10.5281/zenodo.7225259.

In [1]:
import os
import pathlib
import shutil
import zipfile

import geopandas
import pooch
import requests

# Display the DOCKER_ENV_VERSION environment variable as the cell output
# (presumably it identifies the container image the notebook ran in -- confirm).
os.environ.get("DOCKER_ENV_VERSION")


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas


'1.0'

Query the API to get all available countries.

In [2]:
# Fetch the country listing from the EUBUCCO API. The timeout prevents the
# cell from hanging forever on a stalled connection, and raise_for_status()
# surfaces HTTP errors here instead of as an opaque JSON/key error later on.
r = requests.get("https://api.eubucco.com/v0.1/countries", timeout=60)
r.raise_for_status()

Retrieve each file, extract it, copy it to storage, and save Parquet files with the x, y coordinates of the building centroids as input for DBSCAN.

In [3]:
def parse_country(
    country,
    raw_dir="/data/uscuni-ulce/eubucco_raw/",
    centroid_dir="/data/uscuni-ulce/centroids/",
):
    """Download the EUBUCCO files of one country and store their centroids.

    Every entry of the API listing (the module-level response ``r``) whose
    ``"name"`` contains ``country`` is downloaded and unpacked with pooch.
    Each resulting part is read with GeoPandas, the centroid coordinates of
    its geometries are written to ``<centroid_dir>/<part stem>.parquet``
    (columns ``id``, ``x``, ``y``), and the raw part is stored in ``raw_dir``.

    Parameters
    ----------
    country : str
        Substring matched against the ``"name"`` field of each listing entry.
    raw_dir : str or path-like, optional
        Directory receiving the raw files. Created if it does not exist.
    centroid_dir : str or path-like, optional
        Directory receiving the centroid Parquet files. Created if missing.

    Notes
    -----
    Relies on the global ``r`` defined in an earlier cell. If no listing
    entry matches ``country`` nothing is downloaded and no error is raised.
    """
    raw_dir = pathlib.Path(raw_dir)
    centroid_dir = pathlib.Path(centroid_dir)
    # Make sure both targets exist so a fresh machine does not fail only
    # after a long download has already finished.
    raw_dir.mkdir(parents=True, exist_ok=True)
    centroid_dir.mkdir(parents=True, exist_ok=True)

    subset = [file for file in r.json() if country in file["name"]]
    for file in subset:
        # known_hash=None: the download is not verified against a checksum.
        parts = pooch.retrieve(
            file["gpkg"]["download_link"],
            known_hash=None,
            processor=pooch.Unzip(),
            progressbar=True,
        )
        for part in parts:
            copied = False
            if part.endswith("zip"):
                # Nested archive: unpack it straight into the raw storage.
                with zipfile.ZipFile(part, "r") as zip_ref:
                    zip_ref.extractall(raw_dir)
                # The archive is expected to contain a member named like the
                # archive without its ".zip" suffix; that member is read next.
                part = str(raw_dir / pathlib.Path(part).stem)
                copied = True

            df = geopandas.read_file(part, engine="pyogrio", columns=["id", "geometry"])
            centroid = df.centroid
            df["x"] = centroid.x
            df["y"] = centroid.y
            df[["id", "x", "y"]].to_parquet(
                str(centroid_dir / f"{pathlib.Path(part).stem}.parquet"),
            )
            # Drop the references before the next part is read so that two
            # parts are not kept alive at the same time.
            del df, centroid

            if not copied:
                shutil.copy2(part, raw_dir)

The calls are split into separate cells, one per country, to avoid possible memory issues.

In [None]:
parse_country("Austria")

In [None]:
parse_country("Slovakia")

In [None]:
parse_country("Czechia")

In [None]:
parse_country("Poland")

In [None]:
parse_country("Germany")