
Commit

Merge pull request #475 from ungarj/metadat_parser_functions
move format functions into subpackages
ungarj committed Jul 11, 2022
2 parents b9ffea2 + f88595d commit 5365ebf
Showing 9 changed files with 730 additions and 303 deletions.
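
In short, mapchete/formats/__init__.py no longer implements the format helper functions itself; it re-imports them from two subpackage modules. The split below is inferred from the new import statements in this diff rather than stated anywhere in the commit:

mapchete/formats/loaders.py — load_input_reader, load_output_reader, load_output_writer
mapchete/formats/tools.py — available_input_formats, available_output_formats, driver_metadata, driver_from_extension, driver_from_file, data_type_from_extension, dump_metadata, load_metadata, read_output_metadata, write_output_metadata, compare_metadata_params
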
2 changes: 1 addition & 1 deletion .flake8
@@ -1,6 +1,6 @@
[flake8]
max-line-length = 88
ignore = E203, E266, E501, W503, F403, F401, E741
ignore = E203, E266, E501, W503, F403, F401, E741, F821
max-complexity = 18
select = B,C,E,F,W,T4,B9
exclude = test/testdata/syntax_error.py
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
@@ -35,7 +35,7 @@ jobs:
sudo apt-get -y update
sudo apt-get install -y gdal-bin python-tk libgdal-dev libproj-dev libgeos-dev
python -m pip install --upgrade pip wheel
pip install -r test/requirements.txt
pip install -r test/requirements.txt -r requirements.txt
pip install -e .[complete]
- name: Lint with flake8
327 changes: 35 additions & 292 deletions mapchete/formats/__init__.py
@@ -1,292 +1,35 @@
"""
Functions handling output formats.
This module deserves a cleaner rewrite some day.
"""

import fiona
import logging
import os
from pprint import pformat
import rasterio
from rasterio.crs import CRS
import warnings

from mapchete.errors import MapcheteConfigError, MapcheteDriverError
from mapchete.io import read_json, write_json, path_exists
from mapchete._registered import drivers
from mapchete.tile import BufferedTilePyramid


logger = logging.getLogger(__name__)


def available_output_formats():
"""
Return all available output formats.
Returns
-------
formats : list
all available output formats
"""
output_formats = {}
for v in drivers:
driver_ = v.load()
if hasattr(driver_, "METADATA") and (driver_.METADATA["mode"] in ["w", "rw"]):
output_formats[driver_.METADATA["driver_name"]] = driver_.METADATA
return output_formats


def available_input_formats():
"""
Return all available input formats.
Returns
-------
formats : list
all available input formats
"""
input_formats = {}
for v in drivers:
logger.debug("driver found: %s", v)
driver_ = v.load()
if hasattr(driver_, "METADATA") and (driver_.METADATA["mode"] in ["r", "rw"]):
input_formats[driver_.METADATA["driver_name"]] = driver_.METADATA
return input_formats


def load_output_reader(output_params):
"""
Return OutputReader class of driver.
Returns
-------
output : ``OutputDataReader``
output reader object
"""
if not isinstance(output_params, dict):
raise TypeError("output_params must be a dictionary")
driver_name = output_params["format"]
for v in drivers:
_driver = v.load()
if all(
[hasattr(_driver, attr) for attr in ["OutputDataReader", "METADATA"]]
) and (_driver.METADATA["driver_name"] == driver_name):
return _driver.OutputDataReader(output_params, readonly=True)
raise MapcheteDriverError("no loader for driver '%s' could be found." % driver_name)


def load_output_writer(output_params, readonly=False):
"""
Return output class of driver.
Returns
-------
output : ``OutputDataWriter``
output writer object
"""
if not isinstance(output_params, dict):
raise TypeError("output_params must be a dictionary")
driver_name = output_params["format"]
for v in drivers:
_driver = v.load()
if all(
[hasattr(_driver, attr) for attr in ["OutputDataWriter", "METADATA"]]
) and (_driver.METADATA["driver_name"] == driver_name):
return _driver.OutputDataWriter(output_params, readonly=readonly)
raise MapcheteDriverError("no loader for driver '%s' could be found." % driver_name)


def load_input_reader(input_params, readonly=False, input_key=None):
"""
Return input class of driver.
Returns
-------
input_params : ``InputData``
input parameters
"""
logger.debug("find input reader with params %s", input_params)
if not isinstance(input_params, dict):
raise TypeError("input_params must be a dictionary")
if "abstract" in input_params:
driver_name = input_params["abstract"]["format"]
elif "path" in input_params:
if os.path.splitext(input_params["path"])[1]:
input_file = input_params["path"]
driver_name = driver_from_file(input_file)
else:
logger.debug("%s is a directory", input_params["path"])
driver_name = "TileDirectory"
else:
raise MapcheteDriverError("invalid input parameters %s" % input_params)
for v in drivers:
driver_ = v.load()
if hasattr(driver_, "METADATA") and (
driver_.METADATA["driver_name"] == driver_name
):
return driver_.InputData(
input_params, readonly=readonly, input_key=input_key
)
raise MapcheteDriverError("no loader for driver '%s' could be found." % driver_name)


def driver_metadata(driver_name):
"""Return driver metadata."""
for v in drivers:
driver_ = v.load()
if hasattr(driver_, "METADATA") and (
driver_.METADATA["driver_name"] == driver_name
):
return dict(driver_.METADATA)
else: # pragma: no cover
raise ValueError(f"driver '{driver_name}' not found")


def driver_from_file(input_file, quick=True):
"""
Guess driver from file by opening it.
Returns
-------
driver : string
driver name
"""
file_ext = os.path.splitext(input_file)[1].split(".")[1]

# mapchete files can immediately be returned:
if file_ext == "mapchete":
return "Mapchete"

# use the most common file extensions to quickly determine input driver for file:
if quick:
if file_ext in ["tif", "jp2"]:
return "raster_file"
if file_ext in ["shp", "geojson", "gpkg"]:
return "vector_file"

# brute force by trying to open file with rasterio and fiona:
try:
logger.debug("try to open %s with rasterio...", input_file)
with rasterio.open(input_file): # pragma: no cover
return "raster_file"
except Exception as rio_exception:
try:
logger.debug("try to open %s with fiona...", input_file)
with fiona.open(input_file): # pragma: no cover
return "vector_file"
except Exception as fio_exception:
if path_exists(input_file):
logger.exception(f"fiona error: {fio_exception}")
logger.exception(f"rasterio error: {rio_exception}")
raise MapcheteDriverError(
"%s has an unknown file extension or could not be opened by neither "
"rasterio nor fiona." % input_file
)
else:
raise FileNotFoundError("%s does not exist" % input_file)


def data_type_from_extension(file_extension):
"""
Guess data_type (raster or vector) from file extension.
Returns
-------
driver : string
driver name
"""
for v in drivers:
driver = v.load()
try:
driver_extensions = driver.METADATA.get("file_extensions")
if driver_extensions and file_extension in driver_extensions:
return driver.METADATA["data_type"]
except AttributeError: # pragma: no cover
pass
else:
raise ValueError(
f"data type for file extension {file_extension} could not be found"
)


def params_to_dump(params):
# in case GridDefinition was not yet initialized
return dict(
pyramid=BufferedTilePyramid(
grid=params["grid"],
tile_size=params.get("tile_size", 256),
metatiling=params.get("metatiling", 1),
pixelbuffer=params.get("pixelbuffer", 0),
).to_dict(),
driver={
k: v
for k, v in params.items()
if k not in ["path", "grid", "pixelbuffer", "metatiling"]
},
)


def read_output_metadata(metadata_json, **kwargs):
params = read_json(metadata_json, **kwargs)
grid = params["pyramid"]["grid"]
if grid["type"] == "geodetic" and grid["shape"] == [2, 1]: # pragma: no cover
warnings.warn(
DeprecationWarning(
"Deprecated grid shape ordering found. "
"Please change grid shape from [2, 1] to [1, 2] in %s." % metadata_json
)
)
params["pyramid"]["grid"]["shape"] = [1, 2]
if "crs" in grid and isinstance(grid["crs"], str):
crs = CRS.from_string(grid["crs"])
warnings.warn(
DeprecationWarning(
"Deprecated 'srs' found in %s: '%s'. "
"Use WKT representation instead: %s"
% (metadata_json, grid["crs"], pformat(dict(wkt=crs.to_wkt())))
)
)
params["pyramid"]["grid"].update(srs=dict(wkt=crs.to_wkt()))
params.update(
pyramid=BufferedTilePyramid(
params["pyramid"]["grid"],
metatiling=params["pyramid"].get("metatiling", 1),
tile_size=params["pyramid"].get("tile_size", 256),
pixelbuffer=params["pyramid"].get("pixelbuffer", 0),
)
)
return params


def write_output_metadata(output_params):
"""Dump output JSON and verify parameters if output metadata exist."""
if "path" in output_params:
metadata_path = os.path.join(output_params["path"], "metadata.json")
logger.debug("check for output %s", metadata_path)
try:
existing_params = read_output_metadata(metadata_path)
logger.debug("%s exists", metadata_path)
logger.debug("existing output parameters: %s", pformat(existing_params))
existing_tp = existing_params["pyramid"]
current_params = params_to_dump(output_params)
logger.debug("current output parameters: %s", pformat(current_params))
current_tp = BufferedTilePyramid(**current_params["pyramid"])
if existing_tp != current_tp: # pragma: no cover
raise MapcheteConfigError(
"pyramid definitions between existing and new output do not match: "
"%s != %s" % (existing_tp, current_tp)
)
existing_format = existing_params["driver"]["format"]
current_format = current_params["driver"]["format"]
if existing_format != current_format: # pragma: no cover
raise MapcheteConfigError(
"existing output format does not match new output format: "
"%s != %s" % ((existing_format, current_format))
)
except FileNotFoundError:
logger.debug("%s does not exist", metadata_path)
dump_params = params_to_dump(output_params)
# dump output metadata
write_json(metadata_path, dump_params)
from mapchete.formats.loaders import (
load_input_reader,
load_output_reader,
load_output_writer,
)
from mapchete.formats.tools import (
available_input_formats,
available_output_formats,
driver_metadata,
driver_from_extension,
driver_from_file,
data_type_from_extension,
dump_metadata,
load_metadata,
read_output_metadata,
write_output_metadata,
compare_metadata_params,
)

__all__ = [
"available_input_formats",
"available_output_formats",
"load_input_reader",
"load_output_reader",
"load_output_writer",
"driver_metadata",
"driver_from_extension",
"driver_from_file",
"data_type_from_extension",
"dump_metadata",
"load_metadata",
"read_output_metadata",
"write_output_metadata",
"compare_metadata_params",
]
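
Most of the functions formerly defined in this module reappear in __all__ above (params_to_dump is gone; dump_metadata, load_metadata, driver_from_extension and compare_metadata_params are new), so typical imports from mapchete.formats should keep working unchanged. A minimal usage sketch, assuming the moved functions keep the behaviour of the deleted implementations above; "example.tif" is a made-up file name:

from mapchete.formats import available_input_formats, driver_from_file

# registered input drivers discovered via the mapchete._registered entry points
print(available_input_formats())

# quick extension-based guess: ".tif" maps to the raster_file driver without opening the file
print(driver_from_file("example.tif"))
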
12 changes: 7 additions & 5 deletions mapchete/formats/default/tile_directory.py
@@ -14,7 +14,7 @@
read_output_metadata,
)
from mapchete.io import path_exists, absolute_path, tile_to_zoom_level
from mapchete.io.vector import reproject_geometry, segmentize_geometry
from mapchete.io.vector import reproject_geometry
from mapchete.tile import BufferedTilePyramid


@@ -129,10 +129,12 @@ def __init__(self, input_params, **kwargs):

# additional params
self._bounds = self._params.get("bounds", self.td_pyramid.bounds)
self.METADATA.update(
data_type=self._data_type, file_extensions=[self._params["extension"]]
self._metadata = dict(
self.METADATA,
data_type=self._data_type,
file_extensions=[self._params["extension"]],
)
if self.METADATA.get("data_type") == "raster":
if self._metadata.get("data_type") == "raster":
self._params["count"] = self._params.get(
"count", self._params.get("bands", None)
)
@@ -160,7 +162,7 @@ def open(self, tile, **kwargs):
"""
return InputTile(
tile,
data_type=self.METADATA.get("data_type"),
data_type=self._metadata.get("data_type"),
basepath=self.path,
file_extension=self._ext,
profile=self._profile,
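
The tile_directory.py change replaces self.METADATA.update(...) with a per-instance self._metadata copy, so creating one TileDirectory input no longer mutates the class-level METADATA dict shared by all instances. A minimal sketch of the difference; the class and attribute names below are illustrative, not the actual driver code:

class Driver:
    METADATA = {"data_type": None}

    def __init__(self, data_type):
        # old pattern: self.METADATA.update(data_type=data_type) would alter the
        # shared class attribute for every instance, past and future
        # new pattern: build a per-instance copy and leave the class attribute alone
        self._metadata = dict(self.METADATA, data_type=data_type)

a = Driver("raster")
b = Driver("vector")
print(a._metadata["data_type"], b._metadata["data_type"])  # raster vector
print(Driver.METADATA["data_type"])                         # None, unchanged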
