From d6a83d4e9e672d46ef64bdfdb5c58b79fd2175f3 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Tue, 27 Jun 2023 12:03:39 +0200
Subject: [PATCH] functions to decompress names (#1)

* add some additional `dicttoolz`
* add utils for decoding compressed strings
* add `toolz` and `dateutil` to the dependencies
* fix the dependency name for `dateutil`
* add tests for the scene id decoder
* add tests for the product id decoder
* raise a more descriptive error for failed lookups
* make the scan info decoder more robust and add tests
* setup ci
---
 .github/workflows/ci.yaml        |  74 ++++++++++++++++
 alos2/decoders.py                | 147 +++++++++++++++++++++++++++++++
 alos2/dicttoolz.py               |  18 ++++
 alos2/tests/test_decoders.py     | 128 +++++++++++++++++++++++++++
 ci/requirements/environment.yaml |   4 +-
 pyproject.toml                   |   5 +-
 6 files changed, 374 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/ci.yaml
 create mode 100644 alos2/decoders.py
 create mode 100644 alos2/dicttoolz.py
 create mode 100644 alos2/tests/test_decoders.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..1744b50
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,74 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  detect-skip-ci-trigger:
+    name: "Detect CI Trigger: [skip-ci]"
+    if: |
+      github.repository == 'umr-lops/xarray-ceos-alos2'
+      && (github.event_name == 'push'
+      || github.event_name == 'pull_request')
+    runs-on: ubuntu-latest
+    outputs:
+      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - uses: xarray-contrib/ci-trigger@v1
+        id: detect-trigger
+        with:
+          keyword: "[skip-ci]"
+
+  ci:
+    name: ${{ matrix.os }} py${{ matrix.python-version }}
+    runs-on: ${{ matrix.os }}
+    needs: detect-skip-ci-trigger
+
+    if: needs.detect-skip-ci-trigger.outputs.triggered == 'false'
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11"]
+        os: ["ubuntu-latest", "macos-latest", "windows-latest"]
+
+    steps:
+      - name: checkout the repository
+        uses: actions/checkout@v3
+        with:
+          # need to fetch all tags to get a correct version
+          fetch-depth: 0 # fetch all branches and tags
+
+      - name: Setup micromamba
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: ci/requirements/environment.yaml
+          environment-name: xarray-ceos-alos2-tests
+          cache-environment: true
+          cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{hashFiles('ci/requirements/environment.yaml')}}"
+          create-args: >-
+            python=${{matrix.python-version}}
+            conda
+
+      - name: Install xarray-ceos-alos2
+        run: |
+          python -m pip install --no-deps -e .
+
+      - name: Import xarray-ceos-alos2
+        run: |
+          python -c "import ceos_alos2"
+
+      - name: Run tests
+        run: |
+          python -m pytest --cov=ceos_alos2
diff --git a/alos2/decoders.py b/alos2/decoders.py
new file mode 100644
index 0000000..1075d7c
--- /dev/null
+++ b/alos2/decoders.py
@@ -0,0 +1,147 @@
+import re
+
+import dateutil.parser
+from tlz.dicttoolz import merge
+from tlz.functoolz import curry
+from tlz.itertoolz import identity as passthrough
+
+from .dicttoolz import valsplit
+
+scene_id_re = re.compile(
+    r"""(?x)
+    (?P<mission_name>[A-Z0-9]{5})
+    (?P<orbit_accumulation>[0-9]{5})
+    (?P<scene_frame>[0-9]{4})
+    -(?P<date>[0-9]{6})
+    """
+)
+product_id_re = re.compile(
+    r"""(?x)
+    (?P<observation_mode>[A-Z]{3})
+    (?P<observation_direction>[LR])
+    (?P<processing_level>1\.0|1\.1|1\.5|3\.1)
+    (?P<processing_option>[GR_])
+    (?P<map_projection>[UPML_])
+    (?P<orbit_direction>[AD])
+    """
+)
+scan_info_re = re.compile(
+    r"""(?x)
+    (?P<processing_method>[BF])
+    (?P<scan_number>[0-9])
+    """
+)
+fname_re = re.compile(
+    r"""(?x)
+    (?P<filetype>[A-Z]{3})
+    (-(?P<polarization>[HV]{2}))?
+    -(?P<scene_id>[A-Z0-9]{14}-[0-9]{6})
+    -(?P<product_id>[A-Z0-9._]{10})
+    (-(?P<scan_info>[BF][0-9]))?
+ """ +) + +observation_modes = { + "SBS": "spotlight mode", + "UBS": "ultra-fine mode single polarization", + "UBD": "ultra-fine mode dual polarization", + "HBS": "high-sensitive mode single polarization", + "HBD": "high-sensitive mode dual polarization", + "HBQ": "high-sensitive mode full (quad.) polarimetry", + "FBS": "fine mode single polarization", + "FBD": "fine mode dual polarization", + "FBQ": "fine mode full (quad.) polarimetry", + "WBS": "ScanSAR nominal 14MHz mode single polarization", + "WBD": "ScanSAR nominal 14MHz mode dual polarization", + "WWS": "ScanSAR nominal 28MHz mode single polarization", + "WWD": "ScanSAR nominal 28MHz mode dual polarization", + "VBS": "ScanSAR wide mode single polarization", + "VBD": "ScanSAR wide mode dual polarization", +} +observation_directions = {"L": "left looking", "R": "right looking"} +processing_levels = { + "1.0": "level 1.0", + "1.1": "level 1.1", + "1.5": "level 1.5", + "3.1": "level 3.1", +} +processing_options = {"G": "geo-code", "R": "geo-reference", "_": "not specified"} +map_projections = {"U": "UTM", "P": "PS", "M": "MER", "L": "LCC", "_": "not specified"} +orbit_directions = {"A": "ascending", "D": "descending"} +processing_methods = {"F": "full aperture_method", "B": "SPECAN method"} + + +def lookup(mapping, code): + value = mapping.get(code) + if value is None: + raise ValueError(f"invalid code {code!r}") + + return value + + +translations = { + "observation_mode": curry(lookup, observation_modes), + "observation_direction": curry(lookup, observation_directions), + "processing_level": curry(lookup, processing_levels), + "processing_option": curry(lookup, processing_options), + "map_projection": curry(lookup, map_projections), + "orbit_direction": curry(lookup, orbit_directions), + "date": curry(dateutil.parser.parse, yearfirst=True, dayfirst=False), + "mission_name": passthrough, + "orbit_accumulation": passthrough, + "scene_frame": passthrough, + "processing_method": curry(lookup, processing_methods), 
+ "scan_number": passthrough, +} + + +def decode_scene_id(scene_id): + match = scene_id_re.match(scene_id) + if match is None: + raise ValueError(f"invalid scene id: {scene_id}") + + groups = match.groupdict() + try: + return {name: translations[name](value) for name, value in groups.items()} + except ValueError as e: + raise ValueError(f"invalid scene id: {scene_id}") from e + + +def decode_product_id(product_id): + match = product_id_re.fullmatch(product_id) + if match is None: + raise ValueError(f"invalid product id: {product_id}") + + groups = match.groupdict() + try: + return {name: translations[name](value) for name, value in groups.items()} + except ValueError as e: + raise ValueError(f"invalid product id: {product_id}") from e + + +def decode_scan_info(scan_info): + match = scan_info_re.fullmatch(scan_info) + if match is None: + raise ValueError(f"invalid scan info: {scan_info}") + + groups = match.groupdict() + return {name: translations[name](value) for name, value in groups.items()} + + +def decode_filename(fname): + match = fname_re.fullmatch(fname) + if fname is None: + raise ValueError(f"invalid file name: {fname}") + + parts = match.groupdict() + translators = { + "filetype": passthrough, + "polarization": passthrough, + "scene_id": decode_scene_id, + "product_id": decode_product_id, + "scan_info": decode_scan_info, + } + + mapping = {name: translators[name](value) for name, value in parts.items()} + scalars, mappings = valsplit(lambda x: not isinstance(x, dict), mapping) + return scalars | merge(*mappings.values()) diff --git a/alos2/dicttoolz.py b/alos2/dicttoolz.py new file mode 100644 index 0000000..e0625e9 --- /dev/null +++ b/alos2/dicttoolz.py @@ -0,0 +1,18 @@ +from tlz.itertoolz import groupby + + +def itemsplit(predicate, d): + groups = groupby(predicate, d.items()) + first = dict(groups.get(True, ())) + second = dict(groups.get(False, ())) + return first, second + + +def valsplit(predicate, d): + wrapper = lambda item: predicate(item[1]) + 
return itemsplit(wrapper, d) + + +def keysplit(predicate, d): + wrapper = lambda item: predicate(item[0]) + return itemsplit(wrapper, d) diff --git a/alos2/tests/test_decoders.py b/alos2/tests/test_decoders.py new file mode 100644 index 0000000..81eaf81 --- /dev/null +++ b/alos2/tests/test_decoders.py @@ -0,0 +1,128 @@ +import datetime + +import pytest + +from alos2 import decoders + + +@pytest.mark.parametrize( + ["scene_id", "expected"], + ( + pytest.param( + "ALOS2225333200-180726", + { + "mission_name": "ALOS2", + "orbit_accumulation": "22533", + "scene_frame": "3200", + "date": datetime.datetime(2018, 7, 26), + }, + id="valid_id", + ), + pytest.param( + "ALOS2xxxxx3200-180726", + ValueError("invalid scene id:"), + id="invalid_id-invalid_orbit_accumulation", + ), + pytest.param( + "ALOS2225333200-a87433", + ValueError("invalid scene id"), + id="invalid_id-invalid_date", + ), + ), +) +def test_decode_scene_id(scene_id, expected): + if issubclass(type(expected), Exception): + with pytest.raises(type(expected), match=expected.args[0]): + decoders.decode_scene_id(scene_id) + + return + + actual = decoders.decode_scene_id(scene_id) + + assert actual == expected + + +@pytest.mark.parametrize( + ["product_id", "expected"], + ( + pytest.param( + "WWDR1.1__D", + { + "observation_mode": "ScanSAR nominal 28MHz mode dual polarization", + "observation_direction": "right looking", + "processing_level": "level 1.1", + "processing_option": "not specified", + "map_projection": "not specified", + "orbit_direction": "descending", + }, + id="valid_id-l11rd", + ), + pytest.param( + "WWDL1.1__A", + { + "observation_mode": "ScanSAR nominal 28MHz mode dual polarization", + "observation_direction": "left looking", + "processing_level": "level 1.1", + "processing_option": "not specified", + "map_projection": "not specified", + "orbit_direction": "ascending", + }, + id="valid_id-l11la", + ), + pytest.param( + "WWDR1.5RUA", + { + "observation_mode": "ScanSAR nominal 28MHz mode dual 
polarization", + "observation_direction": "right looking", + "processing_level": "level 1.5", + "processing_option": "geo-reference", + "map_projection": "UTM", + "orbit_direction": "ascending", + }, + id="valid_id-l15rd", + ), + pytest.param( + "WWDR1.6__A", + ValueError("invalid product id"), + id="invalid_id-invalid_level", + ), + pytest.param( + "WRDR1.1__A", + ValueError("invalid product id"), + id="invalid_id-wrong_observation_mode", + ), + ), +) +def test_decode_product_id(product_id, expected): + if issubclass(type(expected), Exception): + with pytest.raises(type(expected), match=expected.args[0]): + decoders.decode_product_id(product_id) + + return + + actual = decoders.decode_product_id(product_id) + + assert actual == expected + + +@pytest.mark.parametrize( + ["scan_info", "expected"], + ( + pytest.param( + "B4", + {"processing_method": "SPECAN method", "scan_number": "4"}, + id="valid_code", + ), + pytest.param("Ac", ValueError("invalid scan info"), id="invalid_code"), + ), +) +def test_decode_scan_info(scan_info, expected): + if issubclass(type(expected), Exception): + with pytest.raises(type(expected), match=expected.args[0]): + decoders.decode_scan_info(scan_info) + + return + + actual = decoders.decode_scan_info(scan_info) + + assert actual == expected diff --git a/ci/requirements/environment.yaml b/ci/requirements/environment.yaml index 070db44..5bf1065 100644 --- a/ci/requirements/environment.yaml +++ b/ci/requirements/environment.yaml @@ -2,8 +2,10 @@ name: xarray-alos2-tests channels: - conda-forge dependencies: - - python=3.11 - ipython - pre-commit - pytest - pytest-reportlog + - toolz + - cytoolz + - python-dateutil diff --git a/pyproject.toml b/pyproject.toml index c5ce230..8a09c50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,10 @@ name = "xarray-alos2" requires-python = ">= 3.10" license = {text = "MIT"} -dependencies = [] +dependencies = [ + "toolz", + "python-dateutil", +] dynamic = ["version"] [build-system]