From d6a83d4e9e672d46ef64bdfdb5c58b79fd2175f3 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Tue, 27 Jun 2023 12:03:39 +0200
Subject: [PATCH] functions to decompress names (#1)

* add some additional `dicttoolz`
* add utils for decoding compressed strings
* add `toolz` and `dateutil` to the dependencies
* fix the dependency name for `dateutil`
* add tests for the scene id decoder
* add tests for the product id decoder
* raise a more descriptive error for failed lookups
* make the scan info decoder more robust and add tests
* setup ci
---
 .github/workflows/ci.yaml        |  74 ++++++++++++++++
 alos2/decoders.py                | 147 +++++++++++++++++++++++++++++++
 alos2/dicttoolz.py               |  18 ++++
 alos2/tests/test_decoders.py     | 128 +++++++++++++++++++++++++++
 ci/requirements/environment.yaml |   4 +-
 pyproject.toml                   |   5 +-
 6 files changed, 374 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/ci.yaml
 create mode 100644 alos2/decoders.py
 create mode 100644 alos2/dicttoolz.py
 create mode 100644 alos2/tests/test_decoders.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..1744b50
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,74 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  detect-skip-ci-trigger:
+    name: "Detect CI Trigger: [skip-ci]"
+    if: |
+      github.repository == 'umr-lops/xarray-ceos-alos2'
+      && (github.event_name == 'push'
+      || github.event_name == 'pull_request')
+    runs-on: ubuntu-latest
+    outputs:
+      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - uses: xarray-contrib/ci-trigger@v1
+        id: detect-trigger
+        with:
+          keyword: "[skip-ci]"
+
+  ci:
+    name: ${{ matrix.os }} py${{ matrix.python-version }}
+    runs-on: ${{ matrix.os }}
+    needs: detect-skip-ci-trigger
+
+    if: needs.detect-skip-ci-trigger.outputs.triggered == 'false'
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.10", "3.11"]
+        os: ["ubuntu-latest", "macos-latest", "windows-latest"]
+
+    steps:
+      - name: checkout the repository
+        uses: actions/checkout@v3
+        with:
+          # need to fetch all tags to get a correct version
+          fetch-depth: 0 # fetch all branches and tags
+
+      - name: Setup micromamba
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: ci/requirements/environment.yaml
+          environment-name: xarray-ceos-alos2-tests
+          cache-environment: true
+          cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{hashFiles('ci/requirements/environment.yaml')}}"
+          create-args: >-
+            python=${{matrix.python-version}}
+            conda
+
+      - name: Install xarray-ceos-alos2
+        run: |
+          python -m pip install --no-deps -e .
+
+      - name: Import xarray-ceos-alos2
+        run: |
+          python -c "import ceos_alos2"
+
+      - name: Run tests
+        run: |
+          python -m pytest --cov=ceos_alos2
diff --git a/alos2/decoders.py b/alos2/decoders.py
new file mode 100644
index 0000000..1075d7c
--- /dev/null
+++ b/alos2/decoders.py
@@ -0,0 +1,147 @@
+import re
+
+import dateutil.parser
+from tlz.dicttoolz import merge
+from tlz.functoolz import curry
+from tlz.itertoolz import identity as passthrough
+
+from .dicttoolz import valsplit
+
+scene_id_re = re.compile(
+    r"""(?x)
+    (?P<mission_name>[A-Z0-9]{5})
+    (?P<orbit_accumulation>[0-9]{5})
+    (?P<scene_frame>[0-9]{4})
+    -(?P<date>[0-9]{6})
+    """
+)
+product_id_re = re.compile(
+    r"""(?x)
+    (?P<observation_mode>[A-Z]{3})
+    (?P<observation_direction>[LR])
+    (?P<processing_level>1\.0|1\.1|1\.5|3\.1)
+    (?P<processing_option>[GR_])
+    (?P<map_projection>[UPML_])
+    (?P<orbit_direction>[AD])
+    """
+)
+scan_info_re = re.compile(
+    r"""(?x)
+    (?P<processing_method>[BF])
+    (?P<scan_number>[0-9])
+    """
+)
+fname_re = re.compile(
+    r"""(?x)
+    (?P<filetype>[A-Z]{3})
+    (-(?P<polarization>[HV]{2}))?
+    -(?P<scene_id>[A-Z0-9]{14}-[0-9]{6})
+    -(?P<product_id>[A-Z0-9._]{10})
+    (-(?P<scan_info>[BF][0-9]))?
+ """ +) + +observation_modes = { + "SBS": "spotlight mode", + "UBS": "ultra-fine mode single polarization", + "UBD": "ultra-fine mode dual polarization", + "HBS": "high-sensitive mode single polarization", + "HBD": "high-sensitive mode dual polarization", + "HBQ": "high-sensitive mode full (quad.) polarimetry", + "FBS": "fine mode single polarization", + "FBD": "fine mode dual polarization", + "FBQ": "fine mode full (quad.) polarimetry", + "WBS": "ScanSAR nominal 14MHz mode single polarization", + "WBD": "ScanSAR nominal 14MHz mode dual polarization", + "WWS": "ScanSAR nominal 28MHz mode single polarization", + "WWD": "ScanSAR nominal 28MHz mode dual polarization", + "VBS": "ScanSAR wide mode single polarization", + "VBD": "ScanSAR wide mode dual polarization", +} +observation_directions = {"L": "left looking", "R": "right looking"} +processing_levels = { + "1.0": "level 1.0", + "1.1": "level 1.1", + "1.5": "level 1.5", + "3.1": "level 3.1", +} +processing_options = {"G": "geo-code", "R": "geo-reference", "_": "not specified"} +map_projections = {"U": "UTM", "P": "PS", "M": "MER", "L": "LCC", "_": "not specified"} +orbit_directions = {"A": "ascending", "D": "descending"} +processing_methods = {"F": "full aperture_method", "B": "SPECAN method"} + + +def lookup(mapping, code): + value = mapping.get(code) + if value is None: + raise ValueError(f"invalid code {code!r}") + + return value + + +translations = { + "observation_mode": curry(lookup, observation_modes), + "observation_direction": curry(lookup, observation_directions), + "processing_level": curry(lookup, processing_levels), + "processing_option": curry(lookup, processing_options), + "map_projection": curry(lookup, map_projections), + "orbit_direction": curry(lookup, orbit_directions), + "date": curry(dateutil.parser.parse, yearfirst=True, dayfirst=False), + "mission_name": passthrough, + "orbit_accumulation": passthrough, + "scene_frame": passthrough, + "processing_method": curry(lookup, processing_methods), 
+ "scan_number": passthrough, +} + + +def decode_scene_id(scene_id): + match = scene_id_re.match(scene_id) + if match is None: + raise ValueError(f"invalid scene id: {scene_id}") + + groups = match.groupdict() + try: + return {name: translations[name](value) for name, value in groups.items()} + except ValueError as e: + raise ValueError(f"invalid scene id: {scene_id}") from e + + +def decode_product_id(product_id): + match = product_id_re.fullmatch(product_id) + if match is None: + raise ValueError(f"invalid product id: {product_id}") + + groups = match.groupdict() + try: + return {name: translations[name](value) for name, value in groups.items()} + except ValueError as e: + raise ValueError(f"invalid product id: {product_id}") from e + + +def decode_scan_info(scan_info): + match = scan_info_re.fullmatch(scan_info) + if match is None: + raise ValueError(f"invalid scan info: {scan_info}") + + groups = match.groupdict() + return {name: translations[name](value) for name, value in groups.items()} + + +def decode_filename(fname): + match = fname_re.fullmatch(fname) + if fname is None: + raise ValueError(f"invalid file name: {fname}") + + parts = match.groupdict() + translators = { + "filetype": passthrough, + "polarization": passthrough, + "scene_id": decode_scene_id, + "product_id": decode_product_id, + "scan_info": decode_scan_info, + } + + mapping = {name: translators[name](value) for name, value in parts.items()} + scalars, mappings = valsplit(lambda x: not isinstance(x, dict), mapping) + return scalars | merge(*mappings.values()) diff --git a/alos2/dicttoolz.py b/alos2/dicttoolz.py new file mode 100644 index 0000000..e0625e9 --- /dev/null +++ b/alos2/dicttoolz.py @@ -0,0 +1,18 @@ +from tlz.itertoolz import groupby + + +def itemsplit(predicate, d): + groups = groupby(predicate, d.items()) + first = dict(groups.get(True, ())) + second = dict(groups.get(False, ())) + return first, second + + +def valsplit(predicate, d): + wrapper = lambda item: predicate(item[1]) + 
return itemsplit(wrapper, d) + + +def keysplit(predicate, d): + wrapper = lambda item: predicate(item[0]) + return itemsplit(wrapper, d) diff --git a/alos2/tests/test_decoders.py b/alos2/tests/test_decoders.py new file mode 100644 index 0000000..81eaf81 --- /dev/null +++ b/alos2/tests/test_decoders.py @@ -0,0 +1,128 @@ +import datetime + +import pytest + +from alos2 import decoders + + +@pytest.mark.parametrize( + ["scene_id", "expected"], + ( + pytest.param( + "ALOS2225333200-180726", + { + "mission_name": "ALOS2", + "orbit_accumulation": "22533", + "scene_frame": "3200", + "date": datetime.datetime(2018, 7, 26), + }, + id="valid_id", + ), + pytest.param( + "ALOS2xxxxx3200-180726", + ValueError("invalid scene id:"), + id="invalid_id-invalid_orbit_accumulation", + ), + pytest.param( + "ALOS2225333200-a87433", + ValueError("invalid scene id"), + id="invalid_id-invalid_date", + ), + ), +) +def test_decode_scene_id(scene_id, expected): + if issubclass(type(expected), Exception): + with pytest.raises(type(expected), match=expected.args[0]): + decoders.decode_scene_id(scene_id) + + return + + actual = decoders.decode_scene_id(scene_id) + + assert actual == expected + + +@pytest.mark.parametrize( + ["product_id", "expected"], + ( + pytest.param( + "WWDR1.1__D", + { + "observation_mode": "ScanSAR nominal 28MHz mode dual polarization", + "observation_direction": "right looking", + "processing_level": "level 1.1", + "processing_option": "not specified", + "map_projection": "not specified", + "orbit_direction": "descending", + }, + id="valid_id-l11rd", + ), + pytest.param( + "WWDL1.1__A", + { + "observation_mode": "ScanSAR nominal 28MHz mode dual polarization", + "observation_direction": "left looking", + "processing_level": "level 1.1", + "processing_option": "not specified", + "map_projection": "not specified", + "orbit_direction": "ascending", + }, + id="valid_id-l11la", + ), + pytest.param( + "WWDR1.5RUA", + { + "observation_mode": "ScanSAR nominal 28MHz mode dual 
polarization", + "observation_direction": "right looking", + "processing_level": "level 1.5", + "processing_option": "geo-reference", + "map_projection": "UTM", + "orbit_direction": "ascending", + }, + id="valid_id-l15rd", + ), + pytest.param( + "WWDR1.6__A", + ValueError("invalid product id"), + id="invalid_id-invalid_level", + ), + pytest.param( + "WRDR1.1__A", + ValueError("invalid product id"), + id="invalid_id-wrong_observation_mode", + ), + ), +) +def test_decode_product_id(product_id, expected): + if issubclass(type(expected), Exception): + with pytest.raises(type(expected), match=expected.args[0]): + decoders.decode_product_id(product_id) + + return + + actual = decoders.decode_product_id(product_id) + + assert actual == expected + + +@pytest.mark.parametrize( + ["scan_info", "expected"], + ( + pytest.param( + "B4", + {"processing_method": "SPECAN method", "scan_number": "4"}, + id="valid_code", + ), + pytest.param("Ac", ValueError("invalid scan info"), id="invalid_code"), + ), +) +def test_decode_scan_info(scan_info, expected): + if issubclass(type(expected), Exception): + with pytest.raises(type(expected), match=expected.args[0]): + decoders.decode_scan_info(scan_info) + + return + + actual = decoders.decode_scan_info(scan_info) + + assert actual == expected diff --git a/ci/requirements/environment.yaml b/ci/requirements/environment.yaml index 070db44..5bf1065 100644 --- a/ci/requirements/environment.yaml +++ b/ci/requirements/environment.yaml @@ -2,8 +2,10 @@ name: xarray-alos2-tests channels: - conda-forge dependencies: - - python=3.11 - ipython - pre-commit - pytest - pytest-reportlog + - toolz + - cytoolz + - python-dateutil diff --git a/pyproject.toml b/pyproject.toml index c5ce230..8a09c50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,10 @@ name = "xarray-alos2" requires-python = ">= 3.10" license = {text = "MIT"} -dependencies = [] +dependencies = [ + "toolz", + "python-dateutil", +] dynamic = ["version"] [build-system]