flow.GroupPlots initialization keyword argument *transform* is deprecated (use flow.MapGroup instead). flow.GroupBy accepts formatting strings.
ynikitenko · Apr 25, 2022 · de21aed · de21aed
1 parent f12468c
commit de21aed
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 23 deletions.
diff --git a/lena/flow/group_by.py b/lena/flow/group_by.py
@@ -1,6 +1,4 @@
 """Group data using :class:`.GroupBy` class."""
-from __future__ import print_function
-
 import lena.core
 import lena.flow
 
@@ -15,22 +13,29 @@ class GroupBy(object):
     """
 
     def __init__(self, group_by):
-        """*group_by* is a function, which returns
-        distinct hashable results for items from different groups.
-        It can be a dot-separated string,
-        which corresponds to a subcontext
-        (see :func:`context.get_recursively <.get_recursively>`).
+        """*group_by* is a function that returns
+        distinct hashable results for values from different groups.
+        It can also be a dot-separated formatting string.
+        In that case only the context part of the value is used
+        (see :func:`context.format_context <.format_context>`).
 
         If *group_by* is not a callable or a string,
         :exc:`.LenaTypeError` is raised.
         """
         self.groups = dict()
         if callable(group_by):
+            # callable(value) is allowed for generality.
+            # I use group_by exclusively with context,
+            # and the only example I can imagine when it can probe value
+            # is histograms with same variables
+            # but with different ranges (one wouldn't be able
+            # to plot graphs with them without changing context though).
+            # This is a weak example, because this information
+            # could be added to context.
             self._group_by = group_by
         elif isinstance(group_by, str):
-            self._group_by = lambda val: lena.context.get_recursively(
-                lena.flow.get_context(val), group_by
-            )
+            fc = lena.context.format_context(group_by)
+            self._group_by = lambda val: fc(lena.flow.get_context(val))
         else:
             raise lena.core.LenaTypeError(
                 "group_by must be a callable or a string, "
@@ -42,8 +47,17 @@ def update(self, val):
 
         A group key is calculated by *group_by*.
         If no such key exists, a new group is created.
+
+        If a formatting key was not found for *val*,
+        :exc:`.LenaValueError` is raised.
         """
-        key = self._group_by(val)
+        try:
+            key = self._group_by(val)
+        except lena.core.LenaKeyError:
+            raise lena.core.LenaValueError(
+                "could not find a key for {}".format(val)
+            )
+
         if key in self.groups:
             self.groups[key].append(val)
         else:

diff --git a/lena/flow/group_plots.py b/lena/flow/group_plots.py
@@ -215,15 +215,17 @@ def __init__(self, group_by, select, transform=(), scale=None,
 
         Plots are grouped by *group_by*, which returns
         different keys for different groups.
-        If it is not an instance of :class:`.GroupBy`,
-        it is converted to that class.
-        Use :class:`.GroupBy` for more options.
+        It can be a function of a value or a formatting string
+        for its context (see :class:`.GroupBy`).
+        Example: *group_by="{{value.variable.name}}_{{variable.name}}"*.
 
-        *transform* is a sequence, which processes individual plots
-        before yielding.
-        For example, set ``transform=(ToCSV(), write)``.
+        *transform* is a sequence that processes individual plots
+        before yielding. Example: ``transform=(ToCSV(), write)``.
         *transform* is called after *scale*.
 
+        .. deprecated:: 0.5
+           use :class:`.MapGroup` instead of *transform*.
+
         *scale* is a number or a string.
         A number means the scale, to which plots must be normalized.
         A string is a name of the plot to which other plots
@@ -255,6 +257,7 @@ def __init__(self, group_by, select, transform=(), scale=None,
         else:
             self._scale = lena.flow.group_scale.GroupScale(scale)
 
+        # deprecated. To be removed.
         if isinstance(transform, lena.core.LenaSequence):
             self._transform = transform
         else:

diff --git a/tests/flow/test_group_by.py b/tests/flow/test_group_by.py
@@ -6,9 +6,9 @@
 
 def test_group_by():
     ## arbitrary callable works
-    data = [1, "s", [], 2]
+    data0 = [1, "s", [], 2]
     g0 = GroupBy(type)
-    for val in data:
+    for val in data0:
         g0.update(val)
     r0 = g0.groups
     assert r0[type(1)] == [1, 2]
@@ -20,20 +20,44 @@ def test_group_by():
     g0.clear()
     assert len(g0.groups) == 0
 
-    ## init: wrong parameter type
+    ## wrong initialization parameter raises
     with pytest.raises(lena.core.LenaTypeError):
         GroupBy(1)
 
     ## context string works
-    data = [(1, {"detector": "D1"}),
+    # simple context string
+    data1 = [(1, {"detector": "D1"}),
             (2, {"detector": "D2"}),
             (3, {"detector": "D1"})
            ]
-    g1 = GroupBy("detector")
-    for val in data:
+    g1 = GroupBy("{{detector}}")
+    for val in data1:
         g1.update(val)
     assert len(g1.groups) == 2
     assert g1.groups == {
         'D1': [(1, {'detector': 'D1'}), (3, {'detector': 'D1'})],
         'D2': [(2, {'detector': 'D2'})]
     }
+
+    data2 = (
+        1,
+        {"value":
+             {"variable":
+                 {"name": "x"}},
+         "variable": {"name": "mean"}}
+    )
+
+    # missing context raises
+    with pytest.raises(lena.core.LenaValueError):
+        GroupBy("{{non_existent}}").update(data2)
+
+    # several subcontexts work
+    g2 = GroupBy("{{value.variable.name}}_{{variable.name}}")
+    g2.update(data2)
+    assert g2.groups == {
+        'x_mean': [
+            (1,
+             {'value': {'variable': {'name': 'x'}},
+              'variable': {'name': 'mean'}})
+        ]
+    }