Skip to content

Commit

Permalink
flow.GroupPlots initialization keyword argument *transform* is deprec…
Browse files Browse the repository at this point in the history
…ated (use flow.MapGroup instead). flow.GroupBy accepts formatting strings.
  • Loading branch information
ynikitenko committed Apr 25, 2022
1 parent f12468c commit de21aed
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 23 deletions.
36 changes: 25 additions & 11 deletions lena/flow/group_by.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""Group data using :class:`.GroupBy` class."""
from __future__ import print_function

import lena.core
import lena.flow

Expand All @@ -15,22 +13,29 @@ class GroupBy(object):
"""

def __init__(self, group_by):
"""*group_by* is a function, which returns
distinct hashable results for items from different groups.
It can be a dot-separated string,
which corresponds to a subcontext
(see :func:`context.get_recursively <.get_recursively>`).
"""*group_by* is a function that returns
distinct hashable results for values from different groups.
It can also be a formatting string with dot-separated subcontext keys
(for example, ``"{{variable.name}}"``).
In that case only the context part of the value is used
(see :func:`context.format_context <.format_context>`).
If *group_by* is not a callable or a string,
:exc:`.LenaTypeError` is raised.
"""
self.groups = dict()
if callable(group_by):
# callable(value) is allowed for generality.
# I use group_by exclusively with context,
# and the only example I can imagine where it would probe the value
# is histograms with the same variables
# but with different ranges (one wouldn't be able
# to plot graphs with them without changing context though).
# This is a weak example, because this information
# could be added to context.
self._group_by = group_by
elif isinstance(group_by, str):
self._group_by = lambda val: lena.context.get_recursively(
lena.flow.get_context(val), group_by
)
fc = lena.context.format_context(group_by)
self._group_by = lambda val: fc(lena.flow.get_context(val))
else:
raise lena.core.LenaTypeError(
"group_by must be a callable or a string, "
Expand All @@ -42,8 +47,17 @@ def update(self, val):
A group key is calculated by *group_by*.
If no such key exists, a new group is created.
If a formatting key was not found for *val*,
:exc:`.LenaValueError` is raised.
"""
key = self._group_by(val)
try:
key = self._group_by(val)
except lena.core.LenaKeyError:
raise lena.core.LenaValueError(
"could not find a key for {}".format(val)
)

if key in self.groups:
self.groups[key].append(val)
else:
Expand Down
15 changes: 9 additions & 6 deletions lena/flow/group_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,17 @@ def __init__(self, group_by, select, transform=(), scale=None,
Plots are grouped by *group_by*, which returns
different keys for different groups.
If it is not an instance of :class:`.GroupBy`,
it is converted to that class.
Use :class:`.GroupBy` for more options.
It can be a function of a value or a formatting string
for its context (see :class:`.GroupBy`).
Example: *group_by="{{value.variable.name}}_{{variable.name}}"*.
*transform* is a sequence, which processes individual plots
before yielding.
For example, set ``transform=(ToCSV(), write)``.
*transform* is a sequence that processes individual plots
before yielding. Example: ``transform=(ToCSV(), write)``.
*transform* is called after *scale*.
.. deprecated:: 0.5
Use :class:`.MapGroup` instead of *transform*.
*scale* is a number or a string.
A number means the scale, to which plots must be normalized.
A string is a name of the plot to which other plots
Expand Down Expand Up @@ -255,6 +257,7 @@ def __init__(self, group_by, select, transform=(), scale=None,
else:
self._scale = lena.flow.group_scale.GroupScale(scale)

# deprecated. To be removed.
if isinstance(transform, lena.core.LenaSequence):
self._transform = transform
else:
Expand Down
36 changes: 30 additions & 6 deletions tests/flow/test_group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

def test_group_by():
## arbitrary callable works
data = [1, "s", [], 2]
data0 = [1, "s", [], 2]
g0 = GroupBy(type)
for val in data:
for val in data0:
g0.update(val)
r0 = g0.groups
assert r0[type(1)] == [1, 2]
Expand All @@ -20,20 +20,44 @@ def test_group_by():
g0.clear()
assert len(g0.groups) == 0

## init: wrong parameter type
## wrong initialization parameter raises
with pytest.raises(lena.core.LenaTypeError):
GroupBy(1)

## context string works
data = [(1, {"detector": "D1"}),
# simple context string
data1 = [(1, {"detector": "D1"}),
(2, {"detector": "D2"}),
(3, {"detector": "D1"})
]
g1 = GroupBy("detector")
for val in data:
g1 = GroupBy("{{detector}}")
for val in data1:
g1.update(val)
assert len(g1.groups) == 2
assert g1.groups == {
'D1': [(1, {'detector': 'D1'}), (3, {'detector': 'D1'})],
'D2': [(2, {'detector': 'D2'})]
}

data2 = (
1,
{"value":
{"variable":
{"name": "x"}},
"variable": {"name": "mean"}}
)

# missing context raises
with pytest.raises(lena.core.LenaValueError):
GroupBy("{{non_existent}}").update(data2)

# several subcontexts work
g2 = GroupBy("{{value.variable.name}}_{{variable.name}}")
g2.update(data2)
assert g2.groups == {
'x_mean': [
(1,
{'value': {'variable': {'name': 'x'}},
'variable': {'name': 'mean'}})
]
}

0 comments on commit de21aed

Please sign in to comment.