Add equality testing and representation for graphs. Add field_names keyword argument to structures.HistToGraph and hist_to_graph. hist_to_graph produces graphs (not Graphs). graph's field_names must be a tuple (lists are no longer allowed). Graph is deprecated.
ynikitenko · Feb 15, 2022 · 5b91cfb · 5b91cfb
1 parent fa6b174
commit 5b91cfb
Show file tree

Hide file tree

Showing 5 changed files with 113 additions and 30 deletions.
diff --git a/lena/structures/elements.py b/lena/structures/elements.py
@@ -12,14 +12,16 @@
 class HistToGraph():
     """Transform a :class:`.histogram` to a :class:`.Graph`."""
 
-    def __init__(self, make_value, get_coordinate="left"):
+    def __init__(self, make_value, get_coordinate="left", field_names=("x", "y")):
         """*make_value* is a :class:`.Variable`
         that creates graph's value from the bin's value.
 
         *get_coordinate* defines the coordinate of the graph's point.
         By default, it is the left bin edge. Other allowed values are
         "right" and "middle".
 
+        *field_names* sets the field names of the resulting graphs.
+
         Incorrect values for *make_value* or *get_coordinate* raise,
         respectively,
         :exc:`.LenaTypeError` or :exc:`.LenaValueError`.
@@ -31,13 +33,19 @@ def __init__(self, make_value, get_coordinate="left"):
                 "make_value must be a Variable, "
                 "{} given".format(make_value)
             )
-        # todo: functions for coordinates should be allowed
+        # todo? functions for coordinates should be allowed
+        # -- see comment in hist_to_graph
+        # todo: do we need a run method, or should it be just __call__?
+        # -- see comment in the method! It should.
+        # todo: allow passing a scale(histogram, context?) function
+        #       to create initial scales of graphs
         if get_coordinate not in ["left", "right", "middle"]:
             raise lena.core.LenaValueError(
                 'get_coordinate must be one of "left", "right" or "middle"; '
                 '"{}" provided'.format(get_coordinate)
             )
         self._get_coordinate = get_coordinate
+        self._field_names = field_names
 
     def run(self, flow):
         """Iterate the *flow* and transform histograms to graphs.
@@ -86,6 +94,7 @@ def run(self, flow):
             graph = hist_to_graph(
                 hist,
                 make_value=self._make_value.getter,
+                field_names=self._field_names,
                 get_coordinate=self._get_coordinate
             )
             yield (graph, context)
diff --git a/lena/structures/graph.py b/lena/structures/graph.py
@@ -3,6 +3,7 @@
 import functools
 import operator
 import re
+import warnings
 
 import lena.core
 import lena.context
@@ -35,7 +36,8 @@ def __init__(self, points, field_names=("x", "y"), scale=None):
         *field_names* can be a string separated by whitespace
         and/or commas
         or a tuple of strings, such as ("x", "y", "y_err").
-        Field names must have as many elements as *points*,
+        *field_names* must be a tuple,
+        have as many elements as *points*,
         and each field name must be unique.
         Default field names are "x" and "y",
         provided for the most used 2-dimensional graphs.
@@ -87,7 +89,14 @@ def __init__(self, points, field_names=("x", "y"), scale=None):
             # split(', ') won't work.
             # From https://stackoverflow.com/a/44785447/952234:
             # \s stands for whitespace.
-            field_names = re.findall(r'[^,\s]+', field_names)
+            field_names = tuple(re.findall(r'[^,\s]+', field_names))
+        elif not isinstance(field_names, tuple):
+            # It might be non-Pythonic to require a tuple
+            # (to prohibit a list), but it's important
+            # for comparisons and uniformity
+            raise lena.core.LenaTypeError(
+                "field_names must be a string or a tuple"
+            )
 
         if len(field_names) != len(points):
             raise lena.core.LenaValueError(
@@ -114,11 +123,32 @@ def __init__(self, points, field_names=("x", "y"), scale=None):
         # Probably we won't add methods __del__(n), __add__(*coords),
         # since it might change the scale.
 
+    def __eq__(self, other):
+        """Compare this graph with *other* for exact equality.
+
+        Graphs are equal if and only if their coordinates,
+        scales and field names are all equal.
+        If *other* is not a :class:`.graph`, return ``False``.
+
+        Floats are compared exactly (not with :func:`math.isclose`),
+        so nearly equal graphs may compare as different.
+        """
+        if isinstance(other, graph):
+            return (self._points == other._points
+                    and self._scale == other._scale
+                    and self.field_names == other.field_names)
+        return False
+
     def __iter__(self):
         """Yield the graph's points in order, one tuple per point."""
         for point in zip(*self._points):
             yield point
 
+    def __repr__(self):
+        """Return ``graph(points, field_names=..., scale=...)``."""
+        return "graph({}, field_names={}, scale={})".format(
+            self._points, self.field_names, self._scale)
+
     def scale(self, other=None):
         """Get or set the scale of the graph.
 
@@ -218,7 +248,13 @@ def _rescale_value(rescale, value):
 
 
 class Graph(object):
-    """Function at given coordinates (arbitraty dimensions).
+    """
+    .. deprecated:: 0.5
+       use :class:`graph`.
+       This class may be used in the future,
+       but with a changed interface.
+
+    Function at given coordinates (arbitrary dimensions).
 
     Graph points can be set during the initialization and
     during :meth:`fill`. It can be rescaled (producing a new :class:`Graph`).
@@ -262,6 +298,9 @@ def __init__(self, points=None, context=None, scale=None, sort=True):
         All filled values will be stored in it.
         To reduce data, use histograms.
         """
+        warnings.warn("Graph is deprecated since Lena 0.5. Use graph.",
+                      DeprecationWarning, stacklevel=2)
+
         self._points = points if points is not None else []
         # todo: add some sanity checks for points
         self._scale = scale

diff --git a/lena/structures/hist_functions.py b/lena/structures/hist_functions.py
@@ -15,7 +15,7 @@
     _reduce = reduce
 
 import lena.core
-from .graph import Graph as _Graph
+from .graph import graph as _graph, Graph as _Graph
 
 
 class HistCell(collections.namedtuple("HistCell", ("edges, bin, index"))):
@@ -295,8 +295,9 @@ def get_example_bin(struct):
         return bins
 
 
-def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
-    """Convert a :class:`.histogram` to a :class:`.Graph`.
+def hist_to_graph(hist, make_value=None, get_coordinate="left",
+                  field_names=("x", "y"), scale=None):
+    """Convert a :class:`.histogram` to a :class:`.graph`.
 
     *make_value* is a function to set the value of a graph's point.
     By default it is bin content.
@@ -310,16 +311,17 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
 
     >>> make_value = lambda bin_: (bin_.mean, bin_.mean_error)
 
-    *get_coordinate* defines what will be the coordinate
-    of a graph's point created from a histogram's bin.
+    *get_coordinate* defines what the coordinate
+    of a graph's point created from a histogram's bin will be.
     It can be "left" (default), "right" and "middle".
 
+    *field_names* sets the field names of the graph. Their number
+    must be the same as the dimension of the result.
+
     *scale* becomes the graph's scale (unknown by default).
 
     Return the resulting graph.
     """
-    gr = _Graph(scale=scale)
-
     ## Could have allowed get_coordinate to be callable
     # (for generality), but 1) first find a use case,
     # 2) histogram bins could be adjusted in the first place.
@@ -338,19 +340,40 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
             '"{}" provided'.format(get_coordinate)
         )
 
+    # todo: make_value may be bad design.
+    # Maybe allow to change the graph in the sequence.
+    # However, make_value allows not to recreate a graph
+    # or its coordinates (if that is not needed).
+
+    points = [[] for _ in field_names]
+
+    chain = itertools.chain
+
     for value, edges in _iter_bins_with_edges(hist.bins, hist.edges):
         coord = get_coord(edges)
-        # todo: unclear when bin_context is present.
-        bin_value = lena.flow.get_data(value)
-        # todo: maybe it should be only a tuple?
+
+        # Since we never use contexts here, it will be optimal
+        # to ignore them completely (remove them elsewhere).
+        # bin_value = lena.flow.get_data(value)
+        bin_value = value
+
+        ## if we provide make_value, no need to adjust bin_value
+        # # todo: maybe it should be only a tuple?
+        # however, if there is no make_value, one-dimensional
+        # histogram might fail when chaining that
         if not hasattr(bin_value, "__iter__"):
             bin_value = (bin_value,)
+
         if make_value is None:
             graph_value = bin_value
         else:
             graph_value = make_value(bin_value)
-        gr.fill((coord, graph_value))
-    return gr
+
+        for arr, coord_ in zip(points, chain(coord, graph_value)):
+            arr.append(coord_)
+        # gr.fill((coord, graph_value))
+
+    return _graph(points, field_names, scale)
 
 
 def init_bins(edges, value=0, deepcopy=False):

diff --git a/tests/structures/test_graph.py b/tests/structures/test_graph.py
@@ -28,15 +28,22 @@ def test_graph_structure():
 
     assert gr0.scale() is None
 
+    ## points work correctly
     # empty points raise
     with pytest.raises(LenaValueError):
         graph([], "")
-    # duplicate names raise
-    with pytest.raises(LenaValueError):
-        graph([xs, ys], "x,x")
     # wrong sequence lengths raise
     with pytest.raises(LenaValueError):
         graph([[], [1]])
+
+    ## field_names work correctly
+    # duplicate names raise
+    with pytest.raises(LenaValueError):
+        graph([xs, ys], "x,x")
+    # non-tuple field names raise
+    with pytest.raises(lena.core.LenaTypeError):
+        graph([xs, ys], ["x", "y"])
+
     # field names are same as the points length
     with pytest.raises(LenaValueError):
         graph([xs, ys], "x")
@@ -62,7 +69,7 @@ def test_graph_structure():
     # x errors are unchanged, y coords change
     gr3 = graph(copy.deepcopy([xs, ys, [1, 2]]), field_names="x, y, x_err", scale=2)
     # spaces in field_names work
-    assert gr3.field_names == ["x", "y", "x_err"]
+    assert gr3.field_names == ("x", "y", "x_err")
     gr3.scale(1)
     assert gr3._points == [xs, [1, 1.5], [1, 2]]
 

diff --git a/tests/structures/test_hist_functions.py b/tests/structures/test_hist_functions.py
@@ -5,7 +5,7 @@
 import lena.structures
 from lena.core import LenaIndexError, LenaTypeError, LenaValueError
 from lena.math import mesh
-from lena.structures import histogram, Graph
+from lena.structures import histogram, graph, Graph
 from lena.structures import (
     check_edges_increasing,
     get_bin_edges,
@@ -113,32 +113,37 @@ def test_hist_to_graph():
     nevents = Variable("nevents", lambda nevents: nevents)
     htg = HistToGraph(nevents)
     nev_context = {'value': {'variable': {'name': 'nevents'}}}
+    gr = graph([[0], [1]])
+
     # run works correctly
     assert list(htg.run(data)) == [
         0,
-        (Graph(points=[((0,), (1,))], scale=None, sort=True),
-         nev_context),
-        (Graph(points=[((0,), (1,))], scale=None, sort=True),
-         nev_context),
+        (gr, nev_context),
+        (gr, nev_context),
         # values with the specified context are skipped
         (histogram([0, 1], bins=[1]),
          {'histogram': {'to_graph': False}}
         ),
     ]
+
     # different coordinates work
     assert list(HistToGraph(nevents, get_coordinate="right").run([hist])) == \
-        [(Graph(points=[((1,), (1,))], scale=None, sort=True), nev_context)]
+        [(graph([[1], [1]], scale=None), nev_context)]
     assert list(HistToGraph(nevents, get_coordinate="middle").run([hist])) == \
-        [(Graph(points=[((0.5,), (1,))], scale=None, sort=True), nev_context)]
+        [(graph([[0.5], [1]], scale=None), nev_context)]
 
     val_with_error = collections.namedtuple("val_with_error", ["value", "error"])
     hist1 = histogram(mesh((0, 1), 1))
     val = val_with_error(1, 2)
     hist1.bins = lena.structures.init_bins(hist1.edges, val)
     transform_value = Variable("value_error",
                                lambda val: (val.value, val.error))
-    assert list(HistToGraph(make_value=transform_value).run([hist1])) == \
-        [(Graph(points=[((0,), (1, 2))], scale=None, sort=True),
+    assert list(
+        HistToGraph(
+            make_value=transform_value, field_names=("x", "y", "z")
+        ).run([hist1])
+    ) == \
+        [(graph([[0], [1], [2]], field_names="x,y,z"),
           {'value': {'variable': {'name': 'value_error'}}})]
 
     # wrong make_value raises