Add a graph structure (documented and tested).

ynikitenko · Feb 14, 2022 · fa6b174 · fa6b174
1 parent 095c4f6
commit fa6b174
Show file tree

Hide file tree

Showing 5 changed files with 274 additions and 9 deletions.
diff --git a/docs/source/structures.rst b/docs/source/structures.rst
@@ -14,6 +14,7 @@ Structures
 .. currentmodule:: lena.structures.graph
 .. autosummary::
 
+    graph
     Graph
 
 .. currentmodule:: lena.structures.elements
@@ -84,6 +85,9 @@ Graph
 -----
 
 .. module:: lena.structures.graph
+.. autoclass:: graph
+    :members:
+
 .. autoclass:: Graph
     :members:
 

diff --git a/lena/structures/__init__.py b/lena/structures/__init__.py
@@ -1,5 +1,5 @@
 from .elements import HistToGraph
-from .graph import Graph
+from .graph import graph, Graph
 from .histogram import histogram, Histogram
 from .hist_functions import (
     cell_to_string,
@@ -26,7 +26,7 @@
 
 __all__ = [
     # structures
-    'Graph',
+    'graph', 'Graph',
     'histogram',
     'HistCell',
     'Histogram',

diff --git a/lena/structures/graph.py b/lena/structures/graph.py
@@ -1,11 +1,218 @@
-"""Graph is a function at given points."""
+"""A graph is a function at given points."""
 import copy
+import functools
+import operator
+import re
 
 import lena.core
 import lena.context
 import lena.flow
 
 
+class graph(object):
+    """Numeric arrays of equal size."""
+
+    def __init__(self, points, field_names=("x", "y"), scale=None):
+        """This structure generally corresponds
+        to the graph of a function
+        and represents arrays of coordinates and the function values
+        of arbitrary dimensions.
+
+        *points* is a list of one-dimensional
+        coordinate and value sequences (usually lists).
+        There is little to no distinction between them,
+        and "values" can also be called "coordinates".
+        The outer sequence is shallow-copied, so rescaling the graph
+        never modifies the container passed by the caller
+        (and *points* may be an immutable tuple);
+        the coordinate sequences themselves are stored by reference.
+
+        *field_names* provide the meaning of these arrays.
+        For example, a 3-dimensional graph could be distinguished
+        from a 2-dimensional graph with errors by its fields
+        ("x", "y", "z") instead of ("x", "y", "y_err"),
+        or ("x", "y", "y_err_low", "y_err_high").
+        Field names are used to transform Lena graphs to graphs
+        from other libraries.
+        Field names don't affect drawing graphs:
+        for that :class:`~Variable`-s should be used.
+        *field_names* can be a string separated by whitespace
+        and/or commas
+        or a sequence of strings, such as ["x", "y", "y_err"].
+        Field names must have as many elements as *points*,
+        and each field name must be unique.
+        Default field names are "x" and "y",
+        provided for the most used 2-dimensional graphs.
+
+        Error fields must go after all other coordinates.
+        Names of coordinate errors are those of coordinates plus "_err",
+        further error details are appended after '_'
+        (see the examples above).
+        Otherwise field names are arbitrary.
+
+        *scale* of the graph is a kind of its norm. It could be
+        the integral of the function or its other property.
+        A scale of a normalised probability density
+        function would be one.
+        An initialized *scale* is required if one needs
+        to renormalise the graph in :meth:`scale`
+        (for example, to plot it with other graphs).
+
+        Coordinates of a function graph would usually be arrays
+        of increasing values, which is not required here.
+        Neither is it checked that coordinates indeed
+        contain one-dimensional numeric values.
+        However, non-standard graphs
+        will likely lead to errors during plotting
+        and will require more programmer's work and caution,
+        so use them only if you understand what you are doing.
+
+        A graph can be iterated yielding tuples of numbers
+        for each point. Graph field names can be accessed
+        as its *field_names* attribute.
+
+        :exc:`~.LenaValueError` is raised if *points* is empty,
+        if its subsequences have different lengths,
+        if the number of field names differs
+        from the number of subsequences
+        or if field names contain duplicates.
+        """
+        if not points:
+            raise lena.core.LenaValueError(
+                "points must be a non-empty sequence "
+                "of coordinate sequences"
+            )
+
+        # require points to be of the same size
+        pt_len = len(points[0])
+        for arr in points[1:]:
+            if len(arr) != pt_len:
+                raise lena.core.LenaValueError(
+                    "points must have subsequences of equal lengths"
+                )
+
+        # Unicode (Python 2) field names would be just bad,
+        # so we don't check for it here.
+        if isinstance(field_names, str):
+            # split(', ') won't work.
+            # From https://stackoverflow.com/a/44785447/952234:
+            # \s stands for whitespace.
+            field_names = re.findall(r'[^,\s]+', field_names)
+
+        if len(field_names) != len(points):
+            raise lena.core.LenaValueError(
+                "field_names must have the same size as points"
+            )
+
+        if len(set(field_names)) != len(field_names):
+            raise lena.core.LenaValueError(
+                "field_names contain duplicate names"
+            )
+
+        # todo: or just fields?..
+        self.field_names = field_names
+        # scale() replaces whole arrays inside this container,
+        # so it must be our own mutable list:
+        # the caller's container stays untouched
+        # and tuples of arrays are accepted as well.
+        self._points = list(points)
+        self._scale = scale
+
+        # todo: add subsequences of points as attributes
+        # with field names.
+        # In case if someone wants to create a graph of another function
+        # at the same coordinates.
+        # Should a) work when we rescale the graph
+        #        b) not interfere with other fields and methods
+
+        # Probably we won't add methods __del__(n), __add__(*coords),
+        # since it might change the scale.
+
+    def __iter__(self):
+        """Iterate graph points one by one.
+
+        Each point is a tuple with one value from every array,
+        in the order of *field_names*.
+        """
+        for val in zip(*self._points):
+            yield val
+
+    def scale(self, other=None):
+        """Get or set the scale of the graph.
+
+        If *other* is ``None``, return the scale of this graph.
+
+        If a numeric *other* is provided, rescale to that value
+        in place and return ``None``.
+        If the graph has unknown or zero scale,
+        rescaling that will raise :exc:`~.LenaValueError`.
+
+        To get meaningful results, graph's fields are used.
+        Only the last coordinate is rescaled.
+        For example, if the graph has *x* and *y* coordinates,
+        then *y* will be rescaled, and for a 3-dimensional graph
+        *z* will be rescaled.
+        All errors are also rescaled together with their coordinate.
+        If the very first field is an error field
+        (so that there is no coordinate to rescale),
+        :exc:`~.LenaValueError` is raised and the graph is unchanged.
+        """
+        # this method is called scale() for uniformity with histograms
+        # And this looks really good: explicit for computations
+        # (not a subtle graph.scale, like a constant field (which is,
+        #  however, the case in graph - but not in other structures))
+        # and easy to remember (set_scale? rescale? change_scale_to?..)
+
+        # Abandoned: that would be redundant (not optimal)
+        # to create a new graph
+        # if we only want to change the scale of the existing one.
+        ## A new :class:`graph` is returned, the original is unchanged.
+
+        if other is None:
+            return self._scale
+
+        if not self._scale:
+            raise lena.core.LenaValueError(
+                "can't rescale a graph with zero or unknown scale"
+            )
+
+        def get_last_coord_ind_name(field_names):
+            # the last coordinate precedes the first error field;
+            # without error fields it is simply the last field.
+            last_ind = len(field_names) - 1
+            for ind, fn in enumerate(field_names):
+                if fn.endswith("_err") or "_err_" in fn:
+                    last_ind = ind - 1
+                    break
+            if last_ind < 0:
+                # a negative index would silently select a wrong field
+                raise lena.core.LenaValueError(
+                    "field_names must contain a coordinate "
+                    "before error fields"
+                )
+            return (last_ind, field_names[last_ind])
+
+        last_coord_ind, last_coord_name = \
+                get_last_coord_ind_name(self.field_names)
+
+        def get_err_indices(coord_name, field_names):
+            # errors of a coordinate are called coord_err or coord_err_*
+            err_prefix = coord_name + "_err_"
+            return [
+                ind for ind, fn in enumerate(field_names)
+                if fn == coord_name + "_err" or fn.startswith(err_prefix)
+            ]
+
+        last_coord_indices = ([last_coord_ind] +
+                get_err_indices(last_coord_name, self.field_names)
+        )
+
+        # In Python 2 3/2 is 1, so we want to be safe;
+        # the downside is that integer-valued graphs
+        # will become floating, but that is doubtfully an issue.
+        # Remove when/if dropping support for Python 2.
+        rescale = float(other) / self._scale
+
+        mul = operator.mul
+        partial = functools.partial
+
+        # a version with lambda is about 50% slower:
+        # timeit.timeit('[*map(lambda val: val*2, vals)]', \
+        #     setup='vals = list(range(45)); from operator import mul; \
+        #     from functools import partial')
+        # 3.159
+        # same setup for
+        # timeit.timeit('[*map(partial(mul, 2), vals)]',...):
+        # 2.075
+        #
+        # [*map(...)] is very slightly faster than list(map(...)),
+        # but it's unavailable in Python 2 (and anyway less readable).
+
+        # rescale arrays of values and errors;
+        # new lists are created, the original arrays are not modified.
+        for ind in last_coord_indices:
+            # Python lists are faster than arrays,
+            # https://stackoverflow.com/a/62399645/952234
+            # (because each time taking a value from an array
+            #  creates a Python object)
+            self._points[ind] = list(map(partial(mul, rescale),
+                                         self._points[ind]))
+
+        self._scale = other
+
+        # as suggested in PEP 8
+        return None
+
+
 def _rescale_value(rescale, value):
     """Return *rescale* multiplied by the data part of *value*.

     The data is extracted from *value* with :func:`lena.flow.get_data`.
     """
     data = lena.flow.get_data(value)
     return rescale * data
 

diff --git a/lena/structures/hist_functions.py b/lena/structures/hist_functions.py
@@ -15,7 +15,7 @@
     _reduce = reduce
 
 import lena.core
-import lena.structures.graph
+from .graph import Graph as _Graph
 
 
 class HistCell(collections.namedtuple("HistCell", ("edges, bin, index"))):
@@ -318,7 +318,7 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
 
     Return the resulting graph.
     """
-    gr = lena.structures.graph.Graph(scale=scale)
+    gr = _Graph(scale=scale)
 
     ## Could have allowed get_coordinate to be callable
     # (for generality), but 1) first find a use case,

diff --git a/tests/structures/test_graph.py b/tests/structures/test_graph.py
@@ -1,6 +1,3 @@
-"""Test graph."""
-from __future__ import print_function
-
 import copy
 import math
 import random
@@ -14,10 +11,67 @@
 from lena.core import LenaValueError
 from lena.structures import Histogram
 from lena.math import refine_mesh, isclose
-from lena.structures.graph import Graph
+from lena.structures.graph import graph, Graph
 # from histogram_strategy import generate_increasing_list, generate_data_in_range
 
 
+def test_graph_structure():
+    """Check construction, iteration, validation and rescaling of graph."""
+    x_vals, y_vals = [0, 1], [2, 3]
+    extra_vals = [1, 2]
+
+    def fresh(*arrays):
+        # every rescaled graph gets its own deep copy of the arrays,
+        # so that the reference values above stay intact
+        return copy.deepcopy(list(arrays))
+
+    # the simplest 2d initialization uses the default field names
+    plain = graph([x_vals, y_vals])
+    assert plain.field_names == ("x", "y")
+
+    # iteration yields points one by one
+    assert list(plain) == [(0, 2), (1, 3)]
+
+    # the scale is unknown unless provided
+    assert plain.scale() is None
+
+    # invalid initialization arguments raise
+    invalid_args = [
+        # empty points
+        ([], ""),
+        # duplicate names
+        ([x_vals, y_vals], "x,x"),
+        # wrong sequence lengths
+        ([[], [1]],),
+        # number of field names differs from the number of arrays
+        ([x_vals, y_vals], "x"),
+    ]
+    for args in invalid_args:
+        with pytest.raises(LenaValueError):
+            graph(*args)
+
+    # a graph with an unset scale can't be rescaled
+    with pytest.raises(LenaValueError):
+        plain.scale(1)
+
+    # rescaling when the scale is set works
+    # 2d graph: only y changes
+    two_dim = graph(fresh(x_vals, y_vals), scale=2)
+    assert two_dim.scale() == 2
+    two_dim.scale(1)
+    assert two_dim._points == [x_vals, [1, 1.5]]
+    assert two_dim.scale() == 1
+
+    # 3d graph: only z changes
+    three_dim = graph(fresh(x_vals, y_vals, extra_vals),
+                      field_names="x,y,z", scale=2)
+    three_dim.scale(3)
+    assert three_dim._points == [x_vals, y_vals, [1.5, 3.]]
+    assert three_dim.scale() == 3
+
+    # graph with errors:
+    # x errors are unchanged, y coords change
+    with_x_err = graph(fresh(x_vals, y_vals, extra_vals),
+                       field_names="x, y, x_err", scale=2)
+    # spaces in field_names work
+    assert with_x_err.field_names == ["x", "y", "x_err"]
+    with_x_err.scale(1)
+    assert with_x_err._points == [x_vals, [1, 1.5], [1, 2]]
+
+    # y errors change together with y coords
+    with_y_err = graph(fresh(x_vals, y_vals, extra_vals),
+                       field_names="x,y,y_err", scale=2)
+    with_y_err.scale(1)
+    assert with_y_err._points == [x_vals, [1, 1.5], [0.5, 1]]
+
+
 def test_graph():
     # sorts well
     coords = range(0, 10)