output.ToCSV adds histogram context. structures.make_hist_context is deprecated. Fix a bug in histogram.scale, which did not store a computed scale. histogram has a method _update_context (like graph). Histogram element no longer updates context during compute. structures.SplitIntoBins no longer updates histogram subcontext during compute.
ynikitenko · Apr 24, 2022 · f12468c · f12468c
1 parent ef0489f
commit f12468c
Show file tree

Hide file tree

Showing 9 changed files with 121 additions and 89 deletions.
diff --git a/lena/output/to_csv.py b/lena/output/to_csv.py
@@ -219,9 +219,6 @@ def run(self, flow):
                 yield val
                 continue
 
-            ### todo: add histogram context here
-            # (now it is added in hist_functions).
-
             ## histogram
             if isinstance(data, lena.structures.histogram):
                 if data.dim == 1:
@@ -243,6 +240,7 @@ def run(self, flow):
                     continue
 
                 csv = "\n".join(lines_iter)
+                data._update_context(context)
                 lena.context.update_recursively(context, "output.filetype.csv")
                 yield (csv, context)
                 continue

diff --git a/lena/structures/hist_functions.py b/lena/structures/hist_functions.py
@@ -612,38 +612,28 @@ def iter_cells(hist, ranges=None, coord_ranges=None):
                        ind)
 
 
-def _make_hist_context(hist):
-    hc = {
-        "dim": hist.dim,
-        "nbins": hist.nbins,
-        "ranges": hist.ranges
-    }
-    # do we really add scale to context?
-    # If that is important, we must always calculate that.
-    # If that is not important, then why adding that?
-    # if hist._scale is not None:
-    #     hc["scale"] = hist._scale
-    return hc
-
-
-# todo: make private and completely refactor this function.
 def make_hist_context(hist, context):
-    """Update *context* with the context
+    """Update a deep copy of *context* with the context
     of a :class:`.histogram` *hist*.
 
-    Deep copy of updated context is returned.
+    .. deprecated:: 0.5
+       histogram context is updated automatically
+       during conversion in :class:`~.output.ToCSV`.
+       Use histogram._update_context explicitly if needed.
     """
-    all_context = copy.deepcopy(context)
+    # this deep copy is unnecessary; it only preserves the old "returns a deep copy" behaviour.
+    context = copy.deepcopy(context)
+
     hist_context = {
         "histogram": {
             "dim": hist.dim,
             "nbins": hist.nbins,
             "ranges": hist.ranges
         }
     }
-    all_context.update(hist_context)
-    return all_context
-    # return copy.deepcopy(all_context)
+    context.update(hist_context)
+    # just bad.
+    return context
 
 
 def unify_1_md(bins, edges):

diff --git a/lena/structures/histogram.py b/lena/structures/histogram.py
@@ -1,8 +1,10 @@
 """Histogram structure *histogram* and element *Histogram*."""
 import copy
 
+import lena.context
 import lena.core
 import lena.flow
+import lena.math
 from . import hist_functions as hf
 
 
@@ -198,12 +200,14 @@ def scale(self, other=None, recompute=False):
         Histograms with scale equal to zero can't be rescaled.
         :exc:`.LenaValueError` is raised if one tries to do that.
         """
-        # todo: reconsider this method. Probably get_scale
-        # and set_scale would be much better!
+        # see graph.scale comments for why this is called simply "scale"
+        # (not set_scale, get_scale, etc.)
         if other is None:
             # return scale
             if self._scale is None or recompute:
-                return hf.integral(*hf.unify_1_md(self.bins, self.edges))
+                self._scale = hf.integral(
+                    *hf.unify_1_md(self.bins, self.edges)
+                )
             return self._scale
         else:
             # rescale from other
@@ -212,11 +216,34 @@ def scale(self, other=None, recompute=False):
                 raise lena.core.LenaValueError(
                     "can not rescale histogram with zero scale"
                 )
-            self.bins = lena.math.md_map(lambda binc: binc * float(other) / scale,
+            self.bins = lena.math.md_map(lambda binc: binc*float(other) / scale,
                                          self.bins)
             self._scale = other
             return None
 
+    def _update_context(self, context):
+        """Update *context* with the properties of this histogram.
+
+        *context.histogram* is updated with the "dim", "nbins"
+        and "ranges" values of this histogram.
+        If this histogram has a computed scale, it is also added
+        to the context.
+
+        Called on "destruction" of the histogram structure (for example,
+        in :class:`.ToCSV`). See graph._update_context for more details.
+        """
+
+        hist_context = {
+            "dim": self.dim,
+            "nbins": self.nbins,
+            "ranges": self.ranges
+        }
+
+        if self._scale is not None:
+            hist_context["scale"] = self._scale
+
+        lena.context.update_recursively(context, {"histogram": hist_context})
+
 
 class Histogram():
     """An element to produce histograms."""
@@ -244,7 +271,7 @@ def __init__(self, edges, bins=None, make_bins=None, initial_value=0):
         self._initial_bins = copy.deepcopy(bins)
 
         # todo: bins, make_bins, initial_value look redundant
-        # and may be reconsidered.
+        # and may be reconsidered when really using reset().
         if make_bins:
             bins = make_bins()
         self._make_bins = make_bins
@@ -263,16 +290,8 @@ def fill(self, value):
         # self._hist.fill(data, weight)
 
     def compute(self):
-        """Yield histogram with context.
-
-        *context.histogram* is updated with histogram's attributes."""
-        ## When used in split_into_bins, some cells might not be filled.
-        ## This should not be an error.
-        ## If your code really requires filled histograms, check it yourself.
-        # if not self.fill_called:
-        #     # no data filled, no histogram is returned.
-        #     raise StopIteration
-        yield (self._hist, hf.make_hist_context(self._hist, self._cur_context))
+        """Yield histogram with context."""
+        yield (self._hist, self._cur_context)
 
     def reset(self):
         """Reset the histogram.
@@ -281,10 +300,11 @@ def reset(self):
         Bins are reinitialized with the *initial_value*
         or with *make_bins()* (depending on the initialization).
         """
-        self._cur_context = {}
         if self._make_bins is not None:
             self.bins = self._make_bins()
         elif self._initial_bins is not None:
             self.bins = copy.deepcopy(self._initial_bins)
         else:
             self.bins = hf.init_bins(self.edges, self._initial_value)
+
+        self._cur_context = {}
diff --git a/lena/structures/root_graphs.py b/lena/structures/root_graphs.py
@@ -24,6 +24,8 @@ def __init__(self, graph, type_code='d'):
         See Python module
         `array <https://docs.python.org/3/library/array.html>`_
         for more options.
+
+        .. versionadded:: 0.5
         """
 
         import ROOT
@@ -136,6 +138,8 @@ def __call__(self, value):
         """Convert data part of the value
         (which must be a :class:`.graph`)
         to :class:`.root_graph_errors`.
+
+        .. versionadded:: 0.5
         """
         graph, context = lena.flow.get_data_context(value)
         return (root_graph_errors(graph), context)
diff --git a/lena/structures/split_into_bins.py b/lena/structures/split_into_bins.py
@@ -362,14 +362,11 @@ def compute(self):
         Computational context is preserved in histogram's bins.
 
         :class:`.SplitIntoBins` adds context
-        as *histogram* (corresponding to :attr:`edges`)
-        and *variable* (corresponding to *arg_var*) subcontexts.
+        as a subcontext *variable* (corresponding to *arg_var*).
         This allows unification of :class:`.SplitIntoBins`
-        with common analysis using histograms and variables
+        with common analysis using variables
         (useful when creating plots from one template).
-        Old contexts, if exist,
-        are preserved in nested subcontexts
-        (that is *histogram.histogram* or *variable.variable*).
+        Existing context values are preserved.
 
         Note
         ----
@@ -385,13 +382,6 @@ def compute(self):
         self._arg_var._update_context(cur_context,
                                       copy.deepcopy(self._arg_var.var_context))
 
-        # update histogram context
-        _hist = lena.structures.histogram(self.edges, self.bins)
-        # histogram context depends only on edges, not on data,
-        # and is thus same for all results
-        hist_context = lena.structures.hist_functions._make_hist_context(_hist)
-        lena.context.update_nested("histogram", cur_context, hist_context)
-
         generators = _MdSeqMap(lambda cell: cell.compute(), self.bins)
         # generators = lena.math.md_map(lambda cell: cell.compute(), self.bins)
         while True:

diff --git a/tests/output/test_to_csv.py b/tests/output/test_to_csv.py
@@ -27,17 +27,21 @@ def test_to_csv():
     res0 = list(to_csv.run([hist, gr0, 3, "a string"]))
     # data parts are correct
     data_0 = [res0[0][0], res0[1][0]]
-    # assert list(to_csv.run([hist, gr, 3])) == []
     assert data_0 == [
         '0.000000,1.000000\n1.000000,2.000000\n2.000000,2.000000',
         '0,2.5\n1,3',
     ]
+    # other values are skipped
     assert res0[2:] == [3, "a string"]
-
-    # no error fields
+    # histogram context part is correct
+    assert res0[0][1] == {
+        'histogram': {'dim': 1, 'nbins': [2], 'ranges': [(0, 2)]},
+        'output': {'filetype': 'csv'}
+    }
+    # graph context part has no error fields
     assert res0[1][1] == {'output': {'filetype': 'csv'}}
 
-    # data._update_context is called
+    # data._update_context is also called for non-histogram data (a graph here)
     gr1 = graph([[0, 1], [2.5, 3], [0.1, 0.1]], field_names="x,y,error_y")
     res1 = list(to_csv.run([gr1]))[0]
     assert res1[1] == {
@@ -52,6 +56,7 @@ def test_to_csv():
 
 
 def test_hist_to_csv():
+    ## histogram functions work
     hist = histogram(edges=[0, 1, 2], bins=[1, 2])
     # test defaults
     assert list(hist1d_to_csv(hist)) == [
@@ -63,6 +68,17 @@ def test_hist_to_csv():
     assert list(hist1d_to_csv(hist, separator=' ', header='x bin')) == [
             'x bin', '0.000000 1.000000', '1.000000 2.000000', '2.000000 2.000000']
 
+    hist = histogram(edges=[[0, 1, 2], [0, 2, 4]], bins=[[1, 2], [3, 4]])
+    assert list(hist2d_to_csv(hist)) == [
+            '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
+    assert list(hist2d_to_csv(hist, header='x,y,z')) == [
+            'x,y,z', '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
+    assert list(hist2d_to_csv(hist, duplicate_last_bin=False)) == [
+            '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000']
+    assert list(hist2d_to_csv(hist, separator=' ')) == [
+            '0.000000 0.000000 1.000000', '0.000000 2.000000 2.000000', '0.000000 4.000000 2.000000', '1.000000 0.000000 3.000000', '1.000000 2.000000 4.000000', '1.000000 4.000000 4.000000', '2.000000 0.000000 3.000000', '2.000000 2.000000 4.000000', '2.000000 4.000000 4.000000']
+
+    ## ToCSV element works
     hist = Histogram(edges=[0, 1, 2], bins=[1, 2])
     to_csv = ToCSV()
     hist_data = list(hist.compute())
@@ -72,18 +88,10 @@ def test_hist_to_csv():
                     'histogram': {'ranges': [(0, 2)], 'dim': 1, 'nbins': [2]}}
             )]
 
-    hist = Histogram(edges=[[0, 1, 2], [0, 2, 4]], bins=[[1, 2], [3, 4]])
-    hist_data = list(hist.compute())
-    assert list(hist2d_to_csv(hist._hist)) == [
-            '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
-    assert list(hist2d_to_csv(hist._hist, header='x,y,z')) == [
-            'x,y,z', '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
-    assert list(hist2d_to_csv(hist._hist, duplicate_last_bin=False)) == [
-            '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000']
-    assert list(hist2d_to_csv(hist._hist, separator=' ')) == [
-            '0.000000 0.000000 1.000000', '0.000000 2.000000 2.000000', '0.000000 4.000000 2.000000', '1.000000 0.000000 3.000000', '1.000000 2.000000 4.000000', '1.000000 4.000000 4.000000', '2.000000 0.000000 3.000000', '2.000000 2.000000 4.000000', '2.000000 4.000000 4.000000']
-
+    ## maybe redundant
     to_csv = ToCSV(duplicate_last_bin=False)
+    hist_el = Histogram(edges=[[0, 1, 2], [0, 2, 4]], bins=[[1, 2], [3, 4]])
+    hist_data = list(hist_el.compute())
     assert list(to_csv.run(hist_data)) == [(
         '0.000000,0.000000,1.000000\n0.000000,2.000000,2.000000\n1.000000,0.000000,3.000000\n1.000000,2.000000,4.000000', {'output': {'filetype': 'csv'}, 'histogram': {'ranges': [(0, 2), (0, 4)], 'dim': 2, 'nbins': [2, 2]}}
         )]
diff --git a/tests/structures/test_histogram.py b/tests/structures/test_histogram.py
@@ -154,8 +154,30 @@ def test_histogram_1d():
 
     hist = histogram([0, 0.5, 1])
     hist.fill(0.5)
+
+    # _update_context without scale works
+    context = {}
+    hist._update_context(context)
+    assert context == {"histogram":
+        {"dim": 1, "nbins": [2], "ranges": [(0, 1)]}
+    }
+
+    ## scale works
+    # a scale that has not been computed yet is stored as None
+    assert hist._scale is None
+
+    # scale is computed correctly
     assert hist.scale() == 0.5
 
+    # computed scale is saved
+    assert hist._scale == 0.5
+
+    # _update_context works with scale
+    hist._update_context(context)
+    assert context == {"histogram":
+        {"dim": 1, "nbins": [2], "ranges": [(0, 1)], "scale": hist.scale()}
+    }
+
 
 if __name__ == "__main__":
     test_histogram_3d()

diff --git a/tests/structures/test_histogram_element.py b/tests/structures/test_histogram_element.py
@@ -13,12 +13,22 @@ def test_histogram_1d():
     res00 = list(h0.compute())
     assert res00[0][0] == hist
 
-    # fill outside of edges doesn't change
+    # fill outside of edges doesn't change histogram
     h0.fill(-1)
     res01 = list(h0.compute())
     assert res01[0][0] == hist
+    assert res01[0][1] == {}
 
-    # reset doesn't change in our case
+    # but context is updated
+    context = {"context": True}
+    h0.fill((-1, context))
+    res01u = list(h0.compute())
+    assert res01u[0][0] == hist
+    assert res01u[0][1] == context
+
+    # reset doesn't change histogram in our case
     h0.reset()
     res02 = list(h0.compute())
     assert res02[0][0] == hist
+    # cur_context is reset
+    assert res02[0][1] == {}