Skip to content

Commit

Permalink
output.ToCSV adds histogram context. structures.make_hist_context is …
Browse files Browse the repository at this point in the history
…deprecated. Fix a bug in histogram.scale, which did not store a computed scale. histogram has a method _update_context (like graph). Histogram element no longer updates context during compute. structures.SplitIntoBins no longer updates histogram subcontext during compute.
  • Loading branch information
ynikitenko committed Apr 24, 2022
1 parent ef0489f commit f12468c
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 89 deletions.
4 changes: 1 addition & 3 deletions lena/output/to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,6 @@ def run(self, flow):
yield val
continue

### todo: add histogram context here
# (now it is added in hist_functions).

## histogram
if isinstance(data, lena.structures.histogram):
if data.dim == 1:
Expand All @@ -243,6 +240,7 @@ def run(self, flow):
continue

csv = "\n".join(lines_iter)
data._update_context(context)
lena.context.update_recursively(context, "output.filetype.csv")
yield (csv, context)
continue
Expand Down
32 changes: 11 additions & 21 deletions lena/structures/hist_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,38 +612,28 @@ def iter_cells(hist, ranges=None, coord_ranges=None):
ind)


def _make_hist_context(hist):
    """Return a new dictionary describing the histogram *hist*.

    The dictionary has the keys "dim", "nbins" and "ranges",
    taken from the attributes of *hist* with the same names.
    """
    # The scale is intentionally not stored here. It is an open question
    # whether it belongs to the context: if it is important, it would
    # have to be always computed; if it is not, there is no reason
    # to add it.
    described = {}
    described["dim"] = hist.dim
    described["nbins"] = hist.nbins
    described["ranges"] = hist.ranges
    return described


# todo: make private and completely refactor this function.
def make_hist_context(hist, context):
"""Update *context* with the context
"""Update a deep copy of *context* with the context
of a :class:`.histogram` *hist*.
Deep copy of updated context is returned.
.. deprecated:: 0.5
histogram context is updated automatically
during conversion in :class:`~.output.ToCSV`.
Use histogram._update_context explicitly if needed.
"""
all_context = copy.deepcopy(context)
# absolutely unnecessary.
context = copy.deepcopy(context)

hist_context = {
"histogram": {
"dim": hist.dim,
"nbins": hist.nbins,
"ranges": hist.ranges
}
}
all_context.update(hist_context)
return all_context
# return copy.deepcopy(all_context)
context.update(hist_context)
# just bad.
return context


def unify_1_md(bins, edges):
Expand Down
52 changes: 36 additions & 16 deletions lena/structures/histogram.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Histogram structure *histogram* and element *Histogram*."""
import copy

import lena.context
import lena.core
import lena.flow
import lena.math
from . import hist_functions as hf


Expand Down Expand Up @@ -198,12 +200,14 @@ def scale(self, other=None, recompute=False):
Histograms with scale equal to zero can't be rescaled.
:exc:`.LenaValueError` is raised if one tries to do that.
"""
# todo: reconsider this method. Probably get_scale
# and set_scale would be much better!
# see graph.scale comments why this is called simply "scale"
# (not set_scale, get_scale, etc.)
if other is None:
# return scale
if self._scale is None or recompute:
return hf.integral(*hf.unify_1_md(self.bins, self.edges))
self._scale = hf.integral(
*hf.unify_1_md(self.bins, self.edges)
)
return self._scale
else:
# rescale from other
Expand All @@ -212,11 +216,34 @@ def scale(self, other=None, recompute=False):
raise lena.core.LenaValueError(
"can not rescale histogram with zero scale"
)
self.bins = lena.math.md_map(lambda binc: binc * float(other) / scale,
self.bins = lena.math.md_map(lambda binc: binc*float(other) / scale,
self.bins)
self._scale = other
return None

def _update_context(self, context):
    """Write the properties of this histogram into *context*.

    The subcontext *context.histogram* receives the keys "dim",
    "nbins" and "ranges" with the values of this histogram.
    The key "scale" is added as well, but only if a scale
    has already been stored (that is *self._scale* is not ``None``).

    *context* is updated in place; nothing is returned.

    Called on "destruction" of the histogram structure (for example,
    in :class:`.ToCSV`). See graph._update_context for more details.
    """
    properties = dict(dim=self.dim, nbins=self.nbins, ranges=self.ranges)

    # a scale that was never computed or set is not reported
    if self._scale is not None:
        properties["scale"] = self._scale

    lena.context.update_recursively(context, {"histogram": properties})


class Histogram():
"""An element to produce histograms."""
Expand Down Expand Up @@ -244,7 +271,7 @@ def __init__(self, edges, bins=None, make_bins=None, initial_value=0):
self._initial_bins = copy.deepcopy(bins)

# todo: bins, make_bins, initial_value look redundant
# and may be reconsidered.
# and may be reconsidered when really using reset().
if make_bins:
bins = make_bins()
self._make_bins = make_bins
Expand All @@ -263,16 +290,8 @@ def fill(self, value):
# self._hist.fill(data, weight)

def compute(self):
"""Yield histogram with context.
*context.histogram* is updated with histogram's attributes."""
## When used in split_into_bins, some cells might not be filled.
## This should not be an error.
## If your code really requires filled histograms, check it yourself.
# if not self.fill_called:
# # no data filled, no histogram is returned.
# raise StopIteration
yield (self._hist, hf.make_hist_context(self._hist, self._cur_context))
"""Yield histogram with context."""
yield (self._hist, self._cur_context)

def reset(self):
"""Reset the histogram.
Expand All @@ -281,10 +300,11 @@ def reset(self):
Bins are reinitialized with the *initial_value*
or with *make_bins()* (depending on the initialization).
"""
self._cur_context = {}
if self._make_bins is not None:
self.bins = self._make_bins()
elif self._initial_bins is not None:
self.bins = copy.deepcopy(self._initial_bins)
else:
self.bins = hf.init_bins(self.edges, self._initial_value)

self._cur_context = {}
4 changes: 4 additions & 0 deletions lena/structures/root_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def __init__(self, graph, type_code='d'):
See Python module
`array <https://docs.python.org/3/library/array.html>`_
for more options.
.. versionadded:: 0.5
"""

import ROOT
Expand Down Expand Up @@ -136,6 +138,8 @@ def __call__(self, value):
"""Convert data part of the value
(which must be a :class:`.graph`)
to :class:`.root_graph_errors`.
.. versionadded:: 0.5
"""
graph, context = lena.flow.get_data_context(value)
return (root_graph_errors(graph), context)
16 changes: 3 additions & 13 deletions lena/structures/split_into_bins.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,11 @@ def compute(self):
Computational context is preserved in histogram's bins.
:class:`.SplitIntoBins` adds context
as *histogram* (corresponding to :attr:`edges`)
and *variable* (corresponding to *arg_var*) subcontexts.
as a subcontext *variable* (corresponding to *arg_var*).
This allows unification of :class:`.SplitIntoBins`
with common analysis using histograms and variables
with common analysis using variables
(useful when creating plots from one template).
Old contexts, if exist,
are preserved in nested subcontexts
(that is *histogram.histogram* or *variable.variable*).
Existing context values are preserved.
Note
----
Expand All @@ -385,13 +382,6 @@ def compute(self):
self._arg_var._update_context(cur_context,
copy.deepcopy(self._arg_var.var_context))

# update histogram context
_hist = lena.structures.histogram(self.edges, self.bins)
# histogram context depends only on edges, not on data,
# and is thus same for all results
hist_context = lena.structures.hist_functions._make_hist_context(_hist)
lena.context.update_nested("histogram", cur_context, hist_context)

generators = _MdSeqMap(lambda cell: cell.compute(), self.bins)
# generators = lena.math.md_map(lambda cell: cell.compute(), self.bins)
while True:
Expand Down
38 changes: 23 additions & 15 deletions tests/output/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,21 @@ def test_to_csv():
res0 = list(to_csv.run([hist, gr0, 3, "a string"]))
# data parts are correct
data_0 = [res0[0][0], res0[1][0]]
# assert list(to_csv.run([hist, gr, 3])) == []
assert data_0 == [
'0.000000,1.000000\n1.000000,2.000000\n2.000000,2.000000',
'0,2.5\n1,3',
]
# other values are skipped
assert res0[2:] == [3, "a string"]

# no error fields
# histogram context part is correct
assert res0[0][1] == {
'histogram': {'dim': 1, 'nbins': [2], 'ranges': [(0, 2)]},
'output': {'filetype': 'csv'}
}
# graph context part has no error fields
assert res0[1][1] == {'output': {'filetype': 'csv'}}

# data._update_context is called
# data._update_context is also called for non-histogram data (a graph here)
gr1 = graph([[0, 1], [2.5, 3], [0.1, 0.1]], field_names="x,y,error_y")
res1 = list(to_csv.run([gr1]))[0]
assert res1[1] == {
Expand All @@ -52,6 +56,7 @@ def test_to_csv():


def test_hist_to_csv():
## histogram functions work
hist = histogram(edges=[0, 1, 2], bins=[1, 2])
# test defaults
assert list(hist1d_to_csv(hist)) == [
Expand All @@ -63,6 +68,17 @@ def test_hist_to_csv():
assert list(hist1d_to_csv(hist, separator=' ', header='x bin')) == [
'x bin', '0.000000 1.000000', '1.000000 2.000000', '2.000000 2.000000']

hist = histogram(edges=[[0, 1, 2], [0, 2, 4]], bins=[[1, 2], [3, 4]])
assert list(hist2d_to_csv(hist)) == [
'0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
assert list(hist2d_to_csv(hist, header='x,y,z')) == [
'x,y,z', '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
assert list(hist2d_to_csv(hist, duplicate_last_bin=False)) == [
'0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000']
assert list(hist2d_to_csv(hist, separator=' ')) == [
'0.000000 0.000000 1.000000', '0.000000 2.000000 2.000000', '0.000000 4.000000 2.000000', '1.000000 0.000000 3.000000', '1.000000 2.000000 4.000000', '1.000000 4.000000 4.000000', '2.000000 0.000000 3.000000', '2.000000 2.000000 4.000000', '2.000000 4.000000 4.000000']

## ToCSV element works
hist = Histogram(edges=[0, 1, 2], bins=[1, 2])
to_csv = ToCSV()
hist_data = list(hist.compute())
Expand All @@ -72,18 +88,10 @@ def test_hist_to_csv():
'histogram': {'ranges': [(0, 2)], 'dim': 1, 'nbins': [2]}}
)]

hist = Histogram(edges=[[0, 1, 2], [0, 2, 4]], bins=[[1, 2], [3, 4]])
hist_data = list(hist.compute())
assert list(hist2d_to_csv(hist._hist)) == [
'0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
assert list(hist2d_to_csv(hist._hist, header='x,y,z')) == [
'x,y,z', '0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '0.000000,4.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000', '1.000000,4.000000,4.000000', '2.000000,0.000000,3.000000', '2.000000,2.000000,4.000000', '2.000000,4.000000,4.000000']
assert list(hist2d_to_csv(hist._hist, duplicate_last_bin=False)) == [
'0.000000,0.000000,1.000000', '0.000000,2.000000,2.000000', '1.000000,0.000000,3.000000', '1.000000,2.000000,4.000000']
assert list(hist2d_to_csv(hist._hist, separator=' ')) == [
'0.000000 0.000000 1.000000', '0.000000 2.000000 2.000000', '0.000000 4.000000 2.000000', '1.000000 0.000000 3.000000', '1.000000 2.000000 4.000000', '1.000000 4.000000 4.000000', '2.000000 0.000000 3.000000', '2.000000 2.000000 4.000000', '2.000000 4.000000 4.000000']

## maybe redundant
to_csv = ToCSV(duplicate_last_bin=False)
hist_el = Histogram(edges=[[0, 1, 2], [0, 2, 4]], bins=[[1, 2], [3, 4]])
hist_data = list(hist_el.compute())
assert list(to_csv.run(hist_data)) == [(
'0.000000,0.000000,1.000000\n0.000000,2.000000,2.000000\n1.000000,0.000000,3.000000\n1.000000,2.000000,4.000000', {'output': {'filetype': 'csv'}, 'histogram': {'ranges': [(0, 2), (0, 4)], 'dim': 2, 'nbins': [2, 2]}}
)]
22 changes: 22 additions & 0 deletions tests/structures/test_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,30 @@ def test_histogram_1d():

hist = histogram([0, 0.5, 1])
hist.fill(0.5)

# _update_context without scale works
context = {}
hist._update_context(context)
assert context == {"histogram":
{"dim": 1, "nbins": [2], "ranges": [(0, 1)]}
}

## scale works
# not initialized scale is set to None
assert hist._scale is None

# scale is computed correctly
assert hist.scale() == 0.5

# computed scale is saved
assert hist._scale == 0.5

# _update_context works with scale
hist._update_context(context)
assert context == {"histogram":
{"dim": 1, "nbins": [2], "ranges": [(0, 1)], "scale": hist.scale()}
}


if __name__ == "__main__":
test_histogram_3d()
Expand Down
14 changes: 12 additions & 2 deletions tests/structures/test_histogram_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,22 @@ def test_histogram_1d():
res00 = list(h0.compute())
assert res00[0][0] == hist

# fill outside of edges doesn't change
# fill outside of edges doesn't change histogram
h0.fill(-1)
res01 = list(h0.compute())
assert res01[0][0] == hist
assert res01[0][1] == {}

# reset doesn't change in our case
# but context is updated
context = {"context": True}
h0.fill((-1, context))
res01u = list(h0.compute())
assert res01u[0][0] == hist
assert res01u[0][1] == context

# reset doesn't change histogram in our case
h0.reset()
res02 = list(h0.compute())
assert res02[0][0] == hist
# cur_context is reset
assert res02[0][1] == {}

0 comments on commit f12468c

Please sign in to comment.