Skip to content

Commit

Permalink
Move SplitIntoBins, MapBins, IterateBins, get_example_bin, cell_to_st…
Browse files Browse the repository at this point in the history
…ring from lena.flow to lena.structures.
  • Loading branch information
ynikitenko committed Nov 9, 2021
1 parent 2a45109 commit 1640c59
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 126 deletions.
15 changes: 0 additions & 15 deletions docs/source/flow.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,6 @@ Flow
Reverse
Slice

**Split into bins:**

.. currentmodule:: lena.flow.split_into_bins
.. autosummary::

IterateBins
MapBins
SplitIntoBins
cell_to_string
get_example_bin

Elements
--------
Elements form Lena sequences.
Expand Down Expand Up @@ -111,7 +100,3 @@ Iterators
.. automodule:: lena.flow.iterators
:special-members: __call__

Split into bins
---------------

.. automodule:: lena.flow.split_into_bins
16 changes: 16 additions & 0 deletions docs/source/structures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ Structures

HistToGraph

**Split into bins:**

.. currentmodule:: lena.structures.split_into_bins
.. autosummary::

IterateBins
MapBins
SplitIntoBins
cell_to_string
get_example_bin

**Histogram functions:**

.. currentmodule:: lena.structures.hist_functions
Expand Down Expand Up @@ -80,6 +91,11 @@ Graph
.. autoclass:: HistToGraph
:members:

Split into bins
---------------

.. automodule:: lena.structures.split_into_bins

Histogram functions
-------------------
.. automodule:: lena.structures.hist_functions
11 changes: 0 additions & 11 deletions lena/flow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@
from .print_ import Print
from .progress import Progress
from .selectors import Not, Selector
from .split_into_bins import (
IterateBins,
MapBins,
SplitIntoBins,
get_example_bin,
)
from .zip import Zip
from .filter import Filter

Expand Down Expand Up @@ -44,9 +38,4 @@
'Selector',
'seq_map',
'RunIf',
# split into bins
'SplitIntoBins',
'IterateBins',
'MapBins',
'get_example_bin',
]
24 changes: 20 additions & 4 deletions lena/structures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from .graph import Graph
from .histogram import histogram, Histogram
from .hist_functions import (
cell_to_string,
check_edges_increasing,
get_bin_edges,
get_bin_on_value_1d, get_bin_on_value, get_bin_on_index,
get_example_bin,
HistCell,
hist_to_graph,
init_bins,
Expand All @@ -15,22 +17,36 @@
unify_1_md
)
from .numpy_histogram import NumpyHistogram
from .split_into_bins import (
IterateBins,
MapBins,
SplitIntoBins,
)


__all__ = [
# structures
'Graph',
'histogram', 'Histogram',
'histogram',
'HistCell',
'Histogram',
'HistToGraph',
'NumpyHistogram',
# hist functions
'check_edges_increasing',
'cell_to_string',
'get_bin_edges',
'get_bin_on_value_1d', 'get_bin_on_value', 'get_bin_on_index',
'HistCell',
'HistToGraph',
'get_example_bin',
'hist_to_graph',
'init_bins',
'integral',
'iter_bins',
'iter_cells',
'make_hist_context',
'unify_1_md',
'NumpyHistogram',
# split into bins
'SplitIntoBins',
'IterateBins',
'MapBins',
]
3 changes: 2 additions & 1 deletion lena/structures/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def run(self, flow):
"""
from lena.flow import get_data_context
from lena.flow import get_context
get_example_bin = lena.flow.split_into_bins.get_example_bin
# not sure how these two ways of importing differ
get_example_bin = lena.structures.get_example_bin
update_nested = lena.context.update_nested
# why can't it be a Call element, which just returns
# unchanged values for non-histograms?
Expand Down
89 changes: 85 additions & 4 deletions lena/structures/hist_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

import lena.core
import lena.structures.graph
# todo: move it to this module
import lena.flow.split_into_bins


class HistCell(collections.namedtuple("HistCell", ("edges, bin, index"))):
Expand All @@ -26,6 +24,49 @@ class HistCell(collections.namedtuple("HistCell", ("edges, bin, index"))):
__slots__ = ()


def cell_to_string(
        cell_edges, var_context=None, coord_names=None,
        coord_fmt="{}_lte_{}_lt_{}", coord_join="_", reverse=False):
    """Build a string label for a cell from its edges.

    *cell_edges* is a tuple of pairs *(lower bound, upper bound)*,
    one pair per coordinate.

    *coord_names* is a list of coordinate names.
    If it is not given, names are taken from *var_context*:
    from the "name" of each item in its "combine" list if that key
    is present, otherwise from its own "name".
    If *var_context* is not given either,
    the names "coord0", "coord1", ... are used.

    *coord_fmt* is a format string for a single coordinate.
    It receives the lower bound, the coordinate name
    and the upper bound (in this order).

    *coord_join* is the string placed between formatted coordinates.

    If *reverse* is True, coordinates are joined in reverse order.

    Raise :exc:`.LenaValueError` if the number of coordinate names
    differs from the number of cell edges.
    """
    # todo: do we really need var_context?
    # todo: even if so, why isn't that a {}? Is that dangerous?
    if coord_names is None:
        if var_context is None:
            # no names known at all: generate positional ones
            coord_names = [
                "coord{}".format(ind) for ind in range(len(cell_edges))
            ]
        elif "combine" in var_context:
            coord_names = [sub_var["name"]
                           for sub_var in var_context["combine"]]
        else:
            coord_names = [var_context["name"]]

    if len(cell_edges) != len(coord_names):
        raise lena.core.LenaValueError(
            "coord_names must have same length as cell_edges, "
            "{} and {} given".format(coord_names, cell_edges)
        )

    pieces = []
    for pos, bounds in enumerate(cell_edges):
        pieces.append(coord_fmt.format(bounds[0], coord_names[pos], bounds[1]))
    if reverse:
        pieces.reverse()
    return coord_join.join(pieces)


def _check_edges_increasing_1d(arr):
if len(arr) <= 1:
raise lena.core.LenaValueError("size of edges should be more than one,"
Expand Down Expand Up @@ -237,6 +278,23 @@ def get_bin_on_value(arg, edges):
return indices


def get_example_bin(struct):
    """Return the bin with zero index along every axis.

    For example, for a two-dimensional histogram
    this is hist[0][0].

    *struct* can be a :class:`.histogram`
    or an array of bins (possibly nested lists).
    """
    if not isinstance(struct, lena.structures.histogram):
        # plain bins: descend through nested lists
        # always taking the first element
        example = struct
        while isinstance(example, list):
            example = example[0]
        return example
    zero_index = [0] * struct.dim
    return lena.structures.get_bin_on_index(zero_index, struct.bins)


def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
"""Convert a :class:`.histogram` to a :class:`.Graph`.
Expand Down Expand Up @@ -280,8 +338,7 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
'"{}" provided'.format(get_coordinate)
)

_ibe = lena.flow.split_into_bins._iter_bins_with_edges
for value, edges in _ibe(hist.bins, hist.edges):
for value, edges in _iter_bins_with_edges(hist.bins, hist.edges):
coord = get_coord(edges)
# todo: unclear when bin_context is present.
bin_value = lena.flow.get_data(value)
Expand Down Expand Up @@ -379,6 +436,30 @@ def iter_bins(bins):
yield (((ind,) + sub_ind), val)


def _iter_bins_with_edges(bins, edges):
    """Yield *(bin content, bin edges)* pairs.

    *Bin edges* is a tuple, whose element at index *i*
    is the bin's *(lower bound, upper bound)*
    along the *i*-th coordinate.
    """
    # todo: only a list or also a tuple, an array?
    if not isinstance(edges[0], list):
        # edges for a single axis: wrap them
        # to handle all dimensions uniformly
        edges = [edges]
    # a bin with index k lies between edges k and k+1
    axis_indices = [list(range(len(axis_edges) - 1)) for axis_edges in edges]
    for index in itertools.product(*axis_indices):
        content = lena.structures.get_bin_on_index(index, bins)
        bounds = tuple(
            (edges[axis][pos], edges[axis][pos + 1])
            for axis, pos in enumerate(index)
        )
        # the old interface yielded (bin, (lower edges, upper edges))
        yield (content, bounds)


def iter_cells(hist, ranges=None, coord_ranges=None):
"""Iterate cells of a histogram *hist*, possibly in a subrange.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,9 @@
import lena.math
import lena.structures
import lena.variables


def _iter_bins_with_edges(bins, edges):
    """Yield *(bin content, bin edges)* pairs.

    *Bin edges* is a tuple, whose element at index *i*
    is the bin's *(lower bound, upper bound)*
    along the *i*-th coordinate.
    """
    # todo: only a list or also a tuple, an array?
    if not isinstance(edges[0], list):
        # edges for a single axis: wrap them
        # to handle all dimensions uniformly
        edges = [edges]
    # a bin with index k lies between edges k and k+1
    axis_indices = [list(range(len(axis_edges) - 1)) for axis_edges in edges]
    for index in itertools.product(*axis_indices):
        content = lena.structures.get_bin_on_index(index, bins)
        bounds = tuple(
            (edges[axis][pos], edges[axis][pos + 1])
            for axis, pos in enumerate(index)
        )
        # the old interface yielded (bin, (lower edges, upper edges))
        yield (content, bounds)
from .hist_functions import (
cell_to_string, get_example_bin, _iter_bins_with_edges
)


class _MdSeqMap(object):
Expand All @@ -58,66 +37,6 @@ def __iter__(self):
return self


def cell_to_string(
        cell_edges, var_context=None, coord_names=None,
        coord_fmt="{}_lte_{}_lt_{}", coord_join="_", reverse=False):
    """Build a string label for a cell from its edges.

    *cell_edges* is a tuple of pairs *(lower bound, upper bound)*,
    one pair per coordinate.

    *coord_names* is a list of coordinate names.
    If it is not given, names are taken from *var_context*:
    from the "name" of each item in its "combine" list if that key
    is present, otherwise from its own "name".
    If *var_context* is not given either,
    the names "coord0", "coord1", ... are used.

    *coord_fmt* is a format string for a single coordinate.
    It receives the lower bound, the coordinate name
    and the upper bound (in this order).

    *coord_join* is the string placed between formatted coordinates.

    If *reverse* is True, coordinates are joined in reverse order.

    Raise :exc:`.LenaValueError` if the number of coordinate names
    differs from the number of cell edges.
    """
    # todo: do we really need var_context?
    # todo: even if so, why isn't that a {}? Is that dangerous?
    if coord_names is None:
        if var_context is None:
            # no names known at all: generate positional ones
            coord_names = [
                "coord{}".format(ind) for ind in range(len(cell_edges))
            ]
        elif "combine" in var_context:
            coord_names = [sub_var["name"]
                           for sub_var in var_context["combine"]]
        else:
            coord_names = [var_context["name"]]

    if len(cell_edges) != len(coord_names):
        raise lena.core.LenaValueError(
            "coord_names must have same length as cell_edges, "
            "{} and {} given".format(coord_names, cell_edges)
        )

    pieces = []
    for pos, bounds in enumerate(cell_edges):
        pieces.append(coord_fmt.format(bounds[0], coord_names[pos], bounds[1]))
    if reverse:
        pieces.reverse()
    return coord_join.join(pieces)


def get_example_bin(struct):
    """Return the bin with zero index along every axis.

    For example, for a two-dimensional histogram
    this is hist[0][0].

    *struct* can be a :class:`.histogram`
    or an array of bins (possibly nested lists).
    """
    if not isinstance(struct, lena.structures.histogram):
        # plain bins: descend through nested lists
        # always taking the first element
        example = struct
        while isinstance(example, list):
            example = example[0]
        return example
    zero_index = [0] * struct.dim
    return lena.structures.get_bin_on_index(zero_index, struct.bins)


class IterateBins(object):
"""Iterate bins of histograms."""

Expand Down

0 comments on commit 1640c59

Please sign in to comment.