Skip to content

Commit

Permalink
Add equality testing and representation for graphs. Add field_names k…
Browse files Browse the repository at this point in the history
…eyword argument to structures.HistToGraph and hist_to_graph. hist_to_graph produces graphs (not Graphs). graph's field_names must be a tuple (lists are no longer allowed). Graph is deprecated.
  • Loading branch information
ynikitenko committed Feb 15, 2022
1 parent fa6b174 commit 5b91cfb
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 30 deletions.
13 changes: 11 additions & 2 deletions lena/structures/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
class HistToGraph():
"""Transform a :class:`.histogram` to a :class:`.Graph`."""

def __init__(self, make_value, get_coordinate="left"):
def __init__(self, make_value, get_coordinate="left", field_names=("x", "y")):
"""*make_value* is a :class:`.Variable`
that creates graph's value from the bin's value.
*get_coordinate* defines the coordinate of the graph's point.
By default, it is the left bin edge. Other allowed values are
"right" and "middle".
*field_names* sets the field names of the resulting graphs.
Incorrect values for *make_value* or *get_coordinate* raise,
respectively,
:exc:`.LenaTypeError` or :exc:`.LenaValueError`.
Expand All @@ -31,13 +33,19 @@ def __init__(self, make_value, get_coordinate="left"):
"make_value must be a Variable, "
"{} given".format(make_value)
)
# todo: functions for coordinates should be allowed
# todo? functions for coordinates should be allowed
# -- see comment in hist_to_graph
# todo: do we need a run method, or should it be just __call__?
# -- see comment in the method! It should.
# todo: allow passing a scale(histogram, context?) function
# to create initial scales of graphs
if get_coordinate not in ["left", "right", "middle"]:
raise lena.core.LenaValueError(
'get_coordinate must be one of "left", "right" or "middle"; '
'"{}" provided'.format(get_coordinate)
)
self._get_coordinate = get_coordinate
self._field_names = field_names

def run(self, flow):
"""Iterate the *flow* and transform histograms to graphs.
Expand Down Expand Up @@ -86,6 +94,7 @@ def run(self, flow):
graph = hist_to_graph(
hist,
make_value=self._make_value.getter,
field_names=self._field_names,
get_coordinate=self._get_coordinate
)
yield (graph, context)
45 changes: 42 additions & 3 deletions lena/structures/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import operator
import re
import warnings

import lena.core
import lena.context
Expand Down Expand Up @@ -35,7 +36,8 @@ def __init__(self, points, field_names=("x", "y"), scale=None):
*field_names* can be a string separated by whitespace
and/or commas
or a tuple of strings, such as ("x", "y", "y_err").
Field names must have as many elements as *points*,
*field_names* must be a tuple,
have as many elements as *points*,
and each field name must be unique.
Default field names are "x" and "y",
provided for the most used 2-dimensional graphs.
Expand Down Expand Up @@ -87,7 +89,14 @@ def __init__(self, points, field_names=("x", "y"), scale=None):
# split(', ') won't work.
# From https://stackoverflow.com/a/44785447/952234:
# \s stands for whitespace.
field_names = re.findall(r'[^,\s]+', field_names)
field_names = tuple(re.findall(r'[^,\s]+', field_names))
elif not isinstance(field_names, tuple):
# It might be non-Pythonic to require a tuple
# (to prohibit a list), but it's important
# for comparisons and uniformity
raise lena.core.LenaTypeError(
"field_names must be a string or a tuple"
)

if len(field_names) != len(points):
raise lena.core.LenaValueError(
Expand All @@ -114,11 +123,32 @@ def __init__(self, points, field_names=("x", "y"), scale=None):
# Probably we won't add methods __del__(n), __add__(*coords),
# since it might change the scale.

def __eq__(self, other):
    """Compare this graph with *other* for equality.

    Graphs are equal only when their coordinates, scales
    and field names are all equal.
    If *other* is not a :class:`.graph`, ``False`` is returned.

    Values are compared exactly here, while floating numbers
    should be compared approximately
    (using :func:`math.isclose`).
    Therefore this comparison may give false negatives.
    """
    if isinstance(other, graph):
        # points are checked first, then scales, then field names
        return (
            self._points == other._points
            and self._scale == other._scale
            and self.field_names == other.field_names
        )
    # in Python comparison between different types is allowed
    return False

def __iter__(self):
"""Iterate graph points one by one."""
for val in zip(*self._points):
yield val

def __repr__(self):
return """graph({}, field_names={}, scale={}""".format(
self._points, self.field_names, self._scale
)

def scale(self, other=None):
"""Get or set the scale of the graph.
Expand Down Expand Up @@ -218,7 +248,13 @@ def _rescale_value(rescale, value):


class Graph(object):
"""Function at given coordinates (arbitraty dimensions).
"""
.. deprecated:: 0.5
use :class:`graph`.
This class may be used in the future,
but with a changed interface.
Function at given coordinates (arbitrary dimensions).
Graph points can be set during the initialization and
during :meth:`fill`. It can be rescaled (producing a new :class:`Graph`).
Expand Down Expand Up @@ -262,6 +298,9 @@ def __init__(self, points=None, context=None, scale=None, sort=True):
All filled values will be stored in it.
To reduce data, use histograms.
"""
warnings.warn("Graph is deprecated since Lena 0.5. Use graph.",
DeprecationWarning, stacklevel=2)

self._points = points if points is not None else []
# todo: add some sanity checks for points
self._scale = scale
Expand Down
47 changes: 35 additions & 12 deletions lena/structures/hist_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
_reduce = reduce

import lena.core
from .graph import Graph as _Graph
from .graph import graph as _graph, Graph as _Graph


class HistCell(collections.namedtuple("HistCell", ("edges, bin, index"))):
Expand Down Expand Up @@ -295,8 +295,9 @@ def get_example_bin(struct):
return bins


def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
"""Convert a :class:`.histogram` to a :class:`.Graph`.
def hist_to_graph(hist, make_value=None, get_coordinate="left",
field_names=("x", "y"), scale=None):
"""Convert a :class:`.histogram` to a :class:`.graph`.
*make_value* is a function to set the value of a graph's point.
By default it is bin content.
Expand All @@ -310,16 +311,17 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
>>> make_value = lambda bin_: (bin_.mean, bin_.mean_error)
*get_coordinate* defines what will be the coordinate
of a graph's point created from a histogram's bin.
*get_coordinate* defines what the coordinate
of a graph's point created from a histogram's bin will be.
It can be "left" (default), "right" and "middle".
*field_names* sets the field names of the graph. Their number
must be the same as the dimension of the result.
*scale* becomes the graph's scale (unknown by default).
Return the resulting graph.
"""
gr = _Graph(scale=scale)

## Could have allowed get_coordinate to be callable
# (for generality), but 1) first find a use case,
# 2) histogram bins could be adjusted in the first place.
Expand All @@ -338,19 +340,40 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
'"{}" provided'.format(get_coordinate)
)

# todo: make_value may be bad design.
# Maybe allow to change the graph in the sequence.
# However, make_value allows not to recreate a graph
# or its coordinates (if that is not needed).

points = [[] for _ in field_names]

chain = itertools.chain

for value, edges in _iter_bins_with_edges(hist.bins, hist.edges):
coord = get_coord(edges)
# todo: unclear when bin_context is present.
bin_value = lena.flow.get_data(value)
# todo: maybe it should be only a tuple?

# Since we never use contexts here, it will be optimal
# to ignore them completely (remove them elsewhere).
# bin_value = lena.flow.get_data(value)
bin_value = value

## if we provide make_value, no need to adjust bin_value
# # todo: maybe it should be only a tuple?
# however, if there is no make_value, one-dimensional
# histogram might fail when chaining that
if not hasattr(bin_value, "__iter__"):
bin_value = (bin_value,)

if make_value is None:
graph_value = bin_value
else:
graph_value = make_value(bin_value)
gr.fill((coord, graph_value))
return gr

for arr, coord_ in zip(points, chain(coord, graph_value)):
arr.append(coord_)
# gr.fill((coord, graph_value))

return _graph(points, field_names, scale)


def init_bins(edges, value=0, deepcopy=False):
Expand Down
15 changes: 11 additions & 4 deletions tests/structures/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,22 @@ def test_graph_structure():

assert gr0.scale() is None

## points work correctly
# empty points raise
with pytest.raises(LenaValueError):
graph([], "")
# duplicate names raise
with pytest.raises(LenaValueError):
graph([xs, ys], "x,x")
# wrong sequence lengths raise
with pytest.raises(LenaValueError):
graph([[], [1]])

## field_names work correctly
# duplicate names raise
with pytest.raises(LenaValueError):
graph([xs, ys], "x,x")
# non-tuple field names raise
with pytest.raises(lena.core.LenaTypeError):
graph([xs, ys], ["x", "y"])

# field names are same as the points length
with pytest.raises(LenaValueError):
graph([xs, ys], "x")
Expand All @@ -62,7 +69,7 @@ def test_graph_structure():
# x errors are unchanged, y coords change
gr3 = graph(copy.deepcopy([xs, ys, [1, 2]]), field_names="x, y, x_err", scale=2)
# spaces in field_names work
assert gr3.field_names == ["x", "y", "x_err"]
assert gr3.field_names == ("x", "y", "x_err")
gr3.scale(1)
assert gr3._points == [xs, [1, 1.5], [1, 2]]

Expand Down
23 changes: 14 additions & 9 deletions tests/structures/test_hist_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import lena.structures
from lena.core import LenaIndexError, LenaTypeError, LenaValueError
from lena.math import mesh
from lena.structures import histogram, Graph
from lena.structures import histogram, graph, Graph
from lena.structures import (
check_edges_increasing,
get_bin_edges,
Expand Down Expand Up @@ -113,32 +113,37 @@ def test_hist_to_graph():
nevents = Variable("nevents", lambda nevents: nevents)
htg = HistToGraph(nevents)
nev_context = {'value': {'variable': {'name': 'nevents'}}}
gr = graph([[0], [1]])

# run works correctly
assert list(htg.run(data)) == [
0,
(Graph(points=[((0,), (1,))], scale=None, sort=True),
nev_context),
(Graph(points=[((0,), (1,))], scale=None, sort=True),
nev_context),
(gr, nev_context),
(gr, nev_context),
# values with the specified context are skipped
(histogram([0, 1], bins=[1]),
{'histogram': {'to_graph': False}}
),
]

# different coordinates work
assert list(HistToGraph(nevents, get_coordinate="right").run([hist])) == \
[(Graph(points=[((1,), (1,))], scale=None, sort=True), nev_context)]
[(graph([[1], [1]], scale=None), nev_context)]
assert list(HistToGraph(nevents, get_coordinate="middle").run([hist])) == \
[(Graph(points=[((0.5,), (1,))], scale=None, sort=True), nev_context)]
[(graph([[0.5], [1]], scale=None), nev_context)]

val_with_error = collections.namedtuple("val_with_error", ["value", "error"])
hist1 = histogram(mesh((0, 1), 1))
val = val_with_error(1, 2)
hist1.bins = lena.structures.init_bins(hist1.edges, val)
transform_value = Variable("value_error",
lambda val: (val.value, val.error))
assert list(HistToGraph(make_value=transform_value).run([hist1])) == \
[(Graph(points=[((0,), (1, 2))], scale=None, sort=True),
assert list(
HistToGraph(
make_value=transform_value, field_names=("x", "y", "z")
).run([hist1])
) == \
[(graph([[0], [1], [2]], field_names="x,y,z"),
{'value': {'variable': {'name': 'value_error'}}})]

# wrong make_value raises
Expand Down

0 comments on commit 5b91cfb

Please sign in to comment.