Skip to content

Commit

Permalink
Add a graph structure (documented and tested).
Browse files Browse the repository at this point in the history
  • Loading branch information
ynikitenko committed Feb 14, 2022
1 parent 095c4f6 commit fa6b174
Show file tree
Hide file tree
Showing 5 changed files with 274 additions and 9 deletions.
4 changes: 4 additions & 0 deletions docs/source/structures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Structures
.. currentmodule:: lena.structures.graph
.. autosummary::

graph
Graph

.. currentmodule:: lena.structures.elements
Expand Down Expand Up @@ -84,6 +85,9 @@ Graph
-----

.. module:: lena.structures.graph
.. autoclass:: graph
:members:

.. autoclass:: Graph
:members:

Expand Down
4 changes: 2 additions & 2 deletions lena/structures/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .elements import HistToGraph
from .graph import Graph
from .graph import graph, Graph
from .histogram import histogram, Histogram
from .hist_functions import (
cell_to_string,
Expand All @@ -26,7 +26,7 @@

__all__ = [
# structures
'Graph',
'graph', 'Graph',
'histogram',
'HistCell',
'Histogram',
Expand Down
209 changes: 208 additions & 1 deletion lena/structures/graph.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,218 @@
"""Graph is a function at given points."""
"""A graph is a function at given points."""
import copy
import functools
import operator
import re

import lena.core
import lena.context
import lena.flow


class graph():
    """Numeric arrays of equal size."""

    def __init__(self, points, field_names=("x", "y"), scale=None):
        """This structure generally corresponds
        to the graph of a function
        and represents arrays of coordinates and the function values
        of arbitrary dimensions.

        *points* is a list of one-dimensional
        coordinate and value sequences (usually lists).
        There is little to no distinction between them,
        and "values" can also be called "coordinates".
        The outer sequence is copied (shallowly),
        so rescaling the graph never changes the container
        passed by the caller.

        *field_names* provide the meaning of these arrays.
        For example, a 3-dimensional graph could be distinguished
        from a 2-dimensional graph with errors by its fields
        ("x", "y", "z") instead of ("x", "y", "y_err"),
        or ("x", "y", "y_err_low", "y_err_high").
        Field names are used to transform Lena graphs to graphs
        from other libraries.
        Field names don't affect drawing graphs:
        for that :class:`~Variable`-s should be used.

        *field_names* can be a string separated by whitespace
        and/or commas
        or a sequence of strings, such as ["x", "y", "y_err"].
        Field names must have as many elements as *points*,
        and each field name must be unique.
        Default field names are "x" and "y",
        provided for the most used 2-dimensional graphs.

        Error fields must go after all other coordinates.
        Names of coordinate errors are those of coordinates plus "_err",
        further error details are appended after '_'
        (see the examples above).
        Otherwise field names are arbitrary.

        *scale* of the graph is a kind of its norm. It could be
        the integral of the function or its other property.
        A scale of a normalised probability density
        function would be one.
        An initialized *scale* is required if one needs
        to renormalise the graph in :meth:`scale`
        (for example, to plot it with other graphs).

        Coordinates of a function graph would usually be arrays
        of increasing values, which is not required here.
        Neither is it checked that coordinates indeed
        contain one-dimensional numeric values.
        However, non-standard graphs
        will likely lead to errors during plotting
        and will require more programmer's work and caution,
        so use them only if you understand what you are doing.

        A graph can be iterated yielding tuples of numbers
        for each point. Graph field names can be accessed
        as its *field_names* attribute.

        :exc:`~.LenaValueError` is raised if *points* is empty,
        if its subsequences have different lengths,
        if the number of field names differs from the number
        of subsequences or if field names are not unique.
        """
        if not points:
            raise lena.core.LenaValueError(
                "points must be a non-empty sequence "
                "of coordinate sequences"
            )

        # require points to be of the same size
        pt_len = len(points[0])
        for arr in points[1:]:
            if len(arr) != pt_len:
                raise lena.core.LenaValueError(
                    "points must have subsequences of equal lengths"
                )

        # Unicode (Python 2) field names would be just bad,
        # so we don't check for it here.
        if isinstance(field_names, str):
            # split(', ') won't work.
            # From https://stackoverflow.com/a/44785447/952234:
            # \s stands for whitespace.
            field_names = re.findall(r'[^,\s]+', field_names)

        if len(field_names) != len(points):
            raise lena.core.LenaValueError(
                "field_names must have the same size as points"
            )

        if len(set(field_names)) != len(field_names):
            raise lena.core.LenaValueError(
                "field_names contain duplicate names"
            )

        # todo: or just fields?..
        self.field_names = field_names
        # A shallow copy: scale() replaces whole coordinate arrays,
        # which must neither fail for a tuple of points
        # nor modify the container owned by the caller.
        self._points = list(points)
        self._scale = scale

        # todo: add subsequences of points as attributes
        # with field names.
        # In case if someone wants to create a graph of another function
        # at the same coordinates.
        # Should a) work when we rescale the graph
        #        b) not interfere with other fields and methods

        # Probably we won't add methods __del__(n), __add__(*coords),
        # since it might change the scale.

    def __iter__(self):
        """Iterate graph points one by one."""
        for val in zip(*self._points):
            yield val

    def _get_rescaled_indices(self):
        """Return indices of the last coordinate and of its errors.

        The last coordinate is the field right before
        the first error field, or the very last field
        if there are no error fields.
        :exc:`~.LenaValueError` is raised if the first field
        is an error field (there is no coordinate to rescale).
        """
        field_names = self.field_names

        last_coord_ind = len(field_names) - 1
        for ind, fn in enumerate(field_names):
            if fn.endswith("_err") or "_err_" in fn:
                last_coord_ind = ind - 1
                break

        if last_coord_ind < 0:
            # Without this check index -1 would silently select
            # no array at all, while the scale would still be updated.
            raise lena.core.LenaValueError(
                "can't rescale a graph without coordinates "
                "before error fields"
            )

        err_name = field_names[last_coord_ind] + "_err"
        err_prefix = err_name + "_"
        return [last_coord_ind] + [
            ind for ind, fn in enumerate(field_names)
            if fn == err_name or fn.startswith(err_prefix)
        ]

    def scale(self, other=None):
        """Get or set the scale of the graph.

        If *other* is ``None``, return the scale of this graph.

        If a numeric *other* is provided, rescale to that value.
        If the graph has unknown or zero scale,
        rescaling that will raise :exc:`~.LenaValueError`.

        To get meaningful results, graph's fields are used.
        Only the last coordinate is rescaled.
        For example, if the graph has *x* and *y* coordinates,
        then *y* will be rescaled, and for a 3-dimensional graph
        *z* will be rescaled.
        All errors are also rescaled together with their coordinate.

        The graph is rescaled in place and ``None`` is returned.
        """
        # this method is called scale() for uniformity with histograms
        # And this looks really good: explicit for computations
        # (not a subtle graph.scale, like a constant field (which is,
        # however, the case in graph - but not in other structures))
        # and easy to remember (set_scale? rescale? change_scale_to?..)

        # Abandoned: that would be redundant (not optimal)
        # to create a new graph
        # if we only want to change the scale of the existing one.

        if other is None:
            return self._scale

        if not self._scale:
            raise lena.core.LenaValueError(
                "can't rescale a graph with zero or unknown scale"
            )

        # computed before any modification, so that a failure
        # leaves the graph unchanged
        rescaled_indices = self._get_rescaled_indices()

        # In Python 2 3/2 is 1, so we want to be safe;
        # the downside is that integer-valued graphs
        # will become floating, but that is doubtfully an issue.
        # Remove when/if dropping support for Python 2.
        rescale = float(other) / self._scale

        # partial(mul, ...) was measured by the author to be faster
        # than an equivalent lambda (about 2.1 vs 3.2 in timeit).
        multiply = functools.partial(operator.mul, rescale)

        # rescale arrays of values and errors
        for ind in rescaled_indices:
            # Python lists are faster than arrays,
            # https://stackoverflow.com/a/62399645/952234
            # (because each time taking a value from an array
            #  creates a Python object)
            self._points[ind] = list(map(multiply, self._points[ind]))

        self._scale = other

        # as suggested in PEP 8
        return None


def _rescale_value(rescale, value):
    """Return *rescale* multiplied by the data of *value*.

    The data is whatever :func:`lena.flow.get_data` returns for *value*.
    """
    data = lena.flow.get_data(value)
    return rescale * data

Expand Down
4 changes: 2 additions & 2 deletions lena/structures/hist_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
_reduce = reduce

import lena.core
import lena.structures.graph
from .graph import Graph as _Graph


class HistCell(collections.namedtuple("HistCell", ("edges, bin, index"))):
Expand Down Expand Up @@ -318,7 +318,7 @@ def hist_to_graph(hist, make_value=None, get_coordinate="left", scale=None):
Return the resulting graph.
"""
gr = lena.structures.graph.Graph(scale=scale)
gr = _Graph(scale=scale)

## Could have allowed get_coordinate to be callable
# (for generality), but 1) first find a use case,
Expand Down
62 changes: 58 additions & 4 deletions tests/structures/test_graph.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
"""Test graph."""
from __future__ import print_function

import copy
import math
import random
Expand All @@ -14,10 +11,67 @@
from lena.core import LenaValueError
from lena.structures import Histogram
from lena.math import refine_mesh, isclose
from lena.structures.graph import Graph
from lena.structures.graph import graph, Graph
# from histogram_strategy import generate_increasing_list, generate_data_in_range


def test_graph_structure():
    """Check initialization, iteration, validation and rescaling of graph."""
    xs = [0, 1]
    ys = [2, 3]

    # the simplest 2d graph gets the default field names
    plain = graph([xs, ys])
    assert plain.field_names == ("x", "y")

    # points are yielded one by one
    assert list(plain) == [(0, 2), (1, 3)]

    # the scale is unknown unless it was provided
    assert plain.scale() is None

    # each of these initializations must be rejected
    invalid_init_args = (
        # empty points
        ([], ""),
        # duplicate names
        ([xs, ys], "x,x"),
        # sequences of different lengths
        ([[], [1]],),
        # the number of field names differs from that of points
        ([xs, ys], "x"),
    )
    for init_args in invalid_init_args:
        with pytest.raises(LenaValueError):
            graph(*init_args)

    # a graph with an unset scale can't be rescaled
    with pytest.raises(LenaValueError):
        plain.scale(1)

    # rescaling works when the scale is set
    halved_ys = [1, 1.5]
    errs = [1, 2]

    # 2d graph: y is rescaled
    gr_2d = graph(copy.deepcopy([xs, ys]), scale=2)
    assert gr_2d.scale() == 2
    gr_2d.scale(1)
    assert gr_2d._points == [xs, halved_ys]
    assert gr_2d.scale() == 1

    # 3d graph: only z is rescaled
    gr_3d = graph(copy.deepcopy([xs, ys, errs]),
                  field_names="x,y,z", scale=2)
    gr_3d.scale(3)
    assert gr_3d._points == [xs, ys, [1.5, 3.]]
    assert gr_3d.scale() == 3

    # graph with x errors: y is rescaled, x errors are unchanged
    gr_x_err = graph(copy.deepcopy([xs, ys, errs]),
                     field_names="x, y, x_err", scale=2)
    # field names separated by commas with spaces are parsed
    assert gr_x_err.field_names == ["x", "y", "x_err"]
    gr_x_err.scale(1)
    assert gr_x_err._points == [xs, halved_ys, errs]

    # graph with y errors: both y and its errors are rescaled
    gr_y_err = graph(copy.deepcopy([xs, ys, errs]),
                     field_names="x,y,y_err", scale=2)
    gr_y_err.scale(1)
    assert gr_y_err._points == [xs, halved_ys, [0.5, 1]]


def test_graph():
# sorts well
coords = range(0, 10)
Expand Down

0 comments on commit fa6b174

Please sign in to comment.