Skip to content

Commit

Permalink
math.Vectorize no longer allows learning data dimension from flow. It…
Browse files Browse the repository at this point in the history
… allows one to use rigid elements (fully constructed during initialisation) and metaprogramming.
  • Loading branch information
ynikitenko committed Sep 18, 2023
1 parent 39e7323 commit 9dc30b8
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 95 deletions.
105 changes: 29 additions & 76 deletions lena/math/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
import lena.context
import lena.core
from lena.core import (
LenaTypeError, LenaRuntimeError, LenaZeroDivisionError, LenaValueError
LenaTypeError, LenaRuntimeError, LenaZeroDivisionError, LenaValueError,
is_fill_compute_el
)
import lena.flow

Expand Down Expand Up @@ -255,96 +256,48 @@ def reset(self):
class Vectorize(object):
"""Apply an algorithm to a vector component-wise."""

def __init__(self, seq, construct=None, dim=-1):
"""*seq* is converted to a :class:`.FillComputeSeq`.
def __init__(self, seq, dim=-1, construct=None):
"""*seq* must be a *FillCompute* element or sequence.
Return type during :meth:`compute` will be known from the first
filled element.
*construct* is needed in case the flow was empty.
It will provide the needed dimension and data type.
However, often an object constructor can allow
an arbitrary dimension (like ``tuple``).
In that case, provide *dim*.
*dim* is the dimension of the input data
(and of the constructed structure).
*seq* may also be a list of sequences, in that case
*dim* may be omitted.
*seq* can be a list of :class:`.FillComputeSeq`-s.
In that case dimension should not be provided.
*construct* allows one to create an arbitrary object
(by default the resulting values are tuples of dimension *dim*).
"""
default_dim = -1
# todo: if needed, a list *seq* could mean
# a list of sequences of the needed dimension.
# Vectorize should be a rigid element
# (we don't change its dimension easily),
# therefore its dimension is set during initialisation.
# if isinstance(seq, list):
if isinstance(seq, list):
# Vectorize should be a rigid element
# (we don't change its dimension easily),
# but list is associated with parallelism in Lena
# seq must consist of FillComputeSeq-s,
# we don't init them automatically here
if dim != -1:
raise LenaTypeError(
"no dimension should be provided with a list"
)
self._seqs = seq
assert dim == default_dim
dim = len(seq)
self.fill = self._fill_others
else:
try:
self._seq = lena.core.FillComputeSeq(seq)
except TypeError:
if dim == -1:
raise LenaTypeError(
"dim must be provided with a sequence"
)
if not is_fill_compute_el(seq):
raise lena.core.LenaTypeError(
"seq must be convertible to FillComputeSeq"
"seq must be a FillCompute element or sequence"
)
if dim == default_dim:
pass
# self.fill = self._fill_first
else:
self._seqs = [self._seq]
self._seqs.extend([copy.deepcopy(self._seq) for _ in range(dim-1)])
# self.fill = self._fill_others

## No need to get dim from here. Explicit dim would never hurt.
# if dim == default_dim and construct is not None:
# try:
# _tmp = construct()
# dim = len(_tmp)
# except TypeError:
# # we have a chance to get data dimension from flow
# pass

self._seqs = [seq]
self._seqs.extend([copy.deepcopy(seq) for _ in range(dim-1)])

# todo: get rid of construct,
# a separate Lena element may be better.
self._construct = construct
self._dim = dim
self._dim = len(self._seqs)
self._cur_context = {}
self._filled_once = False

def fill(self, val):
"""Fill sequences for each component of the data vector."""
# this may not be efficient, but I could not change the method at runtime
if self._filled_once:
self._fill_others(val)
else:
self._fill_first(val)

def _fill_first(self, val):
# fill the first element. Will learn data type from that,
# its dimension and organise sequences.
data, context = lena.flow.get_data_context(val)
try:
dim = len(data)
except TypeError:
raise LenaValueError(
"no way to find out data dimension. "
"type of the data does not support len"
)

if self._construct is None:
self._construct = type(data)
# will be used like _construct(*result),
# that is data.__init__ must support such arguments.

self._seqs = [self._seq]
self._seqs.extend([copy.deepcopy(self._seq) for _ in range(dim-1)])
# doesn't work. _fill_first is always called (then _fill_others below)
self.fill = self._fill_others
self._fill_others(val)
self._filled_once = True

def _fill_others(self, val):
data, context = lena.flow.get_data_context(val)
for ind, seq in enumerate(self._seqs):
# can raise if data is not of a sufficient length
Expand Down
25 changes: 6 additions & 19 deletions tests/math/test_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,7 @@ def test_vectorize_init():
## init works ##
# not FillCompute sequence raises
with pytest.raises(lena.core.LenaTypeError):
Vectorize(lambda x: x)

# construct works
vi1 = Vectorize(Sum(), construct=lambda _: vector3)
# when we can't find dimension without flow,
# LenaRuntimeError is raised.
with pytest.raises(LenaRuntimeError):
assert list(vi1.compute())
Vectorize(lambda x: x, dim=1)

# construct with dim works
vi2 = Vectorize(Sum(), construct=vector3, dim=3)
Expand All @@ -161,15 +154,15 @@ def test_vectorize_init():

data = [vector3(1, 1, 1), vector3(1, 2, 3)]

v1 = Vectorize(Sum())
v1 = Vectorize(Sum(), dim=3)
# todo: use inspect.isclass to forbid this:
# v1 = Vectorize(Sum)
for val in data:
v1.fill(val)
assert list(v1.compute()) == [vector3(2, 3, 4)]
assert list(v1.compute()) == [(2, 3, 4)]
context = {"context": True}
v1.fill((vector3(0, 0, 0), context))
assert list(v1.compute()) == [(vector3(2, 3, 4), context)]
assert list(v1.compute()) == [((2, 3, 4), context)]


@given(
Expand All @@ -178,17 +171,11 @@ def test_vectorize_init():
min_size=1,
)
)
@pytest.mark.parametrize("from_seq", [True, False])
def test_vectorize_hypothesis(from_seq, data):
def test_vectorize_hypothesis(data):
# Vectorize doesn't mess with its input data.
# If we filled values, they will be properly handled
# by the nested sequence.
if from_seq:
# initializing each sequence explicitly
v = Vectorize([StoreFilled() for _ in range(3)])
else:
# copied automatically when getting dimension from data
v = Vectorize(StoreFilled())
v = Vectorize(StoreFilled(), dim=3)

for val in data:
v.fill(val)
Expand Down

0 comments on commit 9dc30b8

Please sign in to comment.