math.Vectorize no longer allows learning data dimension from flow. It allows one to use rigid elements (fully constructed during initialisation) and metaprogramming.
ynikitenko · Sep 18, 2023 · 9dc30b8 · 9dc30b8
1 parent 39e7323
commit 9dc30b8
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 95 deletions.
diff --git a/lena/math/elements.py b/lena/math/elements.py
@@ -12,7 +12,8 @@
 import lena.context
 import lena.core
 from lena.core import (
-    LenaTypeError, LenaRuntimeError, LenaZeroDivisionError, LenaValueError
+    LenaTypeError, LenaRuntimeError, LenaZeroDivisionError, LenaValueError,
+    is_fill_compute_el
 )
 import lena.flow
 
@@ -255,96 +256,48 @@ def reset(self):
 class Vectorize(object):
     """Apply an algorithm to a vector component-wise."""
 
-    def __init__(self, seq, construct=None, dim=-1):
-        """*seq* is converted to a :class:`.FillComputeSeq`.
+    def __init__(self, seq, dim=-1, construct=None):
+        """*seq* must be a *FillCompute* element or sequence.
 
-        Return type during :meth:`compute` will be know from the first
-        filled element.
-        *construct* is needed in case the flow was empty.
-        It will provide the needed dimension and data type.
-        However, often an object constructor can allow
-        an arbitrary dimension (like ``tuple``).
-        In that case, provide *dim*.
+        *dim* is the dimension of the input data
+        (and of the constructed structure).
+        *seq* may also be a list of sequences, in that case
+        *dim* may be omitted.
 
-        *seq* can be a list of :class:`.FillComputeSeq`-s.
-        In that case dimension should not be provided.
+        *construct* allows one to create an arbitrary object
+        (by default the resulting values are tuples of dimension *dim*).
         """
-        default_dim = -1
-        # todo: if needed, a list *seq* could mean
-        # a list of sequences of the needed dimension.
+        # Vectorize should be a rigid element
+        # (we don't change its dimension easily),
+        # therefore its dimension is set during initialisation.
+        # if isinstance(seq, list):
         if isinstance(seq, list):
-            # Vectorize should be a rigid element
-            # (we don't change its dimension easily),
-            # but list is associated with parellelism in Lena
-            # seq must consist of FillComputeSeq-s,
-            # we don't init them automatically here
+            if dim != -1:
+                raise LenaTypeError(
+                    "no dimension should be provided with a list"
+                )
             self._seqs = seq
-            assert dim == default_dim
-            dim = len(seq)
-            self.fill = self._fill_others
         else:
-            try:
-                self._seq = lena.core.FillComputeSeq(seq)
-            except TypeError:
+            if dim == -1:
+                raise LenaTypeError(
+                    "dim must be provided with a sequence"
+                )
+            if not is_fill_compute_el(seq):
                 raise lena.core.LenaTypeError(
-                    "seq must be convertible to FillComputeSeq"
+                    "seq must be a FillCompute element or sequence"
                 )
-            if dim == default_dim:
-                pass
-                # self.fill = self._fill_first
-            else:
-                self._seqs = [self._seq]
-                self._seqs.extend([copy.deepcopy(self._seq) for _ in range(dim-1)])
-                # self.fill = self._fill_others
-
-        ## No need to get dim from here. Explicit dim would never hurt.
-        # if dim == default_dim and construct is not None:
-        #     try:
-        #         _tmp = construct()
-        #         dim = len(_tmp)
-        #     except TypeError:
-        #         # we have a chance to get data dimension from flow
-        #         pass
-
+            self._seqs = [seq]
+            self._seqs.extend([copy.deepcopy(seq) for _ in range(dim-1)])
 
+        # todo: get rid of construct,
+        # a separate Lena element may be better.
         self._construct = construct
-        self._dim = dim
+        self._dim = len(self._seqs)
         self._cur_context = {}
         self._filled_once = False
 
     def fill(self, val):
         """Fill sequences for each component of the data vector."""
-        # this may be not efficient, but I could not change the method runtime
-        if self._filled_once:
-            self._fill_others(val)
-        else:
-            self._fill_first(val)
-
-    def _fill_first(self, val):
-        # fill the first element. Will learn data type from that,
-        # its dimension and organise sequences.
-        data, context = lena.flow.get_data_context(val)
-        try:
-            dim = len(data)
-        except TypeError:
-            raise LenaValueError(
-                "no way to find out data dimension. "
-                "type of the data does not support len"
-            )
-
-        if self._construct is None:
-            self._construct = type(data)
-            # will be used like _construct(*result),
-            # that is data.__init__ must support such arguments.
-
-        self._seqs = [self._seq]
-        self._seqs.extend([copy.deepcopy(self._seq) for _ in range(dim-1)])
-        # doesn't work. _fill_first is always called (then _fill_others below)
-        self.fill = self._fill_others
-        self._fill_others(val)
-        self._filled_once = True
-
-    def _fill_others(self, val):
         data, context = lena.flow.get_data_context(val)
         for ind, seq in enumerate(self._seqs):
             # can raise if data is not of a sufficient length

diff --git a/tests/math/test_elements.py b/tests/math/test_elements.py
@@ -141,14 +141,7 @@ def test_vectorize_init():
     ## init works ##
     # not FillCompute sequence raises
     with pytest.raises(lena.core.LenaTypeError):
-        Vectorize(lambda x: x)
-
-    # construct works
-    vi1 = Vectorize(Sum(), construct=lambda _: vector3)
-    # when we can't find dimension without flow,
-    # LenaRuntimeError is raised.
-    with pytest.raises(LenaRuntimeError):
-        assert list(vi1.compute())
+        Vectorize(lambda x: x, dim=1)
 
     # construct with dim work
     vi2 = Vectorize(Sum(), construct=vector3, dim=3)
@@ -161,15 +154,15 @@ def test_vectorize_init():
 
     data = [vector3(1, 1, 1), vector3(1, 2, 3)]
 
-    v1 = Vectorize(Sum())
+    v1 = Vectorize(Sum(), dim=3)
     # todo: use inspect.isclass to forbid this:
     # v1 = Vectorize(Sum)
     for val in data:
         v1.fill(val)
-    assert list(v1.compute()) == [vector3(2, 3, 4)]
+    assert list(v1.compute()) == [(2, 3, 4)]
     context = {"context": True}
     v1.fill((vector3(0, 0, 0), context))
-    assert list(v1.compute()) == [(vector3(2, 3, 4), context)]
+    assert list(v1.compute()) == [((2, 3, 4), context)]
 
 
 @given(
@@ -178,17 +171,11 @@ def test_vectorize_init():
         min_size=1,
     )
 )
-@pytest.mark.parametrize("from_seq", [True, False])
-def test_vectorize_hypothesis(from_seq, data):
+def test_vectorize_hypothesis(data):
     # Vectorize doesn't mess with its input data.
     # If we filled values, they will be properly handled
     # by the nested sequence.
-    if from_seq:
-        # initializing each sequence explicitly
-        v = Vectorize([StoreFilled() for _ in range(3)])
-    else:
-        # copied automatically when getting dimension from data
-        v = Vectorize(StoreFilled())
+    v = Vectorize(StoreFilled(), dim=3)
 
     for val in data:
         v.fill(val)