ISlice supports negative start and stop parameters during run. Tested…

… with hypothesis.
ynikitenko · Apr 25, 2021 · f80efbb · f80efbb
1 parent a6ceded
commit f80efbb
Show file tree

Hide file tree

Showing 2 changed files with 215 additions and 7 deletions.
diff --git a/lena/flow/iterators.py b/lena/flow/iterators.py
@@ -1,4 +1,10 @@
 """Adapters to iterators from ``itertools``."""
+try:
+    # Python 2: use the lazy, iterator-returning zip.
+    from future_builtins import zip
+except ImportError:
+    # future_builtins exists only in Python 2; the builtin zip
+    # of Python 3 is already lazy, so nothing has to be replaced.
+    # ImportError is caught instead of ModuleNotFoundError,
+    # because the latter name was added only in Python 3.6
+    # (it is a subclass of ImportError).
+    pass
+import collections
 import itertools
 
 import lena.core
@@ -43,7 +49,8 @@ def __call__(self):
 
 
 class ISlice(object):
-    """Slice iterable from *start* to *stop* with *step*."""
+    """Slice data flow from *start* to *stop* with *step*."""
+
     def __init__(self, *args):
         """Initialization:
 
@@ -52,11 +59,61 @@ def __init__(self, *args):
         :class:`ISlice` (*start, stop* [*, step*])
 
         Similar to :func:`itertools.islice` or :func:`range`.
+        Negative indices for *start* and *stop* are supported
+        during :meth:`run`.
+
+        Examples:
+
+        >>> ISlice(1000)  # doctest: +SKIP
+
+        analyse only one thousand first events (no other values
+        from flow are generated).
+        Use it for quick checks of data on small subsamples.
+
+        >>> ISlice(-1)  # doctest: +SKIP
+
+        yields all elements from the flow except the last one.
+
+        >>> ISlice(1, -1)  # doctest: +SKIP
+
+        yields all elements from the flow
+        except the first and the last one.
+        
+        Note that in case of negative indices it is necessary
+        to store abs(start) or abs(stop) values in memory.
+        For example, to discard the last 200 elements
+        one has to a) read the whole flow, b) store 200 elements
+        during each iteration.
         """
-        self._islice = lambda iterable: itertools.islice(iterable, *args)
-        self._indices = self._islice(itertools.count(0))
-        self._next_index = -1
-        self._index = 0
+        # todo: rename to Slice in the next release.
+        from itertools import islice
+        if all([val is None or val >= 0 for val in args]):
+            # if step=0, then error is raised not here,
+            self._islice = lambda iterable: islice(iterable, *args)
+            # but here when we use this lambda:
+            try:
+                self._indices = self._islice(itertools.count(0))
+            except ValueError as err:
+                raise lena.core.LenaValueError(err)
+            self._next_index = -1
+            self._index = 0
+        else:
+            # negative indices
+            s = slice(*args)
+            self._start, self._stop, step = s.start, s.stop, s.step
+            # if step is None, it is 1 by default.
+            step = step or 1
+            if step <= 0:
+                raise lena.core.LenaValueError(
+                    "step must be a natural number (integer >= 1)"
+                )
+            if step != 1:
+                # non-trivial step is computed here.
+                self.run = lambda flow: islice(self._run_negative_islice(flow),
+                                               None, None, step)
+            else:
+                self.run = lambda flow: self._run_negative_islice(flow)
+            self._step = step
 
     def fill_into(self, element, value):
         """Fill *element* with *value*.
@@ -72,7 +129,89 @@ def fill_into(self, element, value):
             element.fill(value)
         self._index += 1
 
+    def _run_negative_islice(self, flow):
+        """Yield the slice of *flow* for a negative *start* or *stop*.
+
+        Reads ``self._start`` and ``self._stop`` and yields the same
+        values as ``list(flow)[start:stop]``. The step is not applied
+        here (``__init__`` wraps this generator when step is not 1).
+
+        Negative indices are counted from the end of the flow,
+        therefore up to ``abs(start)`` or ``abs(stop)`` values
+        are buffered in a deque.
+        *flow* may be any iterable; it is converted to an iterator,
+        so that values skipped or buffered are not read twice.
+        """
+        from collections import deque
+        start, stop = self._start, self._stop
+        flow = iter(flow)
+
+        def drop_last(flow, count):
+            # Yield all values from *flow* except the last *count* ones.
+            # A value is released only after *count* newer values
+            # have been read.
+            window = deque(maxlen=count)
+            # range goes first in zip, so that no extra value
+            # is taken from *flow* when the window is full.
+            for _, val in zip(range(count), flow):
+                window.append(val)
+            for val in flow:
+                yield window.popleft()
+                window.append(val)
+
+        if start is None or start >= 0:
+            if start:
+                # skip *start* values
+                for _ in zip(range(start), flow):
+                    pass
+            if stop is None:
+                # defensive branch: __init__ sends slices without
+                # negative indices to itertools.islice.
+                for val in flow:
+                    yield val
+                return
+            # stop is negative. If it lies before start,
+            # the window is never filled and nothing is yielded.
+            for val in drop_last(flow, -stop):
+                yield val
+            return
+
+        # start < 0
+        if stop is None:
+            # only the last (-start) values are needed.
+            tail = deque(flow, maxlen=-start)
+            while tail:
+                yield tail.popleft()
+            return
+
+        if stop <= start:
+            # empty slice, flow is not read.
+            return
+
+        if stop < 0:
+            # exhaust the flow keeping the last (-start) values,
+            tail = deque(flow, maxlen=-start)
+            # then yield them until only the last (-stop) ones remain.
+            # The length must be compared with a fixed number:
+            # comparing a growing counter with the shrinking len(tail)
+            # would stop after about half of the values.
+            while len(tail) > -stop:
+                yield tail.popleft()
+            return
+
+        # stop >= 0
+        seen = 0
+        tail = deque(maxlen=-start)
+        for val in flow:
+            # if the flow has more than (stop - start) values,
+            # start lies at or after stop and the slice is empty;
+            # no need to read further.
+            if seen >= stop - start:
+                return
+            tail.append(val)
+            seen += 1
+        # flow is finished; index of the first buffered value.
+        index = seen - len(tail)
+        while index < stop and tail:
+            yield tail.popleft()
+            index += 1
+
+
     def run(self, flow):
         """Yield values from *start* to *stop* with *step*."""
-        for val in self._islice(flow):
-            yield val
+        # self._islice is set in __init__ only when no index is negative;
+        # otherwise __init__ replaces run on the instance.
+        return self._islice(flow)
diff --git a/tests/flow/test_iterators.py b/tests/flow/test_iterators.py
@@ -9,6 +9,11 @@
 from lena.flow.iterators import ISlice
 from tests.examples.fill import StoreFilled
 
+from hypothesis import strategies as s
+from hypothesis import given
+# don't think anything would change with other numbers
+hypo_int_max = 200
+
 
 def test_chain():
     nums = [1, 2, 3]
@@ -81,3 +86,67 @@ def test_islice():
     for i in range(0, 19):
         isl.fill_into(store, i)
     assert store.list == list(range(10, 20, 2))
+
+
+def test_negative_islice():
+    """Check ISlice.run with negative indices against list slicing."""
+    sample = [0, 1, 2]
+    # (ISlice arguments, expected result for *sample*)
+    simple_cases = [
+        ((-1,), [0, 1]),    # negative stop
+        ((1, -1), [1]),     # positive start, negative stop
+        ((-2, -1), [1]),    # negative start, negative stop
+        ((-2, 2), [1]),     # negative start, positive stop
+    ]
+    for args, expected in simple_cases:
+        assert list(ISlice(*args).run(iter(sample))) == expected
+
+    # step works, also together with a negative index
+    six = list(range(0, 6))
+    for sl in (slice(None, None, 2), slice(-3, 3, 2)):
+        element = ISlice(sl.start, sl.stop, sl.step)
+        assert list(element.run(iter(six))) == six[sl]
+
+    # a step given as None is stored as 1
+    none_step = ISlice(-3, 5, None)
+    stored = (none_step._start, none_step._stop, none_step._step)
+    assert stored == (-3, 5, 1)
+
+    slices = [
+        slice(-3, 3, 2),
+        slice(-3, 4, 3),
+        slice(-3, 5),
+        # a very large negative start has no effect,
+        # all elements before stop are taken.
+        slice(-100, 3),
+    ]
+    lengths = [0, 4, 10, 20]
+    for sl in slices:
+        # the same element is run on several flows
+        element = ISlice(sl.start, sl.stop, sl.step)
+        for length in lengths:
+            values = list(range(length))
+            assert list(element.run(iter(values))) == values[sl]
+
+    # explicit check of the large negative start
+    assert list(ISlice(-100, 3).run(iter(range(5)))) == [0, 1, 2]
+
+    # step must be a natural number: negative and zero steps raise
+    for bad_step in (-1, 0):
+        with pytest.raises(lena.core.LenaValueError):
+            ISlice(None, None, bad_step)
+
+
+# start and stop: None or an integer between -hypo_int_max and hypo_int_max
+start_stop_s = s.one_of(s.none(), s.integers(-hypo_int_max, hypo_int_max))
+# step: an integer between 1 and hypo_int_max
+step_s = s.integers(1, hypo_int_max)
+
+@given(start=start_stop_s, stop=start_stop_s, step=step_s,
+       data_len=s.integers(0, hypo_int_max))
+def test_islice_hypothesis(start, stop, step, data_len):
+    """ISlice.run must give the same result as slicing a list."""
+    values = list(range(data_len))
+    element = ISlice(start, stop, step)
+    result = list(element.run(iter(values)))
+    expected = values[start:stop:step]
+    assert result == expected