Skip to content

Commit

Permalink
ISlice supports negative start and stop parameters during run. Tested…
Browse files Browse the repository at this point in the history
… with hypothesis.
  • Loading branch information
ynikitenko committed Apr 25, 2021
1 parent a6ceded commit f80efbb
Show file tree
Hide file tree
Showing 2 changed files with 215 additions and 7 deletions.
153 changes: 146 additions & 7 deletions lena/flow/iterators.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
"""Adapters to iterators from ``itertools``."""
try:
    # Python 2 only: replace the eager builtin zip with the lazy one.
    from future_builtins import zip
except ImportError:
    # future_builtins does not exist in Python 3, where zip is already lazy.
    # ImportError is caught instead of its subclass ModuleNotFoundError,
    # because the latter name is undefined before Python 3.6.
    pass
import collections
import itertools

import lena.core
Expand Down Expand Up @@ -43,7 +49,8 @@ def __call__(self):


class ISlice(object):
"""Slice iterable from *start* to *stop* with *step*."""
"""Slice data flow from *start* to *stop* with *step*."""

def __init__(self, *args):
"""Initialization:
Expand All @@ -52,11 +59,61 @@ def __init__(self, *args):
:class:`ISlice` (*start, stop* [*, step*])
Similar to :func:`itertools.islice` or :func:`range`.
Negative indices for *start* and *stop* are supported
during :meth:`run`.
Examples:
>>> ISlice(1000) # doctest: +SKIP
analyse only one thousand first events (no other values
from flow are generated).
Use it for quick checks of data on small subsamples.
>>> ISlice(-1) # doctest: +SKIP
yields all elements from the flow except the last one.
>>> ISlice(1, -1) # doctest: +SKIP
yields all elements from the flow
except the first and the last one.
Note that in case of negative indices it is necessary
to store abs(start) or abs(stop) values in memory.
For example, to discard the last 200 elements
one has to a) read the whole flow, b) store 200 elements
during each iteration.
"""
self._islice = lambda iterable: itertools.islice(iterable, *args)
self._indices = self._islice(itertools.count(0))
self._next_index = -1
self._index = 0
# todo: rename to Slice in the next release.
from itertools import islice
if all([val is None or val >= 0 for val in args]):
# if step=0, then error is raised not here,
self._islice = lambda iterable: islice(iterable, *args)
# but here when we use this lambda:
try:
self._indices = self._islice(itertools.count(0))
except ValueError as err:
raise lena.core.LenaValueError(err)
self._next_index = -1
self._index = 0
else:
# negative indices
s = slice(*args)
self._start, self._stop, step = s.start, s.stop, s.step
# if step is None, it is 1 by default.
step = step or 1
if step <= 0:
raise lena.core.LenaValueError(
"step must be a natural number (integer >= 1)"
)
if step != 1:
# non-trivial step is computed here.
self.run = lambda flow: islice(self._run_negative_islice(flow),
None, None, step)
else:
self.run = lambda flow: self._run_negative_islice(flow)
self._step = step

def fill_into(self, element, value):
"""Fill *element* with *value*.
Expand All @@ -72,7 +129,89 @@ def fill_into(self, element, value):
element.fill(value)
self._index += 1

def _run_negative_islice(self, flow):
    """Yield the slice of *flow* for a negative *start* or *stop*.

    Uses ``self._start`` and ``self._stop`` and yields the same
    values as ``list(flow)[start:stop]``. *step* is not handled here:
    ``__init__`` applies it on top of this generator.

    *flow* may be any iterable. No more than ``abs(start)``
    or ``abs(stop)`` values are buffered at a time.
    With a negative *start* the whole flow is read
    before the first value can be yielded.
    """
    from collections import deque
    from itertools import islice

    start, stop = self._start, self._stop
    # The flow is consumed in several consecutive passes, and each pass
    # must continue where the previous one stopped. That holds only
    # for an iterator, so a re-iterable (like a list) is converted.
    flow = iter(flow)

    if start is None or start >= 0:
        # Only *stop* can be negative in this branch.
        if start:
            # discard the first *start* values
            for _ in islice(flow, start):
                pass
        if stop is None:
            for val in flow:
                yield val
            return
        # Hold back the most recent (-stop) values: a value is yielded
        # only after (-stop) newer ones have been seen.
        # If the flow is shorter than that, nothing is yielded.
        held = deque(islice(flow, -stop))
        for val in flow:
            yield held.popleft()
            held.append(val)
        return

    # start < 0: only the last (-start) values can belong to the slice.
    if stop is None:
        for val in deque(flow, maxlen=-start):
            yield val
        return

    if stop < 0:
        if stop <= start:
            # empty slice; the flow is not read at all
            return
        tail = deque(flow, maxlen=-start)
        # Equivalent of tail[:stop], which a deque does not support.
        # range of a non-positive number is empty.
        for _ in range(len(tail) + stop):
            yield tail.popleft()
        return

    # start < 0 <= stop
    # If the flow has more than (stop - start) values, then the slice
    # begins at or after *stop* and is empty: stop reading early.
    limit = stop - start
    tail = deque(maxlen=-start)
    seen = 0
    for val in flow:
        if seen >= limit:
            return
        tail.append(val)
        seen += 1
    # index (in the whole flow) of the first value kept in the tail
    first_index = seen - len(tail)
    for _ in range(min(stop - first_index, len(tail))):
        yield tail.popleft()


def run(self, flow):
    """Yield values from *start* to *stop* with *step*.

    Return the iterator created by ``self._islice`` for *flow*.
    This is the variant for non-negative indices; when a negative
    *start* or *stop* was given, ``__init__`` replaces this method
    on the instance.
    """
    # A single return (no yield in the body) keeps this a plain function,
    # which is valid both in Python 2 and Python 3.
    return self._islice(flow)
69 changes: 69 additions & 0 deletions tests/flow/test_iterators.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
from lena.flow.iterators import ISlice
from tests.examples.fill import StoreFilled

from hypothesis import strategies as s
from hypothesis import given
# don't think anything would change with other numbers
hypo_int_max = 200


def test_chain():
nums = [1, 2, 3]
Expand Down Expand Up @@ -81,3 +86,67 @@ def test_islice():
for i in range(0, 19):
isl.fill_into(store, i)
assert store.list == list(range(10, 20, 2))


def test_negative_islice():
    """Check ISlice.run with negative indices on fixed examples."""
    sample = [0, 1, 2]

    # (ISlice arguments, expected result for *sample*)
    fixed_cases = [
        # negative stop
        ((-1,), [0, 1]),
        # positive start, negative stop
        ((1, -1), [1]),
        # negative start, negative stop
        ((-2, -1), [1]),
        # negative start, positive stop
        ((-2, 2), [1]),
    ]
    for args, expected in fixed_cases:
        assert list(ISlice(*args).run(iter(sample))) == expected

    # step works, also together with a negative index
    for sl in (slice(None, None, 2), slice(-3, 3, 2)):
        numbers = list(range(0, 6))
        stepped = ISlice(sl.start, sl.stop, sl.step)
        assert list(stepped.run(iter(numbers))) == numbers[sl.start:sl.stop:sl.step]

    # initialization works correctly (seems not always)
    initialized = ISlice(-3, 5, None)
    attributes = (initialized._start, initialized._stop, initialized._step)
    assert attributes == (-3, 5, 1)

    slices = [
        slice(-3, 3, 2),
        slice(-3, 4, 3),
        slice(-3, 5),
        # negative very large start should have no effect,
        # like getting all elements.
        slice(-100, 3),
    ]
    for sl in slices:
        # one ISlice instance is reused for all data samples
        reused = ISlice(sl.start, sl.stop, sl.step)
        for seq in [range(0), range(4), range(10), range(20)]:
            result = list(reused.run(iter(seq)))
            assert result == list(seq)[sl.start:sl.stop:sl.step]

    # an explicit check of the large negative start
    large_start = ISlice(-100, 3)
    assert list(large_start.run(iter(range(5)))) == list(range(3))

    # step must be a natural number: negative and zero steps raise
    for bad_step in (-1, 0):
        with pytest.raises(lena.core.LenaValueError):
            ISlice(None, None, bad_step)


start_stop_s = s.one_of(s.none(), s.integers(-hypo_int_max, hypo_int_max))
step_s = s.integers(1, hypo_int_max)

@given(start=start_stop_s, stop=start_stop_s, step=step_s,
       data_len=s.integers(0, hypo_int_max))
def test_islice_hypothesis(start, stop, step, data_len):
    """ISlice.run must agree with list slicing for generated arguments."""
    reference = list(range(data_len))
    sliced = list(ISlice(start, stop, step).run(iter(reference)))
    assert sliced == reference[start:stop:step]

0 comments on commit f80efbb

Please sign in to comment.