# Advanced indexing

In [2]:
# Standard library.
import cProfile
import sys

# Put the parent directory at the front of the import path *before* importing
# zarr -- presumably so the in-development checkout next to this notebook is
# picked up instead of an installed release (the version shown is a dev build).
sys.path.insert(0, '..')

# Third-party / project imports (must follow the path tweak above).
import numpy as np
import zarr

# Seed NumPy's global RNG so the random selections below are repeatable
# when the notebook is run top to bottom.
np.random.seed(42)

# Last expression of the cell: display the zarr version in use.
zarr.__version__

'2.1.5.dev83'

## Functionality and API

### Indexing a 1D array with a Boolean array

In [47]:
# Source data: the integers 0..9, stored as a Zarr array with 2 items per chunk.
a = np.arange(10)
za = zarr.array(a, chunks=2)
# Boolean mask (plain Python list) that is True at the odd positions 1, 3, 5, 7, 9.
ix = [pos % 2 == 1 for pos in range(10)]

In [48]:
# get items
za[ix]

array([1, 3, 5, 7, 9])

In [49]:
# set items: overwrite the masked positions with ten times their value in `a`
za[ix] = a[ix] * 10
# read the whole array back to show which positions changed
za[:]

array([ 0, 10,  2, 30,  4, 50,  6, 70,  8, 90])

In [50]:
# indexing array can be any array-like, e.g., Zarr array
# store the Boolean mask itself as a chunked Zarr array
zix = zarr.array(ix, chunks=2)
# rebuild `za` from the untouched NumPy data so the assignment made above
# does not affect this example
za = zarr.array(a, chunks=2)
za[zix]  # will not load all zix into memory

array([1, 3, 5, 7, 9])

### Indexing a 1D array with an integer array

In [9]:
# Start again from a fresh Zarr copy of 0..9 (2 items per chunk).
a = np.arange(10)
za = zarr.array(a, chunks=2)
# Integer positions to select: the odd indices 1, 3, 5, 7, 9.
ix = list(range(1, 10, 2))

In [19]:
# get items
za[ix]

array([1, 3, 5, 7, 9])

In [20]:
# set items: overwrite the selected positions with ten times their value in `a`
za[ix] = a[ix] * 10
# read the whole array back to show which positions changed
za[:]

array([ 0, 10,  2, 30,  4, 50,  6, 70,  8, 90])

### Slicing a 1D array with step > 1

Slices with step > 1 are supported. Internally these are converted to an integer array via ``np.arange``.

In [None]:
# fresh data for the stepped-slice examples: 0..9 in a Zarr array with 2 items per chunk
a = np.arange(10)
za = zarr.array(a, chunks=2)

In [21]:
# get items
za[1::2]

array([1, 3, 5, 7, 9])

In [22]:
# set items
za[1::2] = a[1::2] * 10
za[:]

array([ 0, 10,  2, 30,  4, 50,  6, 70,  8, 90])

### Orthogonal (outer) indexing of multi-dimensional arrays

Orthogonal (a.k.a. outer) indexing is supported with either Boolean or integer arrays. This functionality is provided via the ``get/set_orthogonal_selection()`` methods. For convenience, this functionality is also available via the ``oindex[]`` property as has been proposed for numpy.

In [51]:
# 5x3 example array holding 0..14 in row-major order
a = np.arange(15).reshape(5, 3)
# chunk shape (3, 2) does not evenly divide the (5, 3) array shape
za = zarr.array(a, chunks=(3, 2))
# display the full contents for reference
za[:]

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [30]:
# orthogonal indexing with Boolean arrays
# ix0 is a mask over the rows (True for rows 1 and 3), ix1 a mask over the
# columns (True for columns 0 and 2); the result contains every combination
# of a selected row with a selected column
ix0 = [False, True, False, True, False]
ix1 = [True, False, True]
za.get_orthogonal_selection((ix0, ix1))

array([[ 3,  5],
       [ 9, 11]])

In [31]:
# alternative API
za.oindex[ix0, ix1]

array([[ 3,  5],
       [ 9, 11]])

In [32]:
# orthogonal indexing with integer arrays
ix0 = [1, 3]
ix1 = [0, 2]
za.get_orthogonal_selection((ix0, ix1))

array([[ 3,  5],
       [ 9, 11]])

In [33]:
# alternative API
za.oindex[ix0, ix1]

array([[ 3,  5],
       [ 9, 11]])

In [38]:
# combine with slice
za.oindex[[1,  3], :]

array([[ 3,  4,  5],
       [ 9, 10, 11]])

In [39]:
# combine with slice
za.oindex[:, [0, 2]]

array([[ 0,  2],
       [ 3,  5],
       [ 6,  8],
       [ 9, 11],
       [12, 14]])

In [41]:
# set items via Boolean selection
# row mask (rows 1 and 3) and column mask (columns 0 and 2)
ix0 = [False, True, False, True, False]
ix1 = [True, False, True]
# the selection is passed as a tuple with one entry per dimension
selection = ix0, ix1
# a single scalar is written to every selected cell (see the output below)
value = 42
za.set_orthogonal_selection(selection, value)
# read the whole array back to show which cells changed
za[:]

array([[ 0,  1,  2],
       [42,  4, 42],
       [ 6,  7,  8],
       [42, 10, 42],
       [12, 13, 14]])

In [43]:
# alternative API
za.oindex[ix0, ix1] = 44
za[:]

array([[ 0,  1,  2],
       [44,  4, 44],
       [ 6,  7,  8],
       [44, 10, 44],
       [12, 13, 14]])

In [44]:
# set items via integer selection
ix0 = [1, 3]
ix1 = [0, 2]
selection = ix0, ix1
value = 46
za.set_orthogonal_selection(selection, value)
za[:]

array([[ 0,  1,  2],
       [46,  4, 46],
       [ 6,  7,  8],
       [46, 10, 46],
       [12, 13, 14]])

In [45]:
# alternative API
za.oindex[ix0, ix1] = 48
za[:]

array([[ 0,  1,  2],
       [48,  4, 48],
       [ 6,  7,  8],
       [48, 10, 48],
       [12, 13, 14]])

### Coordinate indexing of multi-dimensional arrays

Selecting arbitrary points from a multi-dimensional array by indexing with integer (coordinate) arrays is supported. This functionality is provided via the ``get/set_coordinate_selection()`` methods. For convenience, this functionality is also available via the ``vindex[]`` property as has been proposed for numpy.

In [52]:
# fresh 5x3 example array holding 0..14 in row-major order
a = np.arange(15).reshape(5, 3)
# chunk shape (3, 2) does not evenly divide the (5, 3) array shape
za = zarr.array(a, chunks=(3, 2))
# display the full contents for reference
za[:]

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [57]:
# get items
# ix0 holds row coordinates and ix1 column coordinates; they are paired
# element-wise, so the points (1, 0) and (3, 2) are selected -- one value per
# point, not the 2x2 outer product that orthogonal indexing would return
ix0 = [1, 3]
ix1 = [0, 2]
za.get_coordinate_selection((ix0, ix1))

array([ 3, 11])

In [56]:
# alternative API
za.vindex[ix0, ix1]

array([ 3, 11])

In [58]:
# set items
za.set_coordinate_selection((ix0, ix1), 42)
za[:]

array([[ 0,  1,  2],
       [42,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 42],
       [12, 13, 14]])

In [59]:
# alternative API
za.vindex[ix0, ix1] = 44
za[:]

array([[ 0,  1,  2],
       [44,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 44],
       [12, 13, 14]])

## 1D Benchmarking

In [25]:
# benchmark data: 100 million consecutive integers
c = np.arange(100000000)
# uncompressed size in bytes, for comparison with the Zarr "No. bytes stored" figure
c.nbytes

800000000

In [26]:
# time copying the NumPy data into a Zarr array; no chunks or compressor are
# passed, so whatever zarr picks by default is used (reported by .info below)
%time zc = zarr.array(c)
zc.info

CPU times: user 508 ms, sys: 28 ms, total: 536 ms
Wall time: 162 ms


0,1
Type,zarr.core.Array
Data type,int64
Shape,"(100000000,)"
Chunk shape,"(97657,)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,800000000 (762.9M)
No. bytes stored,11854081 (11.3M)


In [27]:
%time c.copy()

CPU times: user 120 ms, sys: 60 ms, total: 180 ms
Wall time: 178 ms


array([       0,        1,        2, ..., 99999997, 99999998, 99999999])

In [29]:
%time zc[:]

CPU times: user 520 ms, sys: 32 ms, total: 552 ms
Wall time: 261 ms


array([       0,        1,        2, ..., 99999997, 99999998, 99999999])

### bool dense selection

In [82]:
# relatively dense selection - 10%
# one Bernoulli(0.1) draw per item of `c`, cast to bool to form a mask
ix_dense_bool = np.random.binomial(1, 0.1, size=c.shape[0]).astype(bool)
# number of items actually selected by the mask
np.count_nonzero(ix_dense_bool)

9998583

In [83]:
%time c[ix_dense_bool]

CPU times: user 312 ms, sys: 0 ns, total: 312 ms
Wall time: 311 ms


array([      23,       24,       39, ..., 99999967, 99999978, 99999995])

In [84]:
%time zc[ix_dense_bool]

CPU times: user 888 ms, sys: 52 ms, total: 940 ms
Wall time: 459 ms


array([      23,       24,       39, ..., 99999967, 99999978, 99999995])

In [85]:
cProfile.run('zc[ix_dense_bool]', sort='time')

         58423 function calls in 0.514 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1025    0.205    0.000    0.205    0.000 {method 'nonzero' of 'numpy.ndarray' objects}
     1024    0.169    0.000    0.179    0.000 core.py:930(_decode_chunk)
     1024    0.062    0.000    0.261    0.000 core.py:768(_chunk_getitem)
     1024    0.011    0.000    0.011    0.000 {built-in method numpy.core.multiarray.count_nonzero}
     1025    0.008    0.000    0.234    0.000 new_indexing.py:494(__iter__)
     1024    0.006    0.000    0.216    0.000 index_tricks.py:26(ix_)
     2048    0.006    0.000    0.006    0.000 core.py:323(<genexpr>)
     2048    0.005    0.000    0.005    0.000 {method 'reshape' of 'numpy.ndarray' objects}
     1024    0.004    0.000    0.004    0.000 {built-in method numpy.core.multiarray.frombuffer}
        1    0.004    0.004    0.499    0.499 core.py:548(_get_selection)
     1024    0.003    0.000    0.

### int dense selection

In [86]:
# Draw one tenth as many positions as `c` has items, sampling with
# replacement: the result is unordered and may contain duplicates.
ix_dense_int = np.random.choice(len(c), size=len(c) // 10, replace=True)
# Sorted copy of the same positions, benchmarked separately from the unsorted ones.
ix_dense_int_sorted = np.sort(ix_dense_int)
# Show how many positions were drawn and a preview of the unsorted indices.
len(ix_dense_int), ix_dense_int

(10000000,
 array([38852033, 29570639,  6153807, ..., 51604068, 33056119, 29899374]))

In [87]:
%time c[ix_dense_int_sorted]

CPU times: user 60 ms, sys: 32 ms, total: 92 ms
Wall time: 91 ms


array([       6,        9,       15, ..., 99999956, 99999964, 99999985])

In [88]:
%time zc[ix_dense_int_sorted]

CPU times: user 576 ms, sys: 104 ms, total: 680 ms
Wall time: 428 ms


array([       6,        9,       15, ..., 99999956, 99999964, 99999985])

In [89]:
%time c[ix_dense_int]

CPU times: user 144 ms, sys: 20 ms, total: 164 ms
Wall time: 162 ms


array([38852033, 29570639,  6153807, ..., 51604068, 33056119, 29899374])

In [90]:
%time zc[ix_dense_int]

CPU times: user 2.34 s, sys: 156 ms, total: 2.49 s
Wall time: 2.18 s


array([38852033, 29570639,  6153807, ..., 51604068, 33056119, 29899374])

In [91]:
cProfile.run('zc[ix_dense_int_sorted]', sort='time')

         55379 function calls in 0.491 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.183    0.183    0.237    0.237 new_indexing.py:340(__init__)
     1024    0.099    0.000    0.107    0.000 core.py:930(_decode_chunk)
     1024    0.065    0.000    0.191    0.000 core.py:768(_chunk_getitem)
        1    0.026    0.026    0.026    0.026 {built-in method numpy.core.multiarray.bincount}
     1025    0.025    0.000    0.025    0.000 new_indexing.py:387(__iter__)
        1    0.024    0.024    0.024    0.024 function_base.py:1848(diff)
        1    0.007    0.007    0.245    0.245 core.py:548(_get_selection)
     2048    0.006    0.000    0.006    0.000 core.py:323(<genexpr>)
     1025    0.006    0.000    0.046    0.000 new_indexing.py:494(__iter__)
     2048    0.004    0.000    0.004    0.000 {method 'reshape' of 'numpy.ndarray' objects}
     1024    0.004    0.000    0.008    0.000 index_tricks.py:26(ix_)
 

In [92]:
cProfile.run('zc[ix_dense_int]', sort='time')

         69723 function calls in 2.217 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    1.417    1.417    1.417    1.417 {method 'argsort' of 'numpy.ndarray' objects}
        1    0.198    0.198    1.834    1.834 new_indexing.py:340(__init__)
        1    0.169    0.169    0.169    0.169 {method 'take' of 'numpy.ndarray' objects}
     1024    0.167    0.000    0.306    0.000 core.py:768(_chunk_getitem)
     1024    0.116    0.000    0.122    0.000 core.py:930(_decode_chunk)
     1025    0.026    0.000    0.027    0.000 new_indexing.py:387(__iter__)
        1    0.024    0.024    0.024    0.024 function_base.py:1848(diff)
        1    0.023    0.023    0.023    0.023 {built-in method numpy.core.multiarray.bincount}
     1025    0.008    0.000    0.056    0.000 new_indexing.py:494(__iter__)
        1    0.007    0.007    2.213    2.213 core.py:391(__getitem__)
     2048    0.007    0.000    0.013    0.000 index_tr

### bool sparse selection

In [94]:
# relatively sparse selection
# one Bernoulli(0.0001) draw per item of `c`, cast to bool to form a mask
ix_sparse_bool = np.random.binomial(1, 0.0001, size=c.shape[0]).astype(bool)
# number of items actually selected by the mask
np.count_nonzero(ix_sparse_bool)

10033

In [95]:
%time c[ix_sparse_bool]

CPU times: user 20 ms, sys: 0 ns, total: 20 ms
Wall time: 21.6 ms


array([   35449,    41893,    45592, ..., 99987487, 99990184, 99993538])

In [96]:
%time zc[ix_sparse_bool]

CPU times: user 440 ms, sys: 56 ms, total: 496 ms
Wall time: 222 ms


array([   35449,    41893,    45592, ..., 99987487, 99990184, 99993538])

In [97]:
cProfile.run('zc[ix_sparse_bool]', sort='time')

         58423 function calls in 0.259 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1024    0.137    0.000    0.144    0.000 core.py:930(_decode_chunk)
     1024    0.026    0.000    0.026    0.000 {built-in method numpy.core.multiarray.count_nonzero}
     1025    0.023    0.000    0.023    0.000 {method 'nonzero' of 'numpy.ndarray' objects}
     1024    0.008    0.000    0.172    0.000 core.py:768(_chunk_getitem)
     1025    0.007    0.000    0.052    0.000 new_indexing.py:494(__iter__)
     1024    0.006    0.000    0.034    0.000 index_tricks.py:26(ix_)
     2048    0.006    0.000    0.006    0.000 core.py:323(<genexpr>)
        1    0.005    0.005    0.032    0.032 new_indexing.py:255(__init__)
     2048    0.004    0.000    0.004    0.000 {method 'reshape' of 'numpy.ndarray' objects}
        1    0.003    0.003    0.227    0.227 core.py:548(_get_selection)
     1025    0.003    0.000    0.003    0.000 new_inde

### int sparse selection

In [98]:
# Draw 1/10000 as many positions as `c` has items, sampling with
# replacement: the result is unordered and may contain duplicates.
ix_sparse_int = np.random.choice(len(c), size=len(c) // 10000, replace=True)
# Sorted copy of the same positions, benchmarked separately from the unsorted ones.
ix_sparse_int_sorted = np.sort(ix_sparse_int)
# Show how many positions were drawn and a preview of the unsorted indices.
len(ix_sparse_int), ix_sparse_int

(10000,
 array([49021295, 65674535, 71257616, ..., 12130114, 48117886, 98926729]))

In [99]:
%time c[ix_sparse_int_sorted]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 245 µs


array([   14556,    48679,    54538, ..., 99958362, 99994365, 99999645])

In [100]:
%time c[ix_sparse_int]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 233 µs


array([49021295, 65674535, 71257616, ..., 12130114, 48117886, 98926729])

In [101]:
%time zc[ix_sparse_int_sorted]

CPU times: user 388 ms, sys: 60 ms, total: 448 ms
Wall time: 172 ms


array([   14556,    48679,    54538, ..., 99958362, 99994365, 99999645])

In [102]:
%time zc[ix_sparse_int]

CPU times: user 456 ms, sys: 32 ms, total: 488 ms
Wall time: 182 ms


array([49021295, 65674535, 71257616, ..., 12130114, 48117886, 98926729])

In [103]:
cProfile.run('zc[ix_sparse_int]', sort='time')

         69723 function calls in 0.224 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1024    0.148    0.000    0.155    0.000 core.py:930(_decode_chunk)
     1025    0.008    0.000    0.038    0.000 new_indexing.py:494(__iter__)
     2048    0.008    0.000    0.015    0.000 index_tricks.py:26(ix_)
     1024    0.006    0.000    0.182    0.000 core.py:768(_chunk_getitem)
     2048    0.006    0.000    0.006    0.000 core.py:323(<genexpr>)
     3072    0.004    0.000    0.004    0.000 {method 'reshape' of 'numpy.ndarray' objects}
     1025    0.004    0.000    0.005    0.000 new_indexing.py:387(__iter__)
        1    0.003    0.003    0.223    0.223 core.py:548(_get_selection)
     1024    0.003    0.000    0.006    0.000 arrayprint.py:381(wrapper)
     2048    0.003    0.000    0.020    0.000 new_indexing.py:413(ix_)
     1024    0.003    0.000    0.010    0.000 {method 'join' of 'str' objects}
     1024    0.003    0

### sparse bool selection as zarr array

In [104]:
zix_sparse_bool = zarr.array(ix_sparse_bool)
zix_sparse_bool.info

0,1
Type,zarr.core.Array
Data type,bool
Shape,"(100000000,)"
Chunk shape,"(390625,)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,100000000 (95.4M)
No. bytes stored,507558 (495.7K)


In [105]:
%time zc[zix_sparse_bool]

CPU times: user 852 ms, sys: 140 ms, total: 992 ms
Wall time: 450 ms


array([   35449,    41893,    45592, ..., 99987487, 99990184, 99993538])

### slice with step

In [53]:
# NumPy baseline for a stepped slice; presumably wrapped in np.array() so that
# the selected items are actually copied (a basic NumPy slice is only a view),
# which makes the timing comparable with the Zarr reads below
%time np.array(c[::2])

CPU times: user 68 ms, sys: 28 ms, total: 96 ms
Wall time: 92.7 ms


array([       0,        2,        4, ..., 99999994, 99999996, 99999998])

In [54]:
%time zc[::2]

CPU times: user 1.3 s, sys: 268 ms, total: 1.57 s
Wall time: 1.3 s


array([       0,        2,        4, ..., 99999994, 99999996, 99999998])

In [55]:
%time zc[::10]

CPU times: user 564 ms, sys: 84 ms, total: 648 ms
Wall time: 396 ms


array([       0,       10,       20, ..., 99999970, 99999980, 99999990])

In [56]:
%time zc[::100]

CPU times: user 472 ms, sys: 40 ms, total: 512 ms
Wall time: 213 ms


array([       0,      100,      200, ..., 99999700, 99999800, 99999900])

In [57]:
%time zc[::1000]

CPU times: user 432 ms, sys: 48 ms, total: 480 ms
Wall time: 192 ms


array([       0,     1000,     2000, ..., 99997000, 99998000, 99999000])

## 2D Benchmarking

In [106]:
c.shape

(100000000,)

In [107]:
# reshape the 1D benchmark data into rows of 1000 columns; -1 lets NumPy
# work out the number of rows
d = c.reshape(-1, 1000)
d.shape

(100000, 1000)

In [108]:
zd = zarr.array(d)
zd.info

0,1
Type,zarr.core.Array
Data type,int64
Shape,"(100000, 1000)"
Chunk shape,"(3125, 32)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,800000000 (762.9M)
No. bytes stored,39228864 (37.4M)


### bool orthogonal selection

In [109]:
# One independent Boolean mask per dimension of `d` (rows first, then
# columns); each row/column is kept with probability 0.5.
ix0, ix1 = (
    np.random.binomial(1, 0.5, size=dim_len).astype(bool)
    for dim_len in d.shape
)

In [110]:
%time d[np.ix_(ix0, ix1)]

CPU times: user 140 ms, sys: 24 ms, total: 164 ms
Wall time: 165 ms


array([[       0,        1,        3, ...,      995,      998,      999],
       [    2000,     2001,     2003, ...,     2995,     2998,     2999],
       [    4000,     4001,     4003, ...,     4995,     4998,     4999],
       ..., 
       [99992000, 99992001, 99992003, ..., 99992995, 99992998, 99992999],
       [99997000, 99997001, 99997003, ..., 99997995, 99997998, 99997999],
       [99999000, 99999001, 99999003, ..., 99999995, 99999998, 99999999]])

In [111]:
%time zd.oindex[ix0, ix1]

CPU times: user 860 ms, sys: 84 ms, total: 944 ms
Wall time: 468 ms


array([[       0,        1,        3, ...,      995,      998,      999],
       [    2000,     2001,     2003, ...,     2995,     2998,     2999],
       [    4000,     4001,     4003, ...,     4995,     4998,     4999],
       ..., 
       [99992000, 99992001, 99992003, ..., 99992995, 99992998, 99992999],
       [99997000, 99997001, 99997003, ..., 99997995, 99997998, 99997999],
       [99999000, 99999001, 99999003, ..., 99999995, 99999998, 99999999]])

### int orthogonal selection

In [112]:
# One integer index array per dimension of `d` (rows first, then columns):
# half as many positions as the dimension is long, drawn with replacement,
# so the indices are unordered and may repeat.
ix0, ix1 = (
    np.random.choice(dim_len, size=int(dim_len * .5), replace=True)
    for dim_len in d.shape
)

In [113]:
%time d[np.ix_(ix0, ix1)]

CPU times: user 196 ms, sys: 56 ms, total: 252 ms
Wall time: 250 ms


array([[50767038, 50767472, 50767242, ..., 50767418, 50767445, 50767947],
       [28829038, 28829472, 28829242, ..., 28829418, 28829445, 28829947],
       [17474038, 17474472, 17474242, ..., 17474418, 17474445, 17474947],
       ..., 
       [ 5185038,  5185472,  5185242, ...,  5185418,  5185445,  5185947],
       [27248038, 27248472, 27248242, ..., 27248418, 27248445, 27248947],
       [72575038, 72575472, 72575242, ..., 72575418, 72575445, 72575947]])

In [114]:
%time zd.oindex[ix0, ix1]

CPU times: user 1.17 s, sys: 128 ms, total: 1.3 s
Wall time: 682 ms


array([[50767038, 50767472, 50767242, ..., 50767418, 50767445, 50767947],
       [28829038, 28829472, 28829242, ..., 28829418, 28829445, 28829947],
       [17474038, 17474472, 17474242, ..., 17474418, 17474445, 17474947],
       ..., 
       [ 5185038,  5185472,  5185242, ...,  5185418,  5185445,  5185947],
       [27248038, 27248472, 27248242, ..., 27248418, 27248445, 27248947],
       [72575038, 72575472, 72575242, ..., 72575418, 72575445, 72575947]])

### coordinate (point) selection

In [115]:
# number of points to select: 10% of the total number of items in `d`
n = int(d.size * .1)
# random row and column coordinate for each point, drawn with replacement;
# the two arrays are used together as (row, column) pairs below
ix0 = np.random.choice(d.shape[0], size=n, replace=True)
ix1 = np.random.choice(d.shape[1], size=n, replace=True)
# display the number of points
n

10000000

In [116]:
%time d[ix0, ix1]

CPU times: user 236 ms, sys: 56 ms, total: 292 ms
Wall time: 289 ms


array([71132822, 44407411, 66463897, ..., 16188129, 30562595,  3115554])

In [117]:
%time zd.vindex[ix0, ix1]

CPU times: user 3.06 s, sys: 296 ms, total: 3.36 s
Wall time: 2.83 s


array([71132822, 44407411, 66463897, ..., 16188129, 30562595,  3115554])

In [118]:
cProfile.run('zd.vindex[ix0, ix1]', sort='time')

         48284 function calls in 2.856 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    1.401    1.401    1.401    1.401 {method 'argsort' of 'numpy.ndarray' objects}
        3    0.412    0.137    0.412    0.137 new_indexing.py:601(<genexpr>)
        3    0.259    0.086    0.259    0.086 new_indexing.py:592(<genexpr>)
        1    0.242    0.242    2.414    2.414 new_indexing.py:557(__init__)
     1024    0.196    0.000    0.377    0.000 core.py:768(_chunk_getitem)
     1024    0.151    0.000    0.160    0.000 core.py:930(_decode_chunk)
        1    0.056    0.056    0.056    0.056 {built-in method numpy.core.multiarray.ravel_multi_index}
        1    0.038    0.038    0.038    0.038 {built-in method numpy.core.multiarray.bincount}
     3072    0.023    0.000    0.023    0.000 new_indexing.py:636(<genexpr>)
        1    0.012    0.012    2.843    2.843 core.py:537(get_coordinate_selection)
     1025    0.010   

## h5py comparison

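N.B. This comparison is not entirely fair, because the HDF5 dataset below uses a slower compressor (gzip) than the Zarr array (Blosc with LZ4); it is included for interest only.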

In [65]:
import h5py
import tempfile

In [78]:
# Create an in-memory HDF5 file: the 'core' driver keeps the data in memory
# and backing_store=False means changes are discarded on close, so the
# temporary path only serves as a unique file name.
# The mode is given explicitly because h5py >= 3.0 opens files read-only by
# default, which fails for a path that does not exist yet.
h5f = h5py.File(tempfile.mktemp(), mode='w', driver='core', backing_store=False)

In [79]:
# store the same data, with the same chunk shape as the Zarr array `zc`,
# compressed with gzip (compression_opts=1) and the HDF5 shuffle filter enabled
hc = h5f.create_dataset('c', data=c, compression='gzip', compression_opts=1, chunks=zc.chunks, shuffle=True)
# display the dataset summary
hc

<HDF5 dataset "c": shape (100000000,), type "<i8">

In [80]:
%time hc[:]

CPU times: user 1.16 s, sys: 172 ms, total: 1.33 s
Wall time: 1.32 s


array([       0,        1,        2, ..., 99999997, 99999998, 99999999])

In [81]:
%time hc[ix_sparse_bool]

CPU times: user 1.11 s, sys: 0 ns, total: 1.11 s
Wall time: 1.11 s


array([   35449,    41893,    45592, ..., 99987487, 99990184, 99993538])

In [82]:
# Deliberately left commented out: this is pathological, takes > 1 minute
# %time hc[ix_dense_bool]

In [83]:
# this is pretty slow: compare with the timing of the equivalent
# zc[::1000] read in the "slice with step" section above
%time hc[::1000]

CPU times: user 38.3 s, sys: 136 ms, total: 38.4 s
Wall time: 38.1 s


array([       0,     1000,     2000, ..., 99997000, 99998000, 99999000])