In [1]:
import sys
# Prepend the parent directory to the import path; presumably this makes the
# local zarr checkout take precedence over any installed version -- TODO confirm.
sys.path.insert(0, '..')
import zarr
import numpy as np
# Seed numpy's global RNG so the random selections below are reproducible.
np.random.seed(42)
import cProfile

## Demonstrate advanced indexing

### Indexing with Boolean arrays

In [2]:
# Small 1-D reference array (the integers 0-9) used in the indexing demos.
a = np.arange(10)

In [3]:
# Random boolean mask with one entry per element of `a`; each entry is a
# binomial(1, 0.5) draw converted to bool.
ix = np.random.binomial(1, 0.5, size=a.shape[0]).astype(bool)
ix

array([False,  True,  True,  True, False, False, False,  True,  True,  True], dtype=bool)

In [4]:
a[ix]

array([1, 2, 3, 7, 8, 9])

In [5]:
# Wrap `a` in a zarr array with chunks of 2 elements and apply the same
# boolean mask that was used on the numpy array.
za = zarr.array(a, chunks=2)
za[ix]

array([1, 2, 3, 7, 8, 9])

In [6]:
# Assign through the boolean mask, then read the whole array back to inspect it.
za[ix] = a[ix] * 10
za[:]

array([ 0, 10, 20, 30,  4,  5,  6, 70, 80, 90])

### Indexing with integer arrays

In [7]:
# Random integer indices into `a`, half as many as there are elements.
# np.random.choice samples with replacement by default, so indices may repeat.
ix = np.random.choice(a.shape[0], size=a.shape[0]//2)
ix

array([5, 4, 1, 7, 5])

In [8]:
a[ix]

array([5, 4, 1, 7, 5])

In [9]:
# Rebuild the zarr array from the unmodified `a` and select with the integer indices.
za = zarr.array(a, chunks=2)
za[ix]

array([5, 4, 1, 7, 5])

In [10]:
# Assign through the integer index array, then read the whole array back.
za[ix] = a[ix] * 10
za[:]

array([ 0, 10,  2,  3, 40, 50,  6, 70,  8,  9])

### Multidimensional indexing

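N.B., orthogonal indexing is available via the `oindex` property. This differs from numpy fancy indexing when more than one dimension is indexed with an array: the selection for each dimension is applied independently (as with `np.ix_`), rather than the index arrays being combined element-wise.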

In [11]:
# 10x10 reference array holding the integers 0-99.
b = np.arange(100).reshape(10, 10)

In [12]:
# Random boolean mask over the first dimension (rows) of `b`.
ix0 = np.random.binomial(1, 0.5, size=b.shape[0]).astype(bool)
ix0

array([False,  True,  True,  True, False, False,  True, False, False,  True], dtype=bool)

In [13]:
# Random boolean mask over the second dimension (columns) of `b`.
ix1 = np.random.binomial(1, 0.5, size=b.shape[1]).astype(bool)
ix1

array([False, False,  True, False,  True, False,  True,  True, False, False], dtype=bool)

In [14]:
# numpy equivalent of an orthogonal selection: np.ix_ combines the per-axis
# masks so that every selected row is paired with every selected column.
b[np.ix_(ix0, ix1)]

array([[12, 14, 16, 17],
       [22, 24, 26, 27],
       [32, 34, 36, 37],
       [62, 64, 66, 67],
       [92, 94, 96, 97]])

In [15]:
# Same selection on a zarr array with 2x2 chunks, using the `oindex` accessor.
zb = zarr.array(b, chunks=(2, 2))
zb.oindex[ix0, ix1]

array([[12, 14, 16, 17],
       [22, 24, 26, 27],
       [32, 34, 36, 37],
       [62, 64, 66, 67],
       [92, 94, 96, 97]])

In [16]:
# Orthogonal assignment: write -1 at every selected (row, column) combination,
# then read the whole array back to inspect it.
zb.oindex[ix0, ix1] = -1
zb[:]

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, -1, 13, -1, 15, -1, -1, 18, 19],
       [20, 21, -1, 23, -1, 25, -1, -1, 28, 29],
       [30, 31, -1, 33, -1, 35, -1, -1, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, -1, 63, -1, 65, -1, -1, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, -1, 93, -1, 95, -1, -1, 98, 99]])

In [17]:
# Random integer row indices (sampled with replacement, so rows may repeat).
ix0 = np.random.choice(b.shape[0], size=b.shape[0]//2)
ix0

array([8, 1, 9, 8, 9])

In [18]:
# Random integer column indices (sampled with replacement).
ix1 = np.random.choice(b.shape[1], size=b.shape[1]//2)
ix1

array([4, 1, 3, 6, 7])

In [19]:
# numpy reference result for the orthogonal selection with integer index arrays.
b[np.ix_(ix0, ix1)]

array([[84, 81, 83, 86, 87],
       [14, 11, 13, 16, 17],
       [94, 91, 93, 96, 97],
       [84, 81, 83, 86, 87],
       [94, 91, 93, 96, 97]])

In [20]:
# Rebuild the zarr array from the unmodified `b` and make the same orthogonal
# selection with integer index arrays.
zb = zarr.array(b, chunks=(2, 2))
zb.oindex[ix0, ix1]

array([[84, 81, 83, 86, 87],
       [14, 11, 13, 16, 17],
       [94, 91, 93, 96, 97],
       [84, 81, 83, 86, 87],
       [94, 91, 93, 96, 97]])

In [21]:
# Orthogonal assignment with integer index arrays, then read the array back.
zb.oindex[ix0, ix1] = -1
zb[:]

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, -1, 12, -1, -1, 15, -1, -1, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, -1, 82, -1, -1, 85, -1, -1, 88, 89],
       [90, -1, 92, -1, -1, 95, -1, -1, 98, 99]])

### Indexing with zarr bool arrays

In [22]:
# Fresh random boolean mask over `a`, plus a copy of it stored as a zarr array
# with chunks of 2 elements.
ix = np.random.binomial(1, 0.5, size=a.shape[0]).astype(bool)
zix = zarr.array(ix, chunks=2)

In [23]:
# Reference result: index the zarr array with the in-memory numpy mask.
za = zarr.array(a, chunks=2)
za[ix]

array([1, 3, 5, 6, 8, 9])

In [24]:
# Index with the zarr-backed mask itself; this will not load all of zix into memory.
za[zix]

array([1, 3, 5, 6, 8, 9])

## 1D Benchmarking

In [25]:
# Large 1-D benchmark array of 100 million integers; nbytes reports its in-memory size.
c = np.arange(100000000)
c.nbytes

800000000

In [26]:
# Time the creation of a zarr array from `c` (no chunking or compression
# arguments are passed, so zarr's defaults apply), then show its summary info.
%time zc = zarr.array(c)
zc.info

CPU times: user 524 ms, sys: 124 ms, total: 648 ms
Wall time: 226 ms


0,1
Type,zarr.core.Array
Data type,int64
Shape,"(100000000,)"
Chunk shape,"(48829,)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,800000000 (762.9M)
No. bytes stored,11870277 (11.3M)


In [27]:
# Baseline: time a plain in-memory copy of the numpy array.
%time c.copy()

CPU times: user 124 ms, sys: 52 ms, total: 176 ms
Wall time: 173 ms


array([       0,        1,        2, ..., 99999997, 99999998, 99999999])

In [28]:
# Time reading the whole zarr array back into a numpy array.
%time zc[:]

CPU times: user 512 ms, sys: 108 ms, total: 620 ms
Wall time: 312 ms


array([       0,        1,        2, ..., 99999997, 99999998, 99999999])

### bool dense selection

In [29]:
# Relatively dense selection: a boolean mask in which each element is selected
# with probability 0.5; count_nonzero reports how many elements were selected.
ix_dense_bool = np.random.binomial(1, 0.5, size=c.shape[0]).astype(bool)
np.count_nonzero(ix_dense_bool)

49994863

In [30]:
%time c[ix_dense_bool]

CPU times: user 612 ms, sys: 8 ms, total: 620 ms
Wall time: 621 ms


array([       0,        1,        2, ..., 99999994, 99999995, 99999996])

In [31]:
%time zc[ix_dense_bool]

CPU times: user 1.47 s, sys: 124 ms, total: 1.6 s
Wall time: 922 ms


array([       0,        1,        2, ..., 99999994, 99999995, 99999996])

In [32]:
# Profile the zarr dense boolean selection, listing functions by internal time.
cProfile.run('zc[ix_dense_bool]', sort='time')

         116791 function calls in 0.943 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2049    0.445    0.000    0.445    0.000 {method 'nonzero' of 'numpy.ndarray' objects}
     2048    0.234    0.000    0.250    0.000 core.py:931(_decode_chunk)
     2048    0.140    0.000    0.425    0.000 core.py:769(_chunk_getitem)
     2049    0.013    0.000    0.497    0.000 new_indexing.py:547(__iter__)
     2048    0.011    0.000    0.011    0.000 {built-in method numpy.core.multiarray.count_nonzero}
     2048    0.011    0.000    0.465    0.000 index_tricks.py:26(ix_)
     4096    0.010    0.000    0.010    0.000 core.py:324(<genexpr>)
     4096    0.009    0.000    0.009    0.000 {method 'reshape' of 'numpy.ndarray' objects}
        1    0.006    0.006    0.928    0.928 core.py:549(_get_selection)
     2048    0.006    0.000    0.006    0.000 {built-in method numpy.core.multiarray.frombuffer}
     2048    0.005    0.000    0

### int dense selection

In [33]:
# Dense integer selection: draw half as many indices as `c` has elements, with
# replacement, and keep a sorted copy so that sorted and unsorted access can be
# benchmarked separately.
ix_dense_int = np.random.choice(c.shape[0], size=c.shape[0] // 2, replace=True)
ix_dense_int_sorted = np.sort(ix_dense_int)
(len(ix_dense_int), ix_dense_int)

(50000000,
 array([86098038, 51488465,  9242439, ..., 31235734, 20293124, 13824417]))

In [34]:
# Time an argsort of the unsorted indices on its own; the cProfile output for
# zc[ix_dense_int] in this notebook lists argsort as its largest single cost.
%time np.argsort(ix_dense_int, kind='quicksort')

CPU times: user 10.6 s, sys: 0 ns, total: 10.6 s
Wall time: 10.6 s


array([35886154,  6592339, 23747762, ..., 26251840, 48664862,  3479456])

In [36]:
%time c[ix_dense_int_sorted]

CPU times: user 152 ms, sys: 0 ns, total: 152 ms
Wall time: 152 ms


array([       0,        0,        1, ..., 99999994, 99999999, 99999999])

In [37]:
%time zc[ix_dense_int_sorted]

CPU times: user 1.39 s, sys: 188 ms, total: 1.58 s
Wall time: 1.29 s


array([       0,        0,        1, ..., 99999994, 99999999, 99999999])

In [38]:
%time c[ix_dense_int]

CPU times: user 684 ms, sys: 36 ms, total: 720 ms
Wall time: 718 ms


array([86098038, 51488465,  9242439, ..., 31235734, 20293124, 13824417])

In [39]:
%time zc[ix_dense_int]

CPU times: user 14.8 s, sys: 716 ms, total: 15.5 s
Wall time: 14.7 s


array([86098038, 51488465,  9242439, ..., 31235734, 20293124, 13824417])

In [40]:
cProfile.run('zc[ix_dense_int_sorted]', sort='time')

         110675 function calls in 1.659 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.781    0.781    1.051    1.051 new_indexing.py:393(__init__)
     2048    0.209    0.000    0.380    0.000 core.py:769(_chunk_getitem)
     2049    0.148    0.000    0.150    0.000 new_indexing.py:440(__iter__)
        1    0.132    0.132    0.132    0.132 function_base.py:1848(diff)
     2048    0.130    0.000    0.141    0.000 core.py:931(_decode_chunk)
        1    0.120    0.120    0.120    0.120 {built-in method numpy.core.multiarray.bincount}
        1    0.018    0.018    1.642    1.642 core.py:527(get_orthogonal_selection)
        1    0.018    0.018    1.659    1.659 <string>:1(<module>)
        4    0.017    0.004    0.017    0.004 {method 'reduce' of 'numpy.ufunc' objects}
     2049    0.012    0.000    0.188    0.000 new_indexing.py:547(__iter__)
     4096    0.008    0.000    0.008    0.000 core.py:324(<genexp

In [41]:
cProfile.run('zc[ix_dense_int]', sort='time')

         139355 function calls in 16.778 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1   13.664   13.664   13.664   13.664 {method 'argsort' of 'numpy.ndarray' objects}
        1    0.787    0.787   15.497   15.497 new_indexing.py:393(__init__)
        1    0.776    0.776    0.776    0.776 {method 'take' of 'numpy.ndarray' objects}
     2048    0.741    0.000    1.006    0.000 core.py:769(_chunk_getitem)
     2048    0.220    0.000    0.235    0.000 core.py:931(_decode_chunk)
     2049    0.140    0.000    0.141    0.000 new_indexing.py:440(__iter__)
        1    0.130    0.130    0.130    0.130 function_base.py:1848(diff)
        1    0.121    0.121    0.121    0.121 {built-in method numpy.core.multiarray.bincount}
        1    0.040    0.040   16.760   16.760 core.py:392(__getitem__)
        4    0.019    0.005    0.019    0.005 {method 'reduce' of 'numpy.ufunc' objects}
        1    0.018    0.018   16.778   

### bool sparse selection

In [42]:
# Relatively sparse selection: a boolean mask in which each element is selected
# with probability 0.0001; count_nonzero reports how many elements were selected.
ix_sparse_bool = np.random.binomial(1, 0.0001, size=c.shape[0]).astype(bool)
np.count_nonzero(ix_sparse_bool)

9958

In [43]:
%time c[ix_sparse_bool]

CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 21.6 ms


array([    1063,    28396,    37229, ..., 99955875, 99979354, 99995791])

In [44]:
%time zc[ix_sparse_bool]

CPU times: user 508 ms, sys: 72 ms, total: 580 ms
Wall time: 288 ms


array([    1063,    28396,    37229, ..., 99955875, 99979354, 99995791])

In [45]:
cProfile.run('zc[ix_sparse_bool]', sort='time')

         116461 function calls in 0.300 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2042    0.145    0.000    0.158    0.000 core.py:931(_decode_chunk)
     2043    0.023    0.000    0.023    0.000 {method 'nonzero' of 'numpy.ndarray' objects}
     2043    0.013    0.000    0.074    0.000 new_indexing.py:547(__iter__)
     2042    0.013    0.000    0.206    0.000 core.py:769(_chunk_getitem)
     2048    0.011    0.000    0.011    0.000 {built-in method numpy.core.multiarray.count_nonzero}
     2042    0.011    0.000    0.042    0.000 index_tricks.py:26(ix_)
     4084    0.010    0.000    0.010    0.000 core.py:324(<genexpr>)
     4084    0.007    0.000    0.007    0.000 {method 'reshape' of 'numpy.ndarray' objects}
        1    0.005    0.005    0.286    0.286 core.py:549(_get_selection)
     2042    0.005    0.000    0.011    0.000 arrayprint.py:381(wrapper)
     2043    0.005    0.000    0.006    0.000 new_indexi

### int sparse selection

In [52]:
# Sparse integer selection: draw one index per 10000 elements of `c`, with
# replacement, and keep a sorted copy for the sorted-access benchmark.
ix_sparse_int = np.random.choice(c.shape[0], size=c.shape[0] // 10000, replace=True)
ix_sparse_int_sorted = np.sort(ix_sparse_int)
(len(ix_sparse_int), ix_sparse_int)

(10000,
 array([94979430, 11935675, 63597355, ..., 91349759, 40936288, 76612910]))

In [53]:
%time c[ix_sparse_int_sorted]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 279 µs


array([   11962,    27590,    30701, ..., 99968761, 99977334, 99990442])

In [54]:
%time c[ix_sparse_int]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 362 µs


array([94979430, 11935675, 63597355, ..., 91349759, 40936288, 76612910])

In [55]:
%time zc[ix_sparse_int_sorted]

CPU times: user 472 ms, sys: 52 ms, total: 524 ms
Wall time: 243 ms


array([   11962,    27590,    30701, ..., 99968761, 99977334, 99990442])

In [56]:
%time zc[ix_sparse_int]

CPU times: user 448 ms, sys: 104 ms, total: 552 ms
Wall time: 255 ms


array([94979430, 11935675, 63597355, ..., 91349759, 40936288, 76612910])

In [57]:
cProfile.run('zc[ix_sparse_int]', sort='time')

         138743 function calls in 0.280 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     2039    0.156    0.000    0.167    0.000 core.py:931(_decode_chunk)
     2040    0.013    0.000    0.064    0.000 new_indexing.py:547(__iter__)
     4078    0.012    0.000    0.025    0.000 index_tricks.py:26(ix_)
     2039    0.009    0.000    0.210    0.000 core.py:769(_chunk_getitem)
     4078    0.009    0.000    0.009    0.000 core.py:324(<genexpr>)
     2040    0.009    0.000    0.010    0.000 new_indexing.py:440(__iter__)
     6117    0.008    0.000    0.008    0.000 {method 'reshape' of 'numpy.ndarray' objects}
        1    0.005    0.005    0.279    0.279 core.py:549(_get_selection)
     2039    0.005    0.000    0.010    0.000 arrayprint.py:381(wrapper)
     4078    0.004    0.000    0.034    0.000 new_indexing.py:466(ix_)
     2039    0.004    0.000    0.013    0.000 core.py:319(_cdata_shape)
     2039    0.004    0.000  

### sparse bool selection as zarr array

In [58]:
# Store the sparse boolean mask as a zarr array (default chunking and
# compression) and show its summary info.
zix_sparse_bool = zarr.array(ix_sparse_bool)
zix_sparse_bool.info

0,1
Type,zarr.core.Array
Data type,bool
Shape,"(100000000,)"
Chunk shape,"(195313,)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,100000000 (95.4M)
No. bytes stored,511297 (499.3K)


In [59]:
# Time a selection in which the boolean mask is itself a zarr array.
%time zc[zix_sparse_bool]

CPU times: user 1.01 s, sys: 228 ms, total: 1.24 s
Wall time: 640 ms


array([    1063,    28396,    37229, ..., 99955875, 99979354, 99995791])

### slice with step

In [60]:
# numpy baseline for a strided slice. c[::2] alone is only a view, so
# np.array(...) is used to force a copy of the selected elements.
%time np.array(c[::2])

CPU times: user 80 ms, sys: 24 ms, total: 104 ms
Wall time: 101 ms


array([       0,        2,        4, ..., 99999994, 99999996, 99999998])

In [61]:
%time zc[::2]

CPU times: user 1.45 s, sys: 664 ms, total: 2.12 s
Wall time: 1.78 s


array([       0,        2,        4, ..., 99999994, 99999996, 99999998])

In [62]:
%time zc[::10]

CPU times: user 572 ms, sys: 224 ms, total: 796 ms
Wall time: 513 ms


array([       0,       10,       20, ..., 99999970, 99999980, 99999990])

In [63]:
%time zc[::100]

CPU times: user 488 ms, sys: 88 ms, total: 576 ms
Wall time: 273 ms


array([       0,      100,      200, ..., 99999700, 99999800, 99999900])

In [64]:
%time zc[::1000]

CPU times: user 472 ms, sys: 64 ms, total: 536 ms
Wall time: 225 ms


array([       0,     1000,     2000, ..., 99997000, 99998000, 99999000])

## 2D Benchmarking

In [89]:
c.shape

(100000000,)

In [90]:
# Reshape the 1-D data into rows of 1000 columns for the 2-D benchmarks.
d = c.reshape(-1, 1000)
d.shape

(100000, 1000)

In [91]:
# 2-D zarr array built from `d` with default chunking and compression.
zd = zarr.array(d)
zd.info

0,1
Type,zarr.core.Array
Data type,int64
Shape,"(100000, 1000)"
Chunk shape,"(1563, 32)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,builtins.dict
No. bytes,800000000 (762.9M)
No. bytes stored,39862349 (38.0M)


### bool orthogonal selection

In [92]:
# Random boolean masks over the rows (ix0) and columns (ix1) of `d`, each
# selecting entries with probability 0.5.
ix0 = np.random.binomial(1, 0.5, size=d.shape[0]).astype(bool)
ix1 = np.random.binomial(1, 0.5, size=d.shape[1]).astype(bool)


In [93]:
%time d[np.ix_(ix0, ix1)]

CPU times: user 124 ms, sys: 40 ms, total: 164 ms
Wall time: 164 ms


array([[       3,        5,        6, ...,      994,      995,      997],
       [    2003,     2005,     2006, ...,     2994,     2995,     2997],
       [    3003,     3005,     3006, ...,     3994,     3995,     3997],
       ..., 
       [99995003, 99995005, 99995006, ..., 99995994, 99995995, 99995997],
       [99997003, 99997005, 99997006, ..., 99997994, 99997995, 99997997],
       [99999003, 99999005, 99999006, ..., 99999994, 99999995, 99999997]])

In [94]:
%time zd.oindex[ix0, ix1]

CPU times: user 832 ms, sys: 84 ms, total: 916 ms
Wall time: 533 ms


array([[       3,        5,        6, ...,      994,      995,      997],
       [    2003,     2005,     2006, ...,     2994,     2995,     2997],
       [    3003,     3005,     3006, ...,     3994,     3995,     3997],
       ..., 
       [99995003, 99995005, 99995006, ..., 99995994, 99995995, 99995997],
       [99997003, 99997005, 99997006, ..., 99997994, 99997995, 99997997],
       [99999003, 99999005, 99999006, ..., 99999994, 99999995, 99999997]])

### int orthogonal selection

In [95]:
# Random integer row and column indices, half as many as the size of the
# corresponding dimension of `d`, sampled with replacement.
ix0 = np.random.choice(d.shape[0], size=d.shape[0] // 2, replace=True)
ix1 = np.random.choice(d.shape[1], size=d.shape[1] // 2, replace=True)

In [96]:
%time d[np.ix_(ix0, ix1)]

CPU times: user 224 ms, sys: 56 ms, total: 280 ms
Wall time: 277 ms


array([[16704459, 16704351, 16704547, ..., 16704405, 16704425, 16704805],
       [10766459, 10766351, 10766547, ..., 10766405, 10766425, 10766805],
       [64625459, 64625351, 64625547, ..., 64625405, 64625425, 64625805],
       ..., 
       [12875459, 12875351, 12875547, ..., 12875405, 12875425, 12875805],
       [58689459, 58689351, 58689547, ..., 58689405, 58689425, 58689805],
       [18138459, 18138351, 18138547, ..., 18138405, 18138425, 18138805]])

In [97]:
%time zd.oindex[ix0, ix1]

CPU times: user 1.06 s, sys: 120 ms, total: 1.18 s
Wall time: 675 ms


array([[16704459, 16704351, 16704547, ..., 16704405, 16704425, 16704805],
       [10766459, 10766351, 10766547, ..., 10766405, 10766425, 10766805],
       [64625459, 64625351, 64625547, ..., 64625405, 64625425, 64625805],
       ..., 
       [12875459, 12875351, 12875547, ..., 12875405, 12875425, 12875805],
       [58689459, 58689351, 58689547, ..., 58689405, 58689425, 58689805],
       [18138459, 18138351, 18138547, ..., 18138405, 18138425, 18138805]])

### coordinate (point) selection

In [120]:
# Number of points to select: 0.5% of the total number of elements in `d`.
n = int(d.size * .005)
# Row and column coordinates are drawn independently, with replacement.
ix0 = np.random.choice(d.shape[0], size=n, replace=True)
ix1 = np.random.choice(d.shape[1], size=n, replace=True)
n

500000

In [121]:
# Time a lexsort of the unsorted coordinates. np.lexsort uses the last key in
# the tuple (ix1 here) as the primary sort key.
%time np.lexsort((ix0, ix1))

CPU times: user 228 ms, sys: 0 ns, total: 228 ms
Wall time: 228 ms


array([235092, 460446, 351446, ...,  66295,  90139, 174162])

In [122]:
# Time lexsort on keys that are already sorted, for comparison with the
# unsorted case.
# NOTE(review): ix0 and ix1 are sorted independently, so the sorted arrays no
# longer describe the same (row, column) pairs as ix0/ix1; they look intended
# for timing only -- confirm before using them for selection.
ix0_sorted = np.sort(ix0)
ix1_sorted = np.sort(ix1)
%time np.lexsort((ix0_sorted, ix1_sorted))

CPU times: user 48 ms, sys: 0 ns, total: 48 ms
Wall time: 46.8 ms


array([     0,      1,      2, ..., 499997, 499998, 499999])

In [113]:
# numpy baseline: fancy indexing with two index arrays pairs ix0[i] with ix1[i]
# and returns one element per pair.
%time d[ix0, ix1]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 1.29 ms


array([15735056, 50367996, 82690284, ..., 79292255, 83283781, 38856303])

In [114]:
# Point-wise (coordinate) selection on the zarr array via the `vindex` accessor.
%time zd.vindex[ix0, ix1]

CPU times: user 13.5 s, sys: 1.7 s, total: 15.2 s
Wall time: 6.33 s


array([15735056, 50367996, 82690284, ..., 79292255, 83283781, 38856303])

In [116]:
zd.nchunks

2048

In [115]:
cProfile.run('zd.vindex[ix0, ix1]', sort='time')

         2698965 function calls in 5.459 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    49978    2.866    0.000    3.116    0.000 core.py:931(_decode_chunk)
    49978    0.411    0.000    4.602    0.000 core.py:769(_chunk_getitem)
    49979    0.316    0.000    0.752    0.000 new_indexing.py:660(__iter__)
   149934    0.296    0.000    0.296    0.000 new_indexing.py:677(<genexpr>)
   149940    0.202    0.000    0.202    0.000 core.py:324(<genexpr>)
    99956    0.162    0.000    0.343    0.000 arrayprint.py:381(wrapper)
    49978    0.129    0.000    0.533    0.000 {method 'join' of 'str' objects}
    49978    0.115    0.000    0.115    0.000 {method 'reshape' of 'numpy.ndarray' objects}
    99956    0.105    0.000    0.123    0.000 arrayprint.py:399(array2string)
        1    0.099    0.099    5.454    5.454 core.py:549(_get_selection)
    49978    0.097    0.000    0.169    0.000 util.py:113(is_total_slice)
    49980

## h5py comparison

N.B., this is not really a fair comparison because the h5py dataset uses a slower compressor (gzip) than the zarr arrays (Blosc/LZ4), but it is included for interest...

In [65]:
import h5py
import tempfile

In [78]:
# Open an in-memory HDF5 file. With driver='core' and backing_store=False,
# h5py keeps the contents in memory and discards them when the file is closed,
# so the mktemp() path serves only as a file name.
# The mode is passed explicitly because h5py >= 3.0 defaults to read-only
# ('r'), which cannot create a new file.
h5f = h5py.File(tempfile.mktemp(), mode='w', driver='core', backing_store=False)

In [79]:
# Create an HDF5 dataset from `c` using the same chunk shape as `zc`, gzip
# compression at level 1 and the shuffle filter enabled.
hc = h5f.create_dataset('c', data=c, compression='gzip', compression_opts=1, chunks=zc.chunks, shuffle=True)
hc

<HDF5 dataset "c": shape (100000000,), type "<i8">

In [80]:
%time hc[:]

CPU times: user 1.16 s, sys: 172 ms, total: 1.33 s
Wall time: 1.32 s


array([       0,        1,        2, ..., 99999997, 99999998, 99999999])

In [81]:
%time hc[ix_sparse_bool]

CPU times: user 1.11 s, sys: 0 ns, total: 1.11 s
Wall time: 1.11 s


array([    1063,    28396,    37229, ..., 99955875, 99979354, 99995791])

In [82]:
# Dense boolean selection through h5py is pathological (takes > 1 minute),
# so the timing call is left commented out:
# %time hc[ix_dense_bool]

In [83]:
# Strided selection through h5py; this is pretty slow (see the timing output).
%time hc[::1000]

CPU times: user 38.3 s, sys: 136 ms, total: 38.4 s
Wall time: 38.1 s


array([       0,     1000,     2000, ..., 99997000, 99998000, 99999000])