In [1]:
# Put the parent directory first on the import path (presumably so the local
# zarr checkout is picked up ahead of any installed copy -- confirm).
import sys
sys.path.insert(0, '..')
import cProfile
# Report the version of each library (and its bundled Blosc) under test.
import numpy as np
print('numpy', np.__version__)
import zarr
print('zarr', zarr.__version__, 'blosc', ' '.join(zarr.blosc_version()))
import bcolz
print('bcolz', bcolz.__version__, 'blosc', ' '.join(bcolz.blosc_version()))
import line_profiler
# Limit bcolz's Blosc to a single thread. As the last expression of the cell,
# the call's return value is displayed (per the bcolz docs, the previous
# thread count).
bcolz.blosc_set_nthreads(1)

numpy 1.11.0
zarr 0.3.1.dev8 blosc 1.8.1 $Date:: 2016-04-08 #$
bcolz 1.0.0 blosc 1.8.1 $Date:: 2016-04-08 #$


4

## 1D array creation

In [2]:
# 1D benchmark input: 10**8 consecutive int32 values (4 bytes each).
a = np.arange(10**8, dtype=np.int32)
a

array([       0,        1,        2, ..., 99999997, 99999998, 99999999], dtype=int32)

In [3]:
# Compress `a` into a bcolz carray using the 'lz4' compressor name; all other
# compression parameters are left at bcolz's defaults. The bare `c` displays
# the carray's repr.
c = bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))
c

carray((100000000,), int32)
  nbytes: 381.47 MB; cbytes: 7.68 MB; ratio: 49.67
  cparams := cparams(clevel=5, shuffle=1, cname='lz4')
[       0        1        2 ..., 99999997 99999998 99999999]

In [4]:
# Build the zarr counterpart of `c`, reusing the chunk length bcolz chose
# (c.chunklen) and the same 'lz4' compressor name so the two are comparable.
z = zarr.array(a, chunks=c.chunklen, cname='lz4')
z

zarr.ext.SynchronizedArray((100000000,), int32, chunks=(262144,))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.6M; ratio: 57.4; initialized: 382/382

In [5]:
# Same zarr array but created with lazy=True; kept under the separate name
# `zl` so the lazy and non-lazy variants can be benchmarked side by side.
zl = zarr.array(a, chunks=c.chunklen, cname='lz4', lazy=True)
zl

zarr.ext.SynchronizedLazyArray((100000000,), int32, chunks=(262144,))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.6M; ratio: 57.4; initialized: 382/382

In [6]:
# Time building a bcolz carray from the 1D array `a` (cname='lz4').
%timeit bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))

1 loops, best of 3: 154 ms per loop


In [7]:
# Time building the zarr array from `a` with the bcolz chunk length.
%timeit zarr.array(a, chunks=c.chunklen, cname='lz4')

10 loops, best of 3: 134 ms per loop


In [8]:
# Time the same zarr construction with lazy=True.
%timeit zarr.array(a, chunks=c.chunklen, cname='lz4', lazy=True)

10 loops, best of 3: 142 ms per loop


In [9]:
# Profile one zarr array construction; sort='time' orders the report by each
# function's internal time so the hottest functions are listed first.
cProfile.run('zarr.array(a, chunks=c.chunklen, cname="lz4")', sort='time')

         8802 function calls (7271 primitive calls) in 0.185 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.163    0.000    0.163    0.000 ext.pyx:400(put)
  762/382    0.004    0.000    0.170    0.000 ext.pyx:318(__setitem__)
  763/382    0.003    0.000    0.005    0.000 ext.pyx:1107(create_chunk)
    381/0    0.002    0.000    0.000          ext.pyx:905(genexpr)
      382    0.002    0.000    0.004    0.000 ext.pyx:245(__cinit__)
    381/0    0.002    0.000    0.000          ext.pyx:919(genexpr)
      763    0.001    0.000    0.002    0.000 numeric.py:2064(isscalar)
      762    0.001    0.000    0.001    0.000 {built-in method array}
  384/383    0.001    0.000    0.001    0.000 ext.pyx:143(_normalize_shape)
      382    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
      763    0.001    0.000    0.001    0.000 {built-in method isinstance}
      381    0.001    0.000    0.002    0.000 nume

In [10]:
# Profile one zarr array construction with lazy=True, report ordered by
# internal time (sort='time').
cProfile.run('zarr.array(a, chunks=c.chunklen, cname="lz4", lazy=True)', sort='time')

         10332 function calls (8800 primitive calls) in 0.185 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.162    0.000    0.162    0.000 ext.pyx:400(put)
      382    0.002    0.000    0.168    0.000 ext.pyx:318(__setitem__)
  763/382    0.002    0.000    0.007    0.000 ext.pyx:1440(get_chunk)
  763/382    0.002    0.000    0.169    0.000 ext.pyx:450(__setitem__)
    381/0    0.002    0.000    0.000          ext.pyx:905(genexpr)
    381/0    0.002    0.000    0.000          ext.pyx:919(genexpr)
      382    0.001    0.000    0.003    0.000 ext.pyx:245(__cinit__)
      763    0.001    0.000    0.002    0.000 numeric.py:2064(isscalar)
      382    0.001    0.000    0.006    0.000 ext.pyx:1325(_lazy_get_chunk)
      762    0.001    0.000    0.001    0.000 {built-in method array}
      382    0.001    0.000    0.005    0.000 ext.pyx:1445(create_chunk)
  384/383    0.001    0.000    0.001    0.000 ext.pyx:143(

## 1D array read

In [11]:
# Time a full-slice read of the 1D bcolz carray.
%timeit c[:]

1 loops, best of 3: 200 ms per loop


In [12]:
# Time a full-slice read of the 1D zarr array.
%timeit z[:]

1 loops, best of 3: 164 ms per loop


In [13]:
# Time a full-slice read of the 1D zarr array created with lazy=True.
%timeit zl[:]

1 loops, best of 3: 162 ms per loop


In [14]:
# Profile one full-slice read of `z`, report ordered by internal time.
cProfile.run('z[:]', sort='time')

         3067 function calls (1920 primitive calls) in 0.219 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.193    0.001    0.193    0.001 ext.pyx:389(get)
    381/0    0.017    0.000    0.000          ext.pyx:854(genexpr)
  763/382    0.004    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
    381/0    0.004    0.000    0.000          ext.pyx:860(genexpr)
      2/1    0.001    0.000    0.001    0.001 <string>:1(<module>)
      382    0.001    0.000    0.001    0.000 ext.pyx:1104(get_chunk)
      2/1    0.000    0.000    0.001    0.001 ext.pyx:826(__getitem__)
        1    0.000    0.000    0.219    0.219 ext.pyx:835(genexpr)
      2/1    0.000    0.000    0.001    0.001 {built-in method exec}
      383    0.000    0.000    0.000    0.000 ext.pyx:382(__get__)
        1    0.000    0.000    0.000    0.000 ext.pyx:655(_get_chunk_range)
      382    0.000    0.000    0.000    0.000 ext.pyx:265(__get__)
  

In [15]:
# Profile one full-slice read of the lazy array `zl`, report ordered by
# internal time.
cProfile.run('zl[:]', sort='time')

         3449 function calls (2302 primitive calls) in 0.190 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.180    0.000    0.180    0.000 ext.pyx:389(get)
  763/382    0.003    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
    381/0    0.002    0.000    0.000          ext.pyx:860(genexpr)
    381/0    0.002    0.000    0.000          ext.pyx:854(genexpr)
      382    0.001    0.000    0.001    0.000 ext.pyx:1440(get_chunk)
      2/1    0.001    0.000    0.001    0.001 <string>:1(<module>)
      382    0.000    0.000    0.000    0.000 ext.pyx:1325(_lazy_get_chunk)
      2/1    0.000    0.000    0.001    0.001 ext.pyx:826(__getitem__)
        1    0.000    0.000    0.190    0.190 ext.pyx:835(genexpr)
      382    0.000    0.000    0.000    0.000 ext.pyx:265(__get__)
      2/1    0.000    0.000    0.001    0.001 {built-in method exec}
      383    0.000    0.000    0.000    0.000 ext.pyx:382(__get__)
  

## 2D array creation

In [16]:
# 2D benchmark input: the same 10**8 consecutive int32 values laid out as a
# 10000 x 10000 matrix. Note that this rebinds `a`.
a = np.arange(10000 * 10000, dtype=np.int32).reshape(10000, 10000)
a

array([[       0,        1,        2, ...,     9997,     9998,     9999],
       [   10000,    10001,    10002, ...,    19997,    19998,    19999],
       [   20000,    20001,    20002, ...,    29997,    29998,    29999],
       ..., 
       [99970000, 99970001, 99970002, ..., 99979997, 99979998, 99979999],
       [99980000, 99980001, 99980002, ..., 99989997, 99989998, 99989999],
       [99990000, 99990001, 99990002, ..., 99999997, 99999998, 99999999]], dtype=int32)

In [17]:
# Compress the 2D array into a bcolz carray (cname='lz4'); rebinds `c`, so
# c.chunklen now refers to the 2D carray.
c = bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))
c

carray((10000, 10000), int32)
  nbytes: 381.47 MB; cbytes: 7.75 MB; ratio: 49.25
  cparams := cparams(clevel=5, shuffle=1, cname='lz4')
[[       0        1        2 ...,     9997     9998     9999]
 [   10000    10001    10002 ...,    19997    19998    19999]
 [   20000    20001    20002 ...,    29997    29998    29999]
 ..., 
 [99970000 99970001 99970002 ..., 99979997 99979998 99979999]
 [99980000 99980001 99980002 ..., 99989997 99989998 99989999]
 [99990000 99990001 99990002 ..., 99999997 99999998 99999999]]

In [18]:
# Show the chunk length bcolz chose for the 2D carray; the zarr arrays in the
# following cells derive their chunk shapes from this value.
c.chunklen

26

In [19]:
# zarr counterpart of the 2D carray: each chunk is c.chunklen rows tall and
# spans the full row width (a.shape[1]).
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
z

zarr.ext.SynchronizedArray((10000, 10000), int32, chunks=(26, 10000))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.7M; ratio: 56.8; initialized: 385/385

In [20]:
# Same 2D zarr array created with lazy=True, bound to `zl`.
zl = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)
zl

zarr.ext.SynchronizedLazyArray((10000, 10000), int32, chunks=(26, 10000))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.7M; ratio: 56.8; initialized: 385/385

In [21]:
# Time building a bcolz carray from the 2D array `a` (cname='lz4').
%timeit bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))

10 loops, best of 3: 154 ms per loop


In [22]:
# Time building the 2D zarr array with full-width chunks of c.chunklen rows.
%timeit zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')

10 loops, best of 3: 135 ms per loop


In [23]:
# Time the same 2D zarr construction with lazy=True.
%timeit zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)

10 loops, best of 3: 149 ms per loop


In [24]:
# Profile one 2D zarr construction (full-width chunks), report ordered by
# internal time (sort='time').
cProfile.run('zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname="lz4")', sort='time')

         8872 function calls (7329 primitive calls) in 0.177 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      385    0.155    0.000    0.156    0.000 ext.pyx:400(put)
  768/385    0.004    0.000    0.162    0.000 ext.pyx:318(__setitem__)
    384/0    0.003    0.000    0.000          ext.pyx:905(genexpr)
  769/385    0.002    0.000    0.004    0.000 ext.pyx:1107(create_chunk)
      385    0.002    0.000    0.003    0.000 ext.pyx:245(__cinit__)
    384/0    0.002    0.000    0.000          ext.pyx:919(genexpr)
      769    0.001    0.000    0.002    0.000 numeric.py:2064(isscalar)
      768    0.001    0.000    0.001    0.000 {built-in method array}
  387/386    0.001    0.000    0.001    0.000 ext.pyx:143(_normalize_shape)
      385    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
      769    0.001    0.000    0.001    0.000 {built-in method isinstance}
      384    0.001    0.000    0.002    0.000 nume

In [25]:
# Profile one 2D zarr construction (full-width chunks) with lazy=True, report
# ordered by internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname="lz4", lazy=True)', sort='time')

         10414 function calls (8870 primitive calls) in 0.202 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      385    0.175    0.000    0.176    0.000 ext.pyx:400(put)
      385    0.003    0.000    0.183    0.000 ext.pyx:318(__setitem__)
  769/385    0.003    0.000    0.008    0.000 ext.pyx:1440(get_chunk)
  769/385    0.003    0.000    0.184    0.000 ext.pyx:450(__setitem__)
    384/0    0.003    0.000    0.000          ext.pyx:905(genexpr)
    384/0    0.002    0.000    0.000          ext.pyx:919(genexpr)
      385    0.002    0.000    0.003    0.000 ext.pyx:245(__cinit__)
      769    0.002    0.000    0.002    0.000 numeric.py:2064(isscalar)
      385    0.001    0.000    0.007    0.000 ext.pyx:1325(_lazy_get_chunk)
      768    0.001    0.000    0.001    0.000 {built-in method array}
      385    0.001    0.000    0.006    0.000 ext.pyx:1445(create_chunk)
      385    0.001    0.000    0.001    0.000 ext.pyx:152(

In [26]:
# Time construction with an alternative chunk shape: ten times as many rows
# and a tenth of the columns per chunk, i.e. the same number of elements per
# chunk, but each chunk no longer spans whole rows.
%timeit zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')

1 loops, best of 3: 191 ms per loop


In [27]:
# Time construction with chunks of (c.chunklen*10) rows by (a.shape[1]//10)
# columns and lazy=True.
%timeit zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4', lazy=True)

1 loops, best of 3: 194 ms per loop


In [28]:
# Profile one construction with chunks of (c.chunklen*10) rows by
# (a.shape[1]//10) columns, report ordered by internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname="lz4")', sort='time')

         8951 function calls (7388 primitive calls) in 0.255 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      390    0.174    0.000    0.174    0.000 ext.pyx:400(put)
      760    0.057    0.000    0.057    0.000 {built-in method array}
  778/390    0.007    0.000    0.239    0.001 ext.pyx:318(__setitem__)
    389/0    0.003    0.000    0.000          ext.pyx:905(genexpr)
  779/390    0.002    0.000    0.005    0.000 ext.pyx:1107(create_chunk)
      390    0.002    0.000    0.004    0.000 ext.pyx:245(__cinit__)
    389/0    0.002    0.000    0.000          ext.pyx:919(genexpr)
      770    0.002    0.000    0.002    0.000 numeric.py:2064(isscalar)
      390    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
  392/391    0.001    0.000    0.001    0.000 ext.pyx:143(_normalize_shape)
      770    0.001    0.000    0.001    0.000 {built-in method isinstance}
      380    0.001    0.000    0.058    0.000 nume

In [29]:
# Profile one construction with chunks of (c.chunklen*10) rows by
# (a.shape[1]//10) columns and lazy=True, report ordered by internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname="lz4", lazy=True)', sort='time')

         10513 function calls (8949 primitive calls) in 0.265 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      390    0.181    0.000    0.181    0.000 ext.pyx:400(put)
      760    0.056    0.000    0.056    0.000 {built-in method array}
      390    0.004    0.000    0.245    0.001 ext.pyx:318(__setitem__)
    389/0    0.003    0.000    0.000          ext.pyx:919(genexpr)
  779/390    0.003    0.000    0.009    0.000 ext.pyx:1440(get_chunk)
    389/0    0.003    0.000    0.000          ext.pyx:905(genexpr)
  779/390    0.003    0.000    0.245    0.001 ext.pyx:450(__setitem__)
      390    0.002    0.000    0.003    0.000 ext.pyx:245(__cinit__)
      770    0.002    0.000    0.002    0.000 numeric.py:2064(isscalar)
      390    0.002    0.000    0.007    0.000 ext.pyx:1325(_lazy_get_chunk)
      390    0.001    0.000    0.006    0.000 ext.pyx:1445(create_chunk)
      390    0.001    0.000    0.001    0.000 ext.pyx:152(

## 2D array read

In [30]:
# Time a full-slice read of the 2D bcolz carray.
%timeit c[:]

1 loops, best of 3: 200 ms per loop


In [31]:
# Rebuild `z` with full-width chunks of c.chunklen rows (outside the timed
# statement), then time a full-slice read.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
%timeit z[:]

10 loops, best of 3: 162 ms per loop


In [32]:
# Rebuild the array with lazy=True (full-width chunks), then time a
# full-slice read.
# NOTE(review): this rebinds `z` to a lazy array, whereas the earlier cells
# keep lazy arrays under `zl`; later cells see whatever `z` was assigned last.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)
%timeit z[:]

10 loops, best of 3: 162 ms per loop


In [33]:
# Rebuild `z` with chunks of (c.chunklen*10) rows by (a.shape[1]//10)
# columns, then time a full-slice read.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')
%timeit z[:]

1 loops, best of 3: 225 ms per loop


In [34]:
# Rebuild the array with chunks of (c.chunklen*10) rows by (a.shape[1]//10)
# columns and lazy=True, then time a full-slice read.
# NOTE(review): rebinds `z` (not `zl`) to a lazy array.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4', lazy=True)
%timeit z[:]

1 loops, best of 3: 233 ms per loop


## 2D array slices

In [35]:
# Time reading a block of 1000 rows (first-axis indices 1000..1999) from the
# 2D bcolz carray.
%timeit c[1000:2000]

10 loops, best of 3: 20.4 ms per loop


In [36]:
# Rebuild `z` with full-width chunks of c.chunklen rows, then time reading
# rows 1000..1999.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
%timeit z[1000:2000]

100 loops, best of 3: 17 ms per loop


In [37]:
# Rebuild the array with lazy=True (full-width chunks), then time reading
# rows 1000..1999.
# NOTE(review): rebinds `z` (not `zl`) to a lazy array.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)
%timeit z[1000:2000]

100 loops, best of 3: 17.1 ms per loop


In [38]:
# Rebuild `z` with chunks of (c.chunklen*10) rows by (a.shape[1]//10)
# columns, then time reading rows 1000..1999.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')
%timeit z[1000:2000]

10 loops, best of 3: 27.1 ms per loop


In [39]:
# Rebuild the array with chunks of (c.chunklen*10) rows by (a.shape[1]//10)
# columns and lazy=True, then time reading rows 1000..1999.
# NOTE(review): rebinds `z` (not `zl`) to a lazy array.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4', lazy=True)
%timeit z[1000:2000]

10 loops, best of 3: 26.7 ms per loop


In [40]:
# Time reading a block of 1000 columns (second-axis indices 1000..1999)
# across all rows of the 2D bcolz carray.
%timeit c[:, 1000:2000]

1 loops, best of 3: 209 ms per loop


In [41]:
# Rebuild `z` with full-width chunks of c.chunklen rows, then time reading
# columns 1000..1999 of every row. Each chunk spans all columns, so the
# requested columns overlap every chunk of the array.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
%timeit z[:, 1000:2000]

10 loops, best of 3: 147 ms per loop


In [42]:
# Rebuild `z` with chunks of (c.chunklen*10) rows by (a.shape[1]//10)
# columns, then time reading columns 1000..1999 of every row. With
# a.shape[1] == 10000 the chunks are 1000 columns wide, so the requested
# columns overlap only a fraction of the chunks.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')
%timeit z[:, 1000:2000]

100 loops, best of 3: 17.6 ms per loop
