In [1]:
# Put the parent directory first on the import path -- presumably so a local
# development checkout of zarr is picked up; confirm against the repo layout.
import sys
sys.path.insert(0, '..')

import cProfile
import numpy as np
import zarr
import bcolz
import line_profiler

# Report each library's version and the Blosc build it reports, so the
# timings recorded below can be tied to a specific software stack.
print('numpy', np.__version__)
print('zarr', zarr.__version__, 'blosc', ' '.join(zarr.blosc_version()))
print('bcolz', bcolz.__version__, 'blosc', ' '.join(bcolz.blosc_version()))

# Restrict bcolz's Blosc to a single thread. As the last expression of the
# cell, the call's return value is what the cell displays.
# NOTE(review): no matching call is made for zarr in this cell -- confirm both
# libraries run with the same thread count before comparing timings.
bcolz.blosc_set_nthreads(1)

numpy 1.10.2
zarr 0.2.8.dev16+dirty blosc 1.7.0 $Date:: 2015-07-05 #$
bcolz 0.12.2.dev22+dirty blosc 1.7.0 $Date:: 2015-07-05 #$


4

## 1D array creation

In [2]:
# 1-D benchmark input: the integers 0 .. 10**8 - 1 stored as 32-bit ints.
a = np.arange(10**8, dtype=np.int32)
a

array([       0,        1,        2, ..., 99999997, 99999998, 99999999], dtype=int32)

In [3]:
# bcolz reference container: compress `a` with the LZ4 codec; no other
# compression or chunking argument is passed, so bcolz picks the rest.
c = bcolz.carray(
    a,
    cparams=bcolz.cparams(cname='lz4'),
)
c

carray((100000000,), int32)
  nbytes: 381.47 MB; cbytes: 7.68 MB; ratio: 49.67
  cparams := cparams(clevel=5, shuffle=True, cname='lz4')
[       0        1        2 ..., 99999997 99999998 99999999]

In [4]:
# zarr counterpart of `c`: same data and codec, chunked with the chunk
# length taken from the bcolz carray.
z = zarr.array(
    a,
    chunks=c.chunklen,
    cname='lz4',
)
z

zarr.ext.SynchronizedArray((100000000,), int32, chunks=(262144,))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.6M; ratio: 57.4; initialized: 382/382

In [5]:
# Same as `z` but created with lazy=True; the recorded repr shows this
# yields the SynchronizedLazyArray variant.
zl = zarr.array(
    a,
    chunks=c.chunklen,
    cname='lz4',
    lazy=True,
)
zl

zarr.ext.SynchronizedLazyArray((100000000,), int32, chunks=(262144,))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.7M; ratio: 57.1; initialized: 382/382

In [6]:
# Baseline: time building the bcolz carray from the 1-D input with LZ4.
%timeit bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))

10 loops, best of 3: 131 ms per loop


In [7]:
# Time the same construction with zarr, using the bcolz carray's chunk length.
%timeit zarr.array(a, chunks=c.chunklen, cname='lz4')

10 loops, best of 3: 140 ms per loop


In [8]:
# Time the lazy=True variant of the zarr construction with identical arguments.
%timeit zarr.array(a, chunks=c.chunklen, cname='lz4', lazy=True)

10 loops, best of 3: 142 ms per loop


In [9]:
# Profile one eager zarr construction; sort='time' orders the report by
# internal time so the most expensive functions are listed first.
cProfile.run('zarr.array(a, chunks=c.chunklen, cname="lz4")', sort='time')

         8803 function calls (7272 primitive calls) in 0.176 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.158    0.000    0.158    0.000 ext.pyx:400(put)
  762/382    0.004    0.000    0.164    0.000 ext.pyx:318(__setitem__)
    381/0    0.002    0.000    0.000          ext.pyx:897(genexpr)
  763/382    0.002    0.000    0.004    0.000 ext.pyx:1097(create_chunk)
      382    0.002    0.000    0.003    0.000 ext.pyx:245(__cinit__)
    381/0    0.001    0.000    0.000          ext.pyx:911(genexpr)
      763    0.001    0.000    0.002    0.000 numeric.py:1970(isscalar)
      762    0.001    0.000    0.001    0.000 {built-in method array}
  384/383    0.001    0.000    0.001    0.000 ext.pyx:143(_normalize_shape)
      382    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
      763    0.001    0.000    0.001    0.000 {built-in method isinstance}
      381    0.001    0.000    0.002    0.000 nume

In [10]:
# Profile one lazy=True zarr construction, again ordered by internal time,
# for comparison with the eager profile.
cProfile.run('zarr.array(a, chunks=c.chunklen, cname="lz4", lazy=True)', sort='time')

         10333 function calls (8801 primitive calls) in 0.191 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.168    0.000    0.168    0.000 ext.pyx:400(put)
      382    0.003    0.000    0.175    0.000 ext.pyx:318(__setitem__)
  763/382    0.002    0.000    0.007    0.000 ext.pyx:1417(get_chunk)
  763/382    0.002    0.000    0.175    0.000 ext.pyx:450(__setitem__)
    381/0    0.002    0.000    0.000          ext.pyx:897(genexpr)
    381/0    0.002    0.000    0.000          ext.pyx:911(genexpr)
      382    0.001    0.000    0.003    0.000 ext.pyx:245(__cinit__)
      763    0.001    0.000    0.002    0.000 numeric.py:1970(isscalar)
      762    0.001    0.000    0.001    0.000 {built-in method array}
      382    0.001    0.000    0.006    0.000 ext.pyx:1302(_lazy_get_chunk)
      382    0.001    0.000    0.005    0.000 ext.pyx:1422(create_chunk)
      382    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)

## 1D array read

In [11]:
# Baseline: time a full-range read (`[:]`) of the 1-D bcolz carray.
%timeit c[:]

1 loops, best of 3: 161 ms per loop


In [12]:
# Time a full-range read of the eager 1-D zarr array.
%timeit z[:]

10 loops, best of 3: 166 ms per loop


In [13]:
# Time a full-range read of the lazy 1-D zarr array.
%timeit zl[:]

1 loops, best of 3: 163 ms per loop


In [15]:
# Profile a full-range read of the eager zarr array, ordered by internal time.
cProfile.run('z[:]', sort='time')

         3067 function calls (1920 primitive calls) in 0.204 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.193    0.001    0.193    0.001 ext.pyx:389(get)
  763/382    0.004    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
    381/0    0.003    0.000    0.000          ext.pyx:846(genexpr)
    381/0    0.003    0.000    0.000          ext.pyx:852(genexpr)
      2/1    0.001    0.000    0.002    0.002 <string>:1(<module>)
      382    0.001    0.000    0.001    0.000 ext.pyx:1094(get_chunk)
      2/1    0.000    0.000    0.001    0.001 ext.pyx:818(__getitem__)
        1    0.000    0.000    0.204    0.204 ext.pyx:827(genexpr)
      2/1    0.000    0.000    0.002    0.002 {built-in method exec}
      383    0.000    0.000    0.000    0.000 ext.pyx:382(__get__)
      382    0.000    0.000    0.000    0.000 ext.pyx:265(__get__)
        1    0.000    0.000    0.000    0.000 ext.pyx:655(_get_chunk_range)
  

In [17]:
# Profile a full-range read of the lazy zarr array, ordered by internal time.
cProfile.run('zl[:]', sort='time')

         3449 function calls (2302 primitive calls) in 0.201 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      382    0.191    0.000    0.191    0.000 ext.pyx:389(get)
  763/382    0.003    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
    381/0    0.002    0.000    0.000          ext.pyx:852(genexpr)
    381/0    0.002    0.000    0.000          ext.pyx:846(genexpr)
      2/1    0.001    0.000    0.001    0.001 <string>:1(<module>)
      382    0.001    0.000    0.001    0.000 ext.pyx:1417(get_chunk)
      382    0.000    0.000    0.000    0.000 ext.pyx:1302(_lazy_get_chunk)
      2/1    0.000    0.000    0.001    0.001 ext.pyx:818(__getitem__)
        1    0.000    0.000    0.201    0.201 ext.pyx:827(genexpr)
      2/1    0.000    0.000    0.001    0.001 {built-in method exec}
      383    0.000    0.000    0.000    0.000 ext.pyx:382(__get__)
      382    0.000    0.000    0.000    0.000 ext.pyx:265(__get__)
  

## 2D array creation

In [18]:
# 2-D benchmark input: the same 10**8 int32 values laid out as a
# 10000 x 10000 grid. This rebinds `a`, replacing the 1-D input.
a = np.arange(10**8, dtype=np.int32).reshape(10000, 10000)
a

array([[       0,        1,        2, ...,     9997,     9998,     9999],
       [   10000,    10001,    10002, ...,    19997,    19998,    19999],
       [   20000,    20001,    20002, ...,    29997,    29998,    29999],
       ..., 
       [99970000, 99970001, 99970002, ..., 99979997, 99979998, 99979999],
       [99980000, 99980001, 99980002, ..., 99989997, 99989998, 99989999],
       [99990000, 99990001, 99990002, ..., 99999997, 99999998, 99999999]], dtype=int32)

In [19]:
# bcolz reference container for the 2-D input, compressed with LZ4; no
# chunking argument is passed, so bcolz picks the chunk length itself.
c = bcolz.carray(
    a,
    cparams=bcolz.cparams(cname='lz4'),
)
c

carray((10000, 10000), int32)
  nbytes: 381.47 MB; cbytes: 7.75 MB; ratio: 49.25
  cparams := cparams(clevel=5, shuffle=True, cname='lz4')
[[       0        1        2 ...,     9997     9998     9999]
 [   10000    10001    10002 ...,    19997    19998    19999]
 [   20000    20001    20002 ...,    29997    29998    29999]
 ..., 
 [99970000 99970001 99970002 ..., 99979997 99979998 99979999]
 [99980000 99980001 99980002 ..., 99989997 99989998 99989999]
 [99990000 99990001 99990002 ..., 99999997 99999998 99999999]]

In [20]:
# Show the chunk length of the 2-D bcolz carray; the zarr arrays created
# after this cell reuse it as the first dimension of their chunk shape.
c.chunklen

26

In [21]:
# zarr counterpart of the 2-D carray: each chunk holds `c.chunklen` rows
# and spans every column of `a`.
z = zarr.array(
    a,
    chunks=(c.chunklen, a.shape[1]),
    cname='lz4',
)
z

zarr.ext.SynchronizedArray((10000, 10000), int32, chunks=(26, 10000))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.7M; ratio: 56.8; initialized: 385/385

In [22]:
# Lazy variant of the 2-D zarr array, with the same full-width row chunks.
zl = zarr.array(
    a,
    chunks=(c.chunklen, a.shape[1]),
    cname='lz4',
    lazy=True,
)
zl

zarr.ext.SynchronizedLazyArray((10000, 10000), int32, chunks=(26, 10000))
  cname: lz4; clevel: 5; shuffle: 1 (BYTESHUFFLE)
  nbytes: 381.5M; cbytes: 6.7M; ratio: 56.8; initialized: 385/385

In [23]:
# Baseline: time building the bcolz carray from the 2-D input with LZ4.
%timeit bcolz.carray(a, cparams=bcolz.cparams(cname='lz4'))

10 loops, best of 3: 132 ms per loop


In [24]:
# Time building the eager zarr array with full-width row chunks.
%timeit zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')

10 loops, best of 3: 141 ms per loop


In [25]:
# Time building the lazy zarr array with full-width row chunks.
%timeit zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)

10 loops, best of 3: 140 ms per loop


In [26]:
# Profile one eager 2-D construction with full-width row chunks, ordered by
# internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname="lz4")', sort='time')

         8873 function calls (7330 primitive calls) in 0.234 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      385    0.203    0.001    0.203    0.001 ext.pyx:400(put)
  768/385    0.006    0.000    0.211    0.001 ext.pyx:318(__setitem__)
  769/385    0.004    0.000    0.009    0.000 ext.pyx:1097(create_chunk)
      385    0.003    0.000    0.007    0.000 ext.pyx:245(__cinit__)
    384/0    0.003    0.000    0.000          ext.pyx:897(genexpr)
    384/0    0.003    0.000    0.000          ext.pyx:911(genexpr)
  387/386    0.003    0.000    0.003    0.000 ext.pyx:143(_normalize_shape)
      769    0.002    0.000    0.003    0.000 numeric.py:1970(isscalar)
      768    0.002    0.000    0.002    0.000 {built-in method array}
      385    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
      386    0.001    0.000    0.001    0.000 ext.pyx:95(_normalize_cparams)
      769    0.001    0.000    0.001    0.000 {b

In [27]:
# Profile one lazy 2-D construction with full-width row chunks, ordered by
# internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname="lz4", lazy=True)', sort='time')

         10415 function calls (8871 primitive calls) in 0.203 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      385    0.177    0.000    0.177    0.000 ext.pyx:400(put)
      385    0.003    0.000    0.184    0.000 ext.pyx:318(__setitem__)
    384/0    0.003    0.000    0.000          ext.pyx:897(genexpr)
  769/385    0.002    0.000    0.008    0.000 ext.pyx:1417(get_chunk)
  769/385    0.002    0.000    0.185    0.000 ext.pyx:450(__setitem__)
    384/0    0.002    0.000    0.000          ext.pyx:911(genexpr)
      385    0.002    0.000    0.003    0.000 ext.pyx:245(__cinit__)
      769    0.002    0.000    0.002    0.000 numeric.py:1970(isscalar)
      385    0.001    0.000    0.007    0.000 ext.pyx:1302(_lazy_get_chunk)
      768    0.001    0.000    0.001    0.000 {built-in method array}
      385    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
      385    0.001    0.000    0.006    0.000 ext.pyx:14

In [28]:
# Alternative chunk shape: ten times as many rows per chunk but only a tenth
# of the columns, so a chunk no longer spans whole rows of `a`.
%timeit zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')

1 loops, best of 3: 193 ms per loop


In [29]:
# Lazy variant with the same alternative chunk shape (10x rows, 1/10 columns).
%timeit zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4', lazy=True)

1 loops, best of 3: 204 ms per loop


In [30]:
# Profile one eager construction with the alternative chunk shape (10x rows,
# 1/10 columns), ordered by internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname="lz4")', sort='time')

         8952 function calls (7389 primitive calls) in 0.255 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      390    0.177    0.000    0.177    0.000 ext.pyx:400(put)
      760    0.054    0.000    0.054    0.000 {built-in method array}
  778/390    0.006    0.000    0.238    0.001 ext.pyx:318(__setitem__)
  779/390    0.003    0.000    0.006    0.000 ext.pyx:1097(create_chunk)
      390    0.003    0.000    0.004    0.000 ext.pyx:245(__cinit__)
    389/0    0.003    0.000    0.000          ext.pyx:897(genexpr)
    389/0    0.002    0.000    0.000          ext.pyx:911(genexpr)
      770    0.002    0.000    0.002    0.000 numeric.py:1970(isscalar)
  392/391    0.001    0.000    0.001    0.000 ext.pyx:143(_normalize_shape)
      390    0.001    0.000    0.001    0.000 ext.pyx:152(_is_total_slice)
      380    0.001    0.000    0.055    0.000 numeric.py:527(ascontiguousarray)
      770    0.001    0.000    0.001    0.000

In [31]:
# Profile one lazy construction with the alternative chunk shape (10x rows,
# 1/10 columns), ordered by internal time.
cProfile.run('zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname="lz4", lazy=True)', sort='time')

         10514 function calls (8950 primitive calls) in 0.233 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      390    0.158    0.000    0.159    0.000 ext.pyx:400(put)
      760    0.050    0.000    0.050    0.000 {built-in method array}
      390    0.004    0.000    0.215    0.001 ext.pyx:318(__setitem__)
    389/0    0.003    0.000    0.000          ext.pyx:911(genexpr)
  779/390    0.003    0.000    0.008    0.000 ext.pyx:1417(get_chunk)
  779/390    0.002    0.000    0.216    0.001 ext.pyx:450(__setitem__)
    389/0    0.002    0.000    0.000          ext.pyx:897(genexpr)
      390    0.002    0.000    0.003    0.000 ext.pyx:245(__cinit__)
      770    0.001    0.000    0.002    0.000 numeric.py:1970(isscalar)
      390    0.001    0.000    0.007    0.000 ext.pyx:1302(_lazy_get_chunk)
  392/391    0.001    0.000    0.001    0.000 ext.pyx:143(_normalize_shape)
      390    0.001    0.000    0.005    0.000 ext.pyx:1

## 2D array read

In [32]:
# Baseline: time a full-range read (`[:]`) of the 2-D bcolz carray.
%timeit c[:]

1 loops, best of 3: 193 ms per loop


In [33]:
# Rebuild the eager zarr array with full-width row chunks, then time a
# full-range read. Only the read is timed; the construction runs once.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
%timeit z[:]

10 loops, best of 3: 187 ms per loop


In [35]:
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)
%timeit z[:]

10 loops, best of 3: 188 ms per loop


In [34]:
# Rebuild the eager zarr array with the alternative chunk shape (10x rows,
# 1/10 columns), then time a full-range read.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')
%timeit z[:]

1 loops, best of 3: 256 ms per loop


In [36]:
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4', lazy=True)
%timeit z[:]

1 loops, best of 3: 256 ms per loop


## 2D array slices

In [37]:
# Baseline: time reading a block of 1000 rows (1000:2000) from the bcolz carray.
%timeit c[1000:2000]

100 loops, best of 3: 16.6 ms per loop


In [38]:
# Eager zarr array with full-width row chunks: time reading rows 1000:2000.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
%timeit z[1000:2000]

100 loops, best of 3: 17.6 ms per loop


In [39]:
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4', lazy=True)
%timeit z[1000:2000]

100 loops, best of 3: 17.1 ms per loop


In [40]:
# Eager zarr array with the alternative chunk shape (10x rows, 1/10 columns):
# time reading rows 1000:2000.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')
%timeit z[1000:2000]

10 loops, best of 3: 27.2 ms per loop


In [41]:
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4', lazy=True)
%timeit z[1000:2000]

10 loops, best of 3: 28.4 ms per loop


In [42]:
# Baseline: time reading columns 1000:2000 across all rows of the bcolz carray.
%timeit c[:, 1000:2000]

1 loops, best of 3: 207 ms per loop


In [43]:
# Eager zarr array with full-width row chunks: time reading columns 1000:2000
# across all rows. Every chunk spans all columns, so each one overlaps the
# selection.
z = zarr.array(a, chunks=(c.chunklen, a.shape[1]), cname='lz4')
%timeit z[:, 1000:2000]

10 loops, best of 3: 148 ms per loop


In [44]:
# Eager zarr array with the alternative chunk shape: chunks are
# a.shape[1]//10 columns wide, so with the 10000-column input the selection
# 1000:2000 lines up with a single column of chunks.
z = zarr.array(a, chunks=(c.chunklen*10, a.shape[1]//10), cname='lz4')
%timeit z[:, 1000:2000]

100 loops, best of 3: 17.4 ms per loop
