Skip to content

Commit

Permalink
Merge pull request #70 from alimanfoo/issue_55
Browse files Browse the repository at this point in the history
blosc returns bytes; resolves #55
  • Loading branch information
alimanfoo committed Sep 9, 2016
2 parents 1ddaa66 + cc11c65 commit c8db5b1
Show file tree
Hide file tree
Showing 6 changed files with 999 additions and 609 deletions.
5 changes: 5 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Release notes
=============

* The Blosc extension has been modified to return bytes instead of array
objects from compress and decompress function calls. This should
improve compatibility and also provide a small performance increase when
compressing data with a high compression ratio
(`#55 <https://github.com/alimanfoo/zarr/issues/55>`_).
* Added ``overwrite`` keyword argument to array and group creation methods
on the :class:`zarr.hierarchy.Group` class
(`#71 <https://github.com/alimanfoo/zarr/issues/71>`_).
Expand Down
200 changes: 200 additions & 0 deletions notebooks/.ipynb_checkpoints/blosc_microbench-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'2.0.1'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import zarr\n",
"zarr.__version__"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 110 ms per loop\n",
"1 loop, best of 3: 235 ms per loop\n",
"Array((100000000,), int64, chunks=(200000,), order=C)\n",
" nbytes: 762.9M; nbytes_stored: 11.2M; ratio: 67.8; initialized: 500/500\n",
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
" store: dict\n"
]
}
],
"source": [
"z = zarr.empty(shape=100000000, chunks=200000, dtype='i8')\n",
"data = np.arange(100000000, dtype='i8')\n",
"%timeit z[:] = data\n",
"%timeit z[:]\n",
"print(z)\n",
"assert np.all(z[:] == data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loop, best of 3: 331 ms per loop\n",
"1 loop, best of 3: 246 ms per loop\n",
"Array((100000000,), float64, chunks=(200000,), order=C)\n",
" nbytes: 762.9M; nbytes_stored: 724.8M; ratio: 1.1; initialized: 500/500\n",
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
" store: dict\n"
]
}
],
"source": [
"z = zarr.empty(shape=100000000, chunks=200000, dtype='f8')\n",
"data = np.random.normal(size=100000000)\n",
"%timeit z[:] = data\n",
"%timeit z[:]\n",
"print(z)\n",
"assert np.all(z[:] == data)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'2.0.2.dev0+dirty'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import sys\n",
"sys.path.insert(0, '..')\n",
"import zarr\n",
"zarr.__version__"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10 loops, best of 3: 92.7 ms per loop\n",
"1 loop, best of 3: 230 ms per loop\n",
"Array((100000000,), int64, chunks=(200000,), order=C)\n",
" nbytes: 762.9M; nbytes_stored: 11.2M; ratio: 67.8; initialized: 500/500\n",
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
" store: dict\n"
]
}
],
"source": [
"z = zarr.empty(shape=100000000, chunks=200000, dtype='i8')\n",
"data = np.arange(100000000, dtype='i8')\n",
"%timeit z[:] = data\n",
"%timeit z[:]\n",
"print(z)\n",
"assert np.all(z[:] == data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loop, best of 3: 338 ms per loop\n",
"1 loop, best of 3: 253 ms per loop\n",
"Array((100000000,), float64, chunks=(200000,), order=C)\n",
" nbytes: 762.9M; nbytes_stored: 724.8M; ratio: 1.1; initialized: 500/500\n",
" compressor: Blosc(cname='lz4', clevel=5, shuffle=1)\n",
" store: dict\n"
]
}
],
"source": [
"z = zarr.empty(shape=100000000, chunks=200000, dtype='f8')\n",
"data = np.random.normal(size=100000000)\n",
"%timeit z[:] = data\n",
"%timeit z[:]\n",
"print(z)\n",
"assert np.all(z[:] == data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

0 comments on commit c8db5b1

Please sign in to comment.