Skip to content

Commit

Permalink
Merge pull request #72 from alimanfoo/misc_20160908
Browse files Browse the repository at this point in the history
Various minor fixes and improvements
  • Loading branch information
alimanfoo committed Sep 9, 2016
2 parents 96c34bb + 7b741ae commit 1ddaa66
Show file tree
Hide file tree
Showing 14 changed files with 700 additions and 433 deletions.
Empty file added docs/_static/donotdelete
Empty file.
8 changes: 8 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Release notes
=============

* Added ``overwrite`` keyword argument to array and group creation methods
on the :class:`zarr.hierarchy.Group` class
(`#71 <https://github.com/alimanfoo/zarr/issues/71>`_).
* Added ``cache_metadata`` keyword argument to array creation methods.
* The functions :func:`zarr.creation.open_array` and
:func:`zarr.hierarchy.open_group` now accept any store as first argument
(`#56 <https://github.com/alimanfoo/zarr/issues/56>`_).

.. _release_2.0.1:

2.0.1
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ the delta filter::
... chunks=(1000, 1000), compressor=compressor)
>>> z
Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
nbytes: 381.5M; nbytes_stored: 248.9K; ratio: 1569.6; initialized: 100/100
nbytes: 381.5M; nbytes_stored: 248.9K; ratio: 1569.7; initialized: 100/100
compressor: LZMA(format=1, check=-1, preset=None, filters=[{'dist': 4, 'id': 3}, {'preset': 1, 'id': 33}])
store: dict

Expand Down Expand Up @@ -327,7 +327,7 @@ provided that all processes have access to a shared file system. E.g.::
... synchronizer=synchronizer)
>>> z
Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
nbytes: 381.5M; nbytes_stored: 326; ratio: 1226993.9; initialized: 0/100
nbytes: 381.5M; nbytes_stored: 323; ratio: 1238390.1; initialized: 0/100
compressor: Blosc(cname='lz4', clevel=5, shuffle=1)
store: DirectoryStore; synchronizer: ProcessSynchronizer

Expand Down
93 changes: 69 additions & 24 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from zarr.storage import array_meta_key, attrs_key, listdir, getsize
from zarr.meta import decode_array_metadata, encode_array_metadata
from zarr.attrs import Attributes
from zarr.errors import PermissionError
from zarr.errors import PermissionError, err_read_only, err_array_not_found
from zarr.compat import reduce
from zarr.codecs import get_codec

Expand All @@ -34,6 +34,11 @@ class Array(object):
for storage of both chunks and metadata.
synchronizer : object, optional
Array synchronizer.
cache_metadata : bool, optional
If True, array configuration metadata will be cached for the
lifetime of the object. If False, array metadata will be reloaded
prior to all data access and modification operations (may incur
overhead depending on storage and data access pattern).
Attributes
----------
Expand All @@ -56,8 +61,9 @@ class Array(object):
itemsize
nbytes
nbytes_stored
initialized
cdata_shape
nchunks
nchunks_initialized
is_view
Methods
Expand All @@ -71,7 +77,7 @@ class Array(object):
""" # flake8: noqa

def __init__(self, store, path=None, read_only=False, chunk_store=None,
synchronizer=None):
synchronizer=None, cache_metadata=True):
# N.B., expect at this point store is fully initialized with all
# configuration metadata fully specified and normalized

Expand All @@ -87,13 +93,23 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
else:
self._chunk_store = chunk_store
self._synchronizer = synchronizer
self._cache_metadata = cache_metadata
self._is_view = False

# initialize metadata
self._load_metadata()

# initialize attributes
akey = self._key_prefix + attrs_key
self._attrs = Attributes(store, key=akey, read_only=read_only,
synchronizer=synchronizer)

def _load_metadata(self):
try:
mkey = self._key_prefix + array_meta_key
meta_bytes = store[mkey]
meta_bytes = self._store[mkey]
except KeyError:
raise ValueError('store has no metadata')
err_array_not_found(self._path)
else:

# decode and store metadata
Expand All @@ -104,7 +120,6 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._dtype = meta['dtype']
self._fill_value = meta['fill_value']
self._order = meta['order']
self._is_view = False

# setup compressor
config = meta['compressor']
Expand All @@ -119,14 +134,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
filters = [get_codec(f) for f in filters]
self._filters = filters

# initialize attributes
akey = self._key_prefix + attrs_key
self._attrs = Attributes(store, key=akey, read_only=read_only,
synchronizer=synchronizer)

def _flush_metadata(self):
if self._is_view:
raise PermissionError('operation not permitted for views')
raise PermissionError('not permitted for views')

if self._compressor:
compressor_config = self._compressor.get_config()
else:
Expand Down Expand Up @@ -253,12 +264,6 @@ def nbytes_stored(self):
else:
return m + n

@property
def initialized(self):
"""The number of chunks that have been initialized with some data."""
return sum(1 for k in listdir(self._chunk_store, self._path)
if k not in [array_meta_key, attrs_key])

@property
def cdata_shape(self):
"""A tuple of integers describing the number of chunks along each
Expand All @@ -267,6 +272,20 @@ def cdata_shape(self):
int(np.ceil(s / c)) for s, c in zip(self._shape, self._chunks)
)

@property
def nchunks(self):
    """Total number of chunks.

    Computed as the product of the entries of ``cdata_shape`` (the number
    of chunks along each dimension).
    """
    # Supply 1 as the initializer so that an empty ``cdata_shape`` yields
    # the empty product instead of ``reduce`` raising TypeError.
    return reduce(operator.mul, self.cdata_shape, 1)

@property
def nchunks_initialized(self):
    """Count of chunks that have been initialized with some data.

    Every key listed under this array's path in the chunk store is
    counted, except the array metadata key and the attributes key.
    """
    non_chunk_keys = [array_meta_key, attrs_key]
    stored_keys = listdir(self._chunk_store, self._path)
    count = 0
    for key in stored_keys:
        if key in non_chunk_keys:
            # metadata / attributes entries are not chunks
            continue
        count += 1
    return count

# backwards compatibility
initialized = nchunks_initialized

@property
def is_view(self):
"""A boolean, True if this array is a view on another array."""
Expand Down Expand Up @@ -366,6 +385,10 @@ def __getitem__(self, item):
""" # flake8: noqa

# refresh metadata
if not self._cache_metadata:
self._load_metadata()

# normalize selection
selection = normalize_array_selection(item, self._shape)

Expand Down Expand Up @@ -482,7 +505,11 @@ def __setitem__(self, key, value):

# guard conditions
if self._read_only:
raise PermissionError('array is read-only')
err_read_only()

# refresh metadata
if not self._cache_metadata:
self._load_metadata()

# normalize selection
selection = normalize_array_selection(key, self._shape)
Expand Down Expand Up @@ -717,6 +744,10 @@ def _encode_chunk(self, chunk):

def __repr__(self):

# refresh metadata
if not self._cache_metadata:
self._load_metadata()

# main line
r = '%s(' % type(self).__name__
if self.name:
Expand All @@ -733,8 +764,8 @@ def __repr__(self):
r += '; nbytes_stored: %s' % human_readable_size(
self.nbytes_stored)
r += '; ratio: %.1f' % (self.nbytes / self.nbytes_stored)
n_chunks = reduce(operator.mul, self.cdata_shape)
r += '; initialized: %s/%s' % (self.initialized, n_chunks)
r += '; initialized: %s/%s' % (self.nchunks_initialized,
self.nchunks)

# filters
if self.filters:
Expand Down Expand Up @@ -768,15 +799,28 @@ def _write_op(self, f, *args, **kwargs):

# guard condition
if self._read_only:
raise PermissionError('array is read-only')
err_read_only()

# synchronization
if self._synchronizer is None:

# refresh metadata
if not self._cache_metadata:
self._load_metadata()

return f(*args, **kwargs)

else:

# synchronize on the array
mkey = self._key_prefix + array_meta_key

with self._synchronizer[mkey]:

# refresh metadata
if not self._cache_metadata:
self._load_metadata()

return f(*args, **kwargs)

def resize(self, *args):
Expand Down Expand Up @@ -1022,7 +1066,7 @@ def view(self, shape=None, chunks=None, dtype=None,
... v.resize(20000)
... except PermissionError as e:
... print(e)
operation not permitted for views
not permitted for views
""" # flake8: noqa

Expand All @@ -1034,7 +1078,8 @@ def view(self, shape=None, chunks=None, dtype=None,
if synchronizer is None:
synchronizer = self._synchronizer
a = Array(store=store, path=path, chunk_store=chunk_store,
read_only=read_only, synchronizer=synchronizer)
read_only=read_only, synchronizer=synchronizer,
cache_metadata=True)
a._is_view = True

# allow override of some properties
Expand Down

0 comments on commit 1ddaa66

Please sign in to comment.