From 499f45ea8149eacffa717b63b7afc4384f30b18c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 18:21:40 +0000 Subject: [PATCH] style: pre-commit fixes --- bench/compress_normal.py | 25 +- docs/conf.py | 197 ++- zarr/__init__.py | 69 +- zarr/_storage/absstore.py | 83 +- zarr/_storage/store.py | 112 +- zarr/_storage/v3.py | 197 ++- zarr/_storage/v3_storage_transformers.py | 75 +- zarr/attrs.py | 31 +- zarr/context.py | 4 +- zarr/convenience.py | 338 +++-- zarr/core.py | 311 ++-- zarr/creation.py | 266 ++-- zarr/errors.py | 5 +- zarr/hierarchy.py | 487 ++++--- zarr/indexing.py | 230 ++- zarr/meta.py | 59 +- zarr/meta_v1.py | 40 +- zarr/n5.py | 345 +++-- zarr/storage.py | 661 +++++---- zarr/tests/test_attrs.py | 277 ++-- zarr/tests/test_convenience.py | 664 +++++---- zarr/tests/test_core.py | 1702 ++++++++++++---------- zarr/tests/test_creation.py | 287 ++-- zarr/tests/test_dim_separator.py | 41 +- zarr/tests/test_filters.py | 57 +- zarr/tests/test_hierarchy.py | 1142 ++++++++------- zarr/tests/test_indexing.py | 425 +++--- zarr/tests/test_info.py | 53 +- zarr/tests/test_meta.py | 333 +++-- zarr/tests/test_meta_array.py | 2 +- zarr/tests/test_n5.py | 15 +- zarr/tests/test_storage.py | 1639 ++++++++++----------- zarr/tests/test_storage_v3.py | 379 ++--- zarr/tests/test_sync.py | 184 ++- zarr/tests/test_util.py | 116 +- zarr/tests/util.py | 24 +- zarr/util.py | 267 ++-- 37 files changed, 5855 insertions(+), 5287 deletions(-) diff --git a/bench/compress_normal.py b/bench/compress_normal.py index ce0a05b9ec..9f1655541c 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -9,36 +9,39 @@ if __name__ == "__main__": - sys.path.insert(0, '..') + sys.path.insert(0, "..") # setup - a = np.random.normal(2000, 1000, size=200000000).astype('u2') - z = zarr.empty_like(a, chunks=1000000, - compression='blosc', - compression_opts=dict(cname='lz4', clevel=5, shuffle=2)) + a = np.random.normal(2000, 1000, size=200000000).astype("u2") + z = zarr.empty_like( + a, + chunks=1000000, + compression="blosc", + compression_opts=dict(cname="lz4", clevel=5, shuffle=2), + ) print(z) - print('*' * 79) + print("*" * 79) # time - t = timeit.repeat('z[:] = a', repeat=10, number=1, globals=globals()) + t = timeit.repeat("z[:] = a", repeat=10, number=1, globals=globals()) print(t) print(min(t)) print(z) # profile profile = line_profiler.LineProfiler(blosc.compress) - profile.run('z[:] = a') + profile.run("z[:] = a") profile.print_stats() - print('*' * 79) + print("*" * 79) # time - t = timeit.repeat('z[:]', repeat=10, number=1, globals=globals()) + t = timeit.repeat("z[:]", repeat=10, number=1, globals=globals()) print(t) print(min(t)) # profile profile = line_profiler.LineProfiler(blosc.decompress) - profile.run('z[:]') + profile.run("z[:]") profile.print_stats() diff --git a/docs/conf.py b/docs/conf.py index 413d648732..f85ecb7454 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,50 +26,50 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.append(os.path.abspath('..')) +sys.path.append(os.path.abspath("..")) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. 
-#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.viewcode', - 'sphinx.ext.intersphinx', - 'numpydoc', - 'sphinx_issues', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "numpydoc", + "sphinx_issues", "sphinx_copybutton", - "sphinx_design" + "sphinx_design", ] numpydoc_show_class_members = False numpydoc_class_members_toctree = False -issues_github_path = 'zarr-developers/zarr-python' +issues_github_path = "zarr-developers/zarr-python" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The main toctree document. -main_doc = 'index' +main_doc = "index" # General information about the project. -project = 'zarr' -copyright = '2022, Zarr Developers' -author = 'Zarr Developers' +project = "zarr" +copyright = "2022, Zarr Developers" +author = "Zarr Developers" version = zarr.__version__ # The full version, including alpha/beta/rc tags. @@ -80,42 +80,42 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'en' +language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'talks'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "talks"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -125,181 +125,174 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-html_theme = 'pydata_sphinx_theme' +html_theme = "pydata_sphinx_theme" -html_favicon = '_static/logo1.png' +html_favicon = "_static/logo1.png" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { - "github_url": "https://github.com/zarr-developers/zarr-python", - "twitter_url": "https://twitter.com/zarr_dev", - "icon_links": [ - { - "name": "Zarr Dev", - "url": "https://zarr.dev/", - "icon": "_static/logo1.png", - "type": "local" - }, - ], - "collapse_navigation": True + "github_url": "https://github.com/zarr-developers/zarr-python", + "twitter_url": "https://twitter.com/zarr_dev", + "icon_links": [ + { + "name": "Zarr Dev", + "url": "https://zarr.dev/", + "icon": "_static/logo1.png", + "type": "local", + }, + ], + "collapse_navigation": True, } # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. -#html_title = 'zarr v@@' +# html_title = 'zarr v@@' # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = '_static/logo1.png' +html_logo = "_static/logo1.png" # Add custom css def setup(app): - app.add_css_file('custom.css') + app.add_css_file("custom.css") # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] html_js_files = [ - 'custom.js', + "custom.js", ] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. -#html_last_updated_fmt = None +# html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 
-#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'zarrdoc' +htmlhelp_basename = "zarrdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (main_doc, 'zarr.tex', 'Zarr-Python', - author, 'manual'), + (main_doc, "zarr.tex", "Zarr-Python", author, "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (main_doc, 'zarr', 'Zarr-Python', - [author], 1) -] +man_pages = [(main_doc, "zarr", "Zarr-Python", [author], 1)] # If true, show URL addresses after external links. 
-#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -308,30 +301,36 @@ def setup(app): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (main_doc, 'zarr', 'Zarr-Python', - author, 'zarr', 'One line description of project.', - 'Miscellaneous'), + ( + main_doc, + "zarr", + "Zarr-Python", + author, + "zarr", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. # use in refs e.g: # :ref:`comparison manual ` intersphinx_mapping = { - 'python': ('https://docs.python.org/', None), - 'numpy': ('https://numpy.org/doc/stable/', None), + "python": ("https://docs.python.org/", None), + "numpy": ("https://numpy.org/doc/stable/", None), } diff --git a/zarr/__init__.py b/zarr/__init__.py index 4d2c992dbf..6cecb40af8 100644 --- a/zarr/__init__.py +++ b/zarr/__init__.py @@ -1,20 +1,53 @@ # flake8: noqa from zarr.codecs import * -from zarr.convenience import (consolidate_metadata, copy, copy_all, copy_store, - load, open, open_consolidated, save, save_array, - save_group, tree) +from zarr.convenience import ( + consolidate_metadata, + copy, + copy_all, + copy_store, + load, + open, + open_consolidated, + save, + save_array, + save_group, + tree, +) from zarr.core import Array -from zarr.creation import (array, create, empty, empty_like, full, full_like, - ones, ones_like, open_array, open_like, zeros, - zeros_like) +from zarr.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + open_array, + open_like, + zeros, + zeros_like, +) from zarr.errors import CopyError, MetadataError from zarr.hierarchy import Group, group, open_group from zarr.n5 import N5Store, N5FSStore from zarr._storage.store import v3_api_available -from zarr.storage import (ABSStore, DBMStore, DictStore, DirectoryStore, - KVStore, LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, - NestedDirectoryStore, RedisStore, SQLiteStore, - TempStore, ZipStore) +from zarr.storage import ( + ABSStore, + DBMStore, + DictStore, + DirectoryStore, + KVStore, + LMDBStore, + LRUStoreCache, + MemoryStore, + MongoDBStore, + NestedDirectoryStore, + RedisStore, + SQLiteStore, + TempStore, + ZipStore, +) from zarr.sync import ProcessSynchronizer, ThreadSynchronizer from zarr.version import version as __version__ @@ -22,6 +55,16 @@ assert not __version__.startswith("0.0.0") if v3_api_available: - from zarr._storage.v3 import (ABSStoreV3, DBMStoreV3, KVStoreV3, DirectoryStoreV3, - LMDBStoreV3, LRUStoreCacheV3, MemoryStoreV3, MongoDBStoreV3, - RedisStoreV3, SQLiteStoreV3, ZipStoreV3) + from zarr._storage.v3 import ( + ABSStoreV3, + DBMStoreV3, + KVStoreV3, + DirectoryStoreV3, + LMDBStoreV3, + LRUStoreCacheV3, + MemoryStoreV3, + MongoDBStoreV3, + RedisStoreV3, + SQLiteStoreV3, + ZipStoreV3, + ) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index cc41018f9e..613062306a 100644 --- 
a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -6,7 +6,7 @@ from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, Store, StoreV3 __doctest_requires__ = { - ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], + ("ABSStore", "ABSStore.*"): ["azure.storage.blob"], } @@ -60,10 +60,16 @@ class ABSStore(Store): ``azure-storage-blob>=12.5.0``. """ - def __init__(self, container=None, prefix='', account_name=None, account_key=None, - blob_service_kwargs=None, dimension_separator=None, - client=None, - ): + def __init__( + self, + container=None, + prefix="", + account_name=None, + account_key=None, + blob_service_kwargs=None, + dimension_separator=None, + client=None, + ): self._dimension_separator = dimension_separator self.prefix = normalize_storage_path(prefix) if client is None: @@ -75,11 +81,14 @@ def __init__(self, container=None, prefix='', account_name=None, account_key=Non ) warnings.warn(msg, FutureWarning, stacklevel=2) from azure.storage.blob import ContainerClient + blob_service_kwargs = blob_service_kwargs or {} client = ContainerClient( - "https://{}.blob.core.windows.net/".format(account_name), container, - credential=account_key, **blob_service_kwargs - ) + "https://{}.blob.core.windows.net/".format(account_name), + container, + credential=account_key, + **blob_service_kwargs + ) self.client = client self._container = container @@ -88,8 +97,10 @@ def __init__(self, container=None, prefix='', account_name=None, account_key=Non @staticmethod def _warn_deprecated(property_): - msg = ("The {} property is deprecated and will be removed in a future " - "version. Get the property from 'ABSStore.client' instead.") + msg = ( + "The {} property is deprecated and will be removed in a future " + "version. Get the property from 'ABSStore.client' instead." 
+ ) warnings.warn(msg.format(property_), FutureWarning, stacklevel=3) @property @@ -108,10 +119,10 @@ def account_key(self): return self._account_key def _append_path_to_prefix(self, path): - if self.prefix == '': + if self.prefix == "": return normalize_storage_path(path) else: - return '/'.join([self.prefix, normalize_storage_path(path)]) + return "/".join([self.prefix, normalize_storage_path(path)]) @staticmethod def _strip_prefix_from_path(path, prefix): @@ -119,17 +130,18 @@ def _strip_prefix_from_path(path, prefix): path_norm = normalize_storage_path(path) prefix_norm = normalize_storage_path(prefix) if prefix: - return path_norm[(len(prefix_norm)+1):] + return path_norm[(len(prefix_norm) + 1) :] else: return path_norm def __getitem__(self, key): from azure.core.exceptions import ResourceNotFoundError + blob_name = self._append_path_to_prefix(key) try: return self.client.download_blob(blob_name).readall() except ResourceNotFoundError: - raise KeyError('Blob %s not found' % blob_name) + raise KeyError("Blob %s not found" % blob_name) def __setitem__(self, key, value): value = ensure_bytes(value) @@ -138,16 +150,17 @@ def __setitem__(self, key, value): def __delitem__(self, key): from azure.core.exceptions import ResourceNotFoundError + try: self.client.delete_blob(self._append_path_to_prefix(key)) except ResourceNotFoundError: - raise KeyError('Blob %s not found' % key) + raise KeyError("Blob %s not found" % key) def __eq__(self, other): return ( - isinstance(other, ABSStore) and - self.client == other.client and - self.prefix == other.prefix + isinstance(other, ABSStore) + and self.client == other.client + and self.prefix == other.prefix ) def keys(self): @@ -155,7 +168,7 @@ def keys(self): def __iter__(self): if self.prefix: - list_blobs_prefix = self.prefix + '/' + list_blobs_prefix = self.prefix + "/" else: list_blobs_prefix = None for blob in self.client.list_blobs(list_blobs_prefix): @@ -171,17 +184,17 @@ def __contains__(self, key): def listdir(self, path=None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: - dir_path += '/' + dir_path += "/" items = [ self._strip_prefix_from_path(blob.name, dir_path) - for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter='/') + for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter="/") ] return items def rmdir(self, path=None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: - dir_path += '/' + dir_path += "/" for blob in self.client.list_blobs(name_starts_with=dir_path): self.client.delete_blob(blob) @@ -197,11 +210,11 @@ def getsize(self, path=None): return blob_client.get_blob_properties().size else: size = 0 - if fs_path == '': + if fs_path == "": fs_path = None - elif not fs_path.endswith('/'): - fs_path += '/' - for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'): + elif not fs_path.endswith("/"): + fs_path += "/" + for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter="/"): blob_client = self.client.get_blob_client(blob) if blob_client.exists(): size += blob_client.get_blob_properties().size @@ -212,15 +225,14 @@ def clear(self): class ABSStoreV3(ABSStore, StoreV3): - def list(self): return list(self.keys()) def __eq__(self, other): return ( - isinstance(other, ABSStoreV3) and - self.client == other.client and - self.prefix == other.prefix + isinstance(other, ABSStoreV3) + and self.client == other.client + and self.prefix == other.prefix ) def __setitem__(self, key, value): @@ -234,24 
+246,24 @@ def rmdir(self, path=None): # If we disallow an empty path then we will need to modify # TestABSStoreV3 to have the create_store method use a prefix. - ABSStore.rmdir(self, '') + ABSStore.rmdir(self, "") return meta_dir = meta_root + path - meta_dir = meta_dir.rstrip('/') + meta_dir = meta_dir.rstrip("/") ABSStore.rmdir(self, meta_dir) # remove data folder data_dir = data_root + path - data_dir = data_dir.rstrip('/') + data_dir = data_dir.rstrip("/") ABSStore.rmdir(self, data_dir) # remove metadata files sfx = _get_metadata_suffix(self) - array_meta_file = meta_dir + '.array' + sfx + array_meta_file = meta_dir + ".array" + sfx if array_meta_file in self: del self[array_meta_file] - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx if group_meta_file in self: del self[group_meta_file] @@ -259,6 +271,7 @@ def rmdir(self, path=None): # For now, calling the generic keys-based _getsize def getsize(self, path=None): from zarr.storage import _getsize # avoid circular import + return _getsize(self, path) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 0594dc22de..e8c57e95e4 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -11,17 +11,17 @@ from zarr.context import Context # v2 store keys -array_meta_key = '.zarray' -group_meta_key = '.zgroup' -attrs_key = '.zattrs' +array_meta_key = ".zarray" +group_meta_key = ".zgroup" +attrs_key = ".zattrs" # v3 paths -meta_root = 'meta/root/' -data_root = 'data/root/' +meta_root = "meta/root/" +data_root = "data/root/" DEFAULT_ZARR_VERSION = 2 -v3_api_available = os.environ.get('ZARR_V3_EXPERIMENTAL_API', '0').lower() not in ['0', 'false'] +v3_api_available = os.environ.get("ZARR_V3_EXPERIMENTAL_API", "0").lower() not in ["0", "false"] def assert_zarr_v3_api_available(): @@ -229,11 +229,11 @@ def _validate_key(self, key: str): ): raise ValueError("keys starts with unexpected value: `{}`".format(key)) - if key.endswith('/'): + if key.endswith("/"): raise ValueError("keys may not end in /") def list_prefix(self, prefix): - if prefix.startswith('/'): + if prefix.startswith("/"): raise ValueError("prefix must not begin with /") # TODO: force prefix to end with /? return [k for k in self.list() if k.startswith(prefix)] @@ -294,8 +294,7 @@ def supports_efficient_get_partial_values(self): return False def get_partial_values( - self, - key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] + self, key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] ) -> List[Union[bytes, memoryview, bytearray]]: """Get multiple partial values. key_ranges can be an iterable of key, range pairs, @@ -306,11 +305,11 @@ def get_partial_values( from the end of the file. A key may occur multiple times with different ranges. 
Inserts None for missing keys into the returned list.""" - results: List[Union[bytes, memoryview, bytearray]] = ( - [None] * len(key_ranges) # type: ignore[list-item] - ) - indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = ( - defaultdict(list) + results: List[Union[bytes, memoryview, bytearray]] = [None] * len( + key_ranges + ) # type: ignore[list-item] + indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = defaultdict( + list ) for i, (key, range_) in enumerate(key_ranges): indexed_ranges_by_key[key].append((i, range_)) @@ -323,7 +322,7 @@ def get_partial_values( if range_length is None: results[i] = value[range_from:] else: - results[i] = value[range_from:range_from + range_length] + results[i] = value[range_from : range_from + range_length] return results def supports_efficient_set_partial_values(self): @@ -356,7 +355,7 @@ def set_partial_values(self, key_start_values): if start < 0: values[key][start:] = value else: - values[key][start:start + len(value)] = value + values[key][start : start + len(value)] = value for key, value in values.items(): self[key] = value @@ -377,14 +376,13 @@ def _ensure_store(store): We'll do this conversion in a few places automatically """ from zarr._storage.v3 import KVStoreV3 # avoid circular import + if store is None: return None elif isinstance(store, StoreV3): return store elif isinstance(store, Store): - raise ValueError( - f"cannot initialize a v3 store with a v{store._store_version} store" - ) + raise ValueError(f"cannot initialize a v3 store with a v{store._store_version} store") elif isinstance(store, MutableMapping): return KVStoreV3(store) else: @@ -444,10 +442,7 @@ def get_config(self): # Override in sub-class if need special encoding of config values. # By default, assume all non-private members are configuration # parameters except for type . - return { - k: v for k, v in self.__dict__.items() - if not k.startswith('_') and k != "type" - } + return {k: v for k, v in self.__dict__.items() if not k.startswith("_") and k != "type"} @classmethod def from_config(cls, _type, config): @@ -460,18 +455,18 @@ def from_config(cls, _type, config): @property def inner_store(self) -> Union["StorageTransformer", StoreV3]: - assert self._inner_store is not None, ( - "inner_store is not initialized, first get a copy via _copy_for_array." - ) + assert ( + self._inner_store is not None + ), "inner_store is not initialized, first get a copy via _copy_for_array." 
return self._inner_store # The following implementations are usually fine to keep as-is: def __eq__(self, other): return ( - type(self) == type(other) and - self._inner_store == other._inner_store and - self.get_config() == other.get_config() + type(self) == type(other) + and self._inner_store == other._inner_store + and self.get_config() == other.get_config() ) def erase(self, key): @@ -561,42 +556,41 @@ def set_partial_values(self, key_start_values): def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: - prefix = path + '/' + prefix = path + "/" else: - prefix = '' + prefix = "" return prefix def _get_hierarchy_metadata(store: StoreV3) -> Mapping[str, Any]: - version = getattr(store, '_store_version', 2) + version = getattr(store, "_store_version", 2) if version < 3: - raise ValueError("zarr.json hierarchy metadata not stored for " - f"zarr v{version} stores") - if 'zarr.json' not in store: + raise ValueError("zarr.json hierarchy metadata not stored for " f"zarr v{version} stores") + if "zarr.json" not in store: raise ValueError("zarr.json metadata not found in store") - return store._metadata_class.decode_hierarchy_metadata(store['zarr.json']) + return store._metadata_class.decode_hierarchy_metadata(store["zarr.json"]) def _get_metadata_suffix(store: StoreV3) -> str: - if 'zarr.json' in store: - return _get_hierarchy_metadata(store)['metadata_key_suffix'] - return '.json' + if "zarr.json" in store: + return _get_hierarchy_metadata(store)["metadata_key_suffix"] + return ".json" def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool: """Rename source or group metadata file associated with src_path.""" any_renamed = False sfx = _get_metadata_suffix(store) - src_path = src_path.rstrip('/') - dst_path = dst_path.rstrip('/') - _src_array_json = meta_root + src_path + '.array' + sfx + src_path = src_path.rstrip("/") + dst_path = dst_path.rstrip("/") + _src_array_json = meta_root + src_path + ".array" + sfx if _src_array_json in store: - new_key = meta_root + dst_path + '.array' + sfx + new_key = meta_root + dst_path + ".array" + sfx store[new_key] = store.pop(_src_array_json) any_renamed = True - _src_group_json = meta_root + src_path + '.group' + sfx + _src_group_json = meta_root + src_path + ".group" + sfx if _src_group_json in store: - new_key = meta_root + dst_path + '.group' + sfx + new_key = meta_root + dst_path + ".group" + sfx store[new_key] = store.pop(_src_group_json) any_renamed = True return any_renamed @@ -606,7 +600,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) - version = getattr(store, '_store_version', 2) + version = getattr(store, "_store_version", 2) if version == 2: for key in list(store.keys()): if key.startswith(src_prefix): @@ -618,7 +612,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: _src_prefix = root_prefix + src_prefix _dst_prefix = root_prefix + dst_prefix for key in store.list_prefix(_src_prefix): # type: ignore - new_key = _dst_prefix + key[len(_src_prefix):] + new_key = _dst_prefix + key[len(_src_prefix) :] store[new_key] = store.pop(key) any_renamed = True any_meta_renamed = _rename_metadata_v3(store, src_path, dst_path) # type: ignore @@ -639,20 +633,20 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: meta_dir = meta_root + 
path - meta_dir = meta_dir.rstrip('/') + meta_dir = meta_dir.rstrip("/") _rmdir_from_keys(store, meta_dir) # remove data folder data_dir = data_root + path - data_dir = data_dir.rstrip('/') + data_dir = data_dir.rstrip("/") _rmdir_from_keys(store, data_dir) # remove metadata files sfx = _get_metadata_suffix(store) - array_meta_file = meta_dir + '.array' + sfx + array_meta_file = meta_dir + ".array" + sfx if array_meta_file in store: store.erase(array_meta_file) # type: ignore - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx if group_meta_file in store: store.erase(group_meta_file) # type: ignore @@ -663,8 +657,8 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str children = set() for key in list(store.keys()): if key.startswith(prefix) and len(key) > len(prefix): - suffix = key[len(prefix):] - child = suffix.split('/')[0] + suffix = key[len(prefix) :] + child = suffix.split("/")[0] children.add(child) return sorted(children) @@ -675,7 +669,7 @@ def _prefix_to_array_key(store: StoreLike, prefix: str) -> str: if prefix: key = meta_root + prefix.rstrip("/") + ".array" + sfx else: - key = meta_root[:-1] + '.array' + sfx + key = meta_root[:-1] + ".array" + sfx else: key = prefix + array_meta_key return key @@ -685,9 +679,9 @@ def _prefix_to_group_key(store: StoreLike, prefix: str) -> str: if getattr(store, "_store_version", 2) == 3: sfx = _get_metadata_suffix(store) # type: ignore if prefix: - key = meta_root + prefix.rstrip('/') + ".group" + sfx + key = meta_root + prefix.rstrip("/") + ".group" + sfx else: - key = meta_root[:-1] + '.group' + sfx + key = meta_root[:-1] + ".group" + sfx else: key = prefix + group_meta_key return key @@ -698,9 +692,9 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str: # for v3, attributes are stored in the array metadata sfx = _get_metadata_suffix(store) # type: ignore if prefix: - key = meta_root + prefix.rstrip('/') + ".array" + sfx + key = meta_root + prefix.rstrip("/") + ".array" + sfx else: - key = meta_root[:-1] + '.array' + sfx + key = meta_root[:-1] + ".array" + sfx else: key = prefix + attrs_key return key diff --git a/zarr/_storage/v3.py b/zarr/_storage/v3.py index 094deed02e..ecf3d5e857 100644 --- a/zarr/_storage/v3.py +++ b/zarr/_storage/v3.py @@ -9,44 +9,60 @@ MetadataError, ReadOnlyError, ) -from zarr.util import (buffer_size, json_loads, normalize_storage_path) +from zarr.util import buffer_size, json_loads, normalize_storage_path from zarr._storage.absstore import ABSStoreV3 # noqa: F401 -from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 - _get_metadata_suffix, - _listdir_from_keys, - _rename_from_keys, - _rename_metadata_v3, - _rmdir_from_keys, - _rmdir_from_keys_v3, - _path_to_prefix, - _prefix_to_array_key, - _prefix_to_group_key, - array_meta_key, - attrs_key, - data_root, - group_meta_key, - meta_root, - BaseStore, - Store, - StoreV3) -from zarr.storage import (DBMStore, ConsolidatedMetadataStore, DirectoryStore, FSStore, KVStore, - LMDBStore, LRUStoreCache, MemoryStore, MongoDBStore, RedisStore, - SQLiteStore, ZipStore, _getsize) +from zarr._storage.store import ( + _get_hierarchy_metadata, # noqa: F401 + _get_metadata_suffix, + _listdir_from_keys, + _rename_from_keys, + _rename_metadata_v3, + _rmdir_from_keys, + _rmdir_from_keys_v3, + _path_to_prefix, + _prefix_to_array_key, + _prefix_to_group_key, + array_meta_key, + attrs_key, + data_root, + group_meta_key, + meta_root, + BaseStore, + Store, + StoreV3, +) +from zarr.storage 
import ( + DBMStore, + ConsolidatedMetadataStore, + DirectoryStore, + FSStore, + KVStore, + LMDBStore, + LRUStoreCache, + MemoryStore, + MongoDBStore, + RedisStore, + SQLiteStore, + ZipStore, + _getsize, +) __doctest_requires__ = { - ('RedisStore', 'RedisStore.*'): ['redis'], - ('MongoDBStore', 'MongoDBStore.*'): ['pymongo'], - ('LRUStoreCache', 'LRUStoreCache.*'): ['s3fs'], + ("RedisStore", "RedisStore.*"): ["redis"], + ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], + ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], } try: # noinspection PyUnresolvedReferences from zarr.codecs import Blosc + default_compressor = Blosc() except ImportError: # pragma: no cover from zarr.codecs import Zlib + default_compressor = Zlib() @@ -55,7 +71,7 @@ StoreLike = Union[BaseStore, MutableMapping] -class RmdirV3(): +class RmdirV3: """Mixin class that can be used to ensure override of any existing v2 rmdir class.""" def rmdir(self, path: str = "") -> None: @@ -64,7 +80,6 @@ def rmdir(self, path: str = "") -> None: class KVStoreV3(RmdirV3, KVStore, StoreV3): - def list(self): return list(self._mutable_mapping.keys()) @@ -73,10 +88,7 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def __eq__(self, other): - return ( - isinstance(other, KVStoreV3) and - self._mutable_mapping == other._mutable_mapping - ) + return isinstance(other, KVStoreV3) and self._mutable_mapping == other._mutable_mapping KVStoreV3.__doc__ = KVStore.__doc__ @@ -122,15 +134,15 @@ def list(self): return list(self.keys()) def _normalize_key(self, key): - key = normalize_storage_path(key).lstrip('/') + key = normalize_storage_path(key).lstrip("/") return key.lower() if self.normalize_keys else key def getsize(self, path=None): size = 0 - if path is None or path == '': + if path is None or path == "": # size of both the data and meta subdirs dirs = [] - for d in ['data/root', 'meta/root']: + for d in ["data/root", "meta/root"]: dir_path = os.path.join(self.path, d) if os.path.exists(dir_path): dirs.append(dir_path) @@ -146,7 +158,7 @@ def getsize(self, path=None): return size def setitems(self, values): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() values = {self._normalize_key(key): val for key, val in values.items()} @@ -162,7 +174,7 @@ def setitems(self, values): self.map.setitems(values) def rmdir(self, path=None): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() if path: for base in [meta_root, data_root]: @@ -172,10 +184,10 @@ def rmdir(self, path=None): # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) else: store_path = self.dir_path(path) @@ -213,7 +225,6 @@ def get_partial_values(self, key_ranges): class MemoryStoreV3(MemoryStore, StoreV3): - def __init__(self, root=None, cls=dict, dimension_separator=None): if root is None: self.root = cls() @@ -225,9 +236,7 @@ def __init__(self, root=None, cls=dict, dimension_separator=None): def __eq__(self, other): return ( - isinstance(other, MemoryStoreV3) and - self.root == other.root and - self.cls == other.cls + isinstance(other, MemoryStoreV3) and self.root == other.root and self.cls == other.cls ) def __setitem__(self, key, value): @@ -256,13 +265,13 @@ def rename(self, 
src_path: Path, dst_path: Path): if base == meta_root: # check for and move corresponding metadata sfx = _get_metadata_suffix(self) - src_meta = src_key + '.array' + sfx + src_meta = src_key + ".array" + sfx if src_meta in src_parent: - dst_meta = dst_key + '.array' + sfx + dst_meta = dst_key + ".array" + sfx dst_parent[dst_meta] = src_parent.pop(src_meta) - src_meta = src_key + '.group' + sfx + src_meta = src_key + ".group" + sfx if src_meta in src_parent: - dst_meta = dst_key + '.group' + sfx + dst_meta = dst_key + ".group" + sfx dst_parent[dst_meta] = src_parent.pop(src_meta) any_renamed = True any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed @@ -284,10 +293,10 @@ def rmdir(self, path: Path = None): # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) else: # clear out root @@ -298,15 +307,11 @@ def rmdir(self, path: Path = None): class DirectoryStoreV3(DirectoryStore, StoreV3): - def list(self): return list(self.keys()) def __eq__(self, other): - return ( - isinstance(other, DirectoryStoreV3) and - self.path == other.path - ) + return isinstance(other, DirectoryStoreV3) and self.path == other.path def __setitem__(self, key, value): self._validate_key(key) @@ -315,25 +320,24 @@ def __setitem__(self, key, value): def getsize(self, path: Path = None): return _getsize(self, path) - def rename(self, src_path, dst_path, metadata_key_suffix='.json'): + def rename(self, src_path, dst_path, metadata_key_suffix=".json"): store_src_path = normalize_storage_path(src_path) store_dst_path = normalize_storage_path(dst_path) dir_path = self.path any_existed = False - for root_prefix in ['meta', 'data']: - src_path = os.path.join(dir_path, root_prefix, 'root', store_src_path) + for root_prefix in ["meta", "data"]: + src_path = os.path.join(dir_path, root_prefix, "root", store_src_path) if os.path.exists(src_path): any_existed = True - dst_path = os.path.join(dir_path, root_prefix, 'root', store_dst_path) + dst_path = os.path.join(dir_path, root_prefix, "root", store_dst_path) os.renames(src_path, dst_path) - for suffix in ['.array' + metadata_key_suffix, - '.group' + metadata_key_suffix]: - src_meta = os.path.join(dir_path, 'meta', 'root', store_src_path + suffix) + for suffix in [".array" + metadata_key_suffix, ".group" + metadata_key_suffix]: + src_meta = os.path.join(dir_path, "meta", "root", store_src_path + suffix) if os.path.exists(src_meta): any_existed = True - dst_meta = os.path.join(dir_path, 'meta', 'root', store_dst_path + suffix) + dst_meta = os.path.join(dir_path, "meta", "root", store_dst_path + suffix) dst_dir = os.path.dirname(dst_meta) if not os.path.exists(dst_dir): os.makedirs(dst_dir) @@ -352,10 +356,10 @@ def rmdir(self, path=None): # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) elif os.path.isdir(dir_path): @@ -366,16 +370,15 @@ def rmdir(self, 
path=None): class ZipStoreV3(ZipStore, StoreV3): - def list(self): return list(self.keys()) def __eq__(self, other): return ( - isinstance(other, ZipStore) and - self.path == other.path and - self.compression == other.compression and - self.allowZip64 == other.allowZip64 + isinstance(other, ZipStore) + and self.path == other.path + and self.compression == other.compression + and self.allowZip64 == other.allowZip64 ) def __setitem__(self, key, value): @@ -405,7 +408,6 @@ def getsize(self, path=None): class RedisStoreV3(RmdirV3, RedisStore, StoreV3): - def list(self): return list(self.keys()) @@ -418,7 +420,6 @@ def __setitem__(self, key, value): class MongoDBStoreV3(RmdirV3, MongoDBStore, StoreV3): - def list(self): return list(self.keys()) @@ -431,7 +432,6 @@ def __setitem__(self, key, value): class DBMStoreV3(RmdirV3, DBMStore, StoreV3): - def list(self): return list(self.keys()) @@ -444,7 +444,6 @@ def __setitem__(self, key, value): class LMDBStoreV3(RmdirV3, LMDBStore, StoreV3): - def list(self): return list(self.keys()) @@ -457,7 +456,6 @@ def __setitem__(self, key, value): class SQLiteStoreV3(SQLiteStore, StoreV3): - def list(self): return list(self.keys()) @@ -490,15 +488,13 @@ def rmdir(self, path=None): if path: for base in [meta_root, data_root]: with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,) - ) + self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,)) # remove any associated metadata files sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip('/') - array_meta_file = meta_dir + '.array' + sfx + meta_dir = (meta_root + path).rstrip("/") + array_meta_file = meta_dir + ".array" + sfx self.pop(array_meta_file, None) - group_meta_file = meta_dir + '.group' + sfx + group_meta_file = meta_dir + ".group" + sfx self.pop(group_meta_file, None) else: self.clear() @@ -508,7 +504,6 @@ def rmdir(self, path=None): class LRUStoreCacheV3(RmdirV3, LRUStoreCache, StoreV3): - def __init__(self, store, max_size: int): self._store = StoreV3._ensure_store(store) self._max_size = max_size @@ -572,10 +567,11 @@ def __init__(self, store: StoreLike, metadata_key=meta_root + "consolidated/.zme meta = json_loads(self.store[metadata_key]) # check format of consolidated metadata - consolidated_format = meta.get('zarr_consolidated_format', None) + consolidated_format = meta.get("zarr_consolidated_format", None) if consolidated_format != 1: - raise MetadataError('unsupported zarr consolidated metadata format: %s' % - consolidated_format) + raise MetadataError( + "unsupported zarr consolidated metadata format: %s" % consolidated_format + ) # decode metadata self.meta_store: Store = KVStoreV3(meta["metadata"]) @@ -586,34 +582,37 @@ def rmdir(self, key): def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseStore: # default to v2 store for backward compatibility - zarr_version = getattr(store, '_store_version', 3) + zarr_version = getattr(store, "_store_version", 3) if zarr_version != 3: raise ValueError("store must be a version 3 store") if store is None: store = KVStoreV3(dict()) # add default zarr.json metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) return store if isinstance(store, os.PathLike): store = os.fspath(store) if FSStore._fsspec_installed(): import fsspec + if isinstance(store, fsspec.FSMap): - return FSStoreV3(store.root, - fs=store.fs, - mode=mode, - 
check=store.check, - create=store.create, - missing_exceptions=store.missing_exceptions, - **(storage_options or {})) + return FSStoreV3( + store.root, + fs=store.fs, + mode=mode, + check=store.check, + create=store.create, + missing_exceptions=store.missing_exceptions, + **(storage_options or {}), + ) if isinstance(store, str): if "://" in store or "::" in store: store = FSStoreV3(store, mode=mode, **(storage_options or {})) elif storage_options: raise ValueError("storage_options passed with non-fsspec path") - elif store.endswith('.zip'): + elif store.endswith(".zip"): store = ZipStoreV3(store, mode=mode) - elif store.endswith('.n5'): + elif store.endswith(".n5"): raise NotImplementedError("N5Store not yet implemented for V3") # return N5StoreV3(store) else: @@ -621,7 +620,7 @@ def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseS else: store = StoreV3._ensure_store(store) - if 'zarr.json' not in store: + if "zarr.json" not in store: # add default zarr.json metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) return store diff --git a/zarr/_storage/v3_storage_transformers.py b/zarr/_storage/v3_storage_transformers.py index 3675d42c38..ff31a7281c 100644 --- a/zarr/_storage/v3_storage_transformers.py +++ b/zarr/_storage/v3_storage_transformers.py @@ -10,10 +10,10 @@ from zarr.util import normalize_storage_path -MAX_UINT_64 = 2 ** 64 - 1 +MAX_UINT_64 = 2**64 - 1 -v3_sharding_available = os.environ.get('ZARR_V3_SHARDING', '0').lower() not in ['0', 'false'] +v3_sharding_available = os.environ.get("ZARR_V3_SHARDING", "0").lower() not in ["0", "false"] def assert_zarr_v3_sharding_available(): @@ -31,8 +31,7 @@ class _ShardIndex(NamedTuple): def __localize_chunk__(self, chunk: Tuple[int, ...]) -> Tuple[int, ...]: return tuple( - chunk_i % shard_i - for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard) + chunk_i % shard_i for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard) ) def is_all_empty(self) -> bool: @@ -46,9 +45,7 @@ def get_chunk_slice(self, chunk: Tuple[int, ...]) -> Optional[slice]: else: return slice(int(chunk_start), int(chunk_start + chunk_len)) - def set_chunk_slice( - self, chunk: Tuple[int, ...], chunk_slice: Optional[slice] - ) -> None: + def set_chunk_slice(self, chunk: Tuple[int, ...], chunk_slice: Optional[slice]) -> None: localized_chunk = self.__localize_chunk__(chunk) if chunk_slice is None: self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64) @@ -79,8 +76,7 @@ def from_bytes( def create_empty(cls, store: "ShardingStorageTransformer"): # reserving 2*64bit per chunk for offset and length: return cls.from_bytes( - MAX_UINT_64.to_bytes(8, byteorder="little") - * (2 * store._num_chunks_per_shard), + MAX_UINT_64.to_bytes(8, byteorder="little") * (2 * store._num_chunks_per_shard), store=store, ) @@ -98,15 +94,13 @@ def __init__(self, _type, chunks_per_shard) -> None: assert_zarr_v3_sharding_available() super().__init__(_type) if isinstance(chunks_per_shard, int): - chunks_per_shard = (chunks_per_shard, ) + chunks_per_shard = (chunks_per_shard,) else: chunks_per_shard = tuple(int(i) for i in chunks_per_shard) if chunks_per_shard == (): - chunks_per_shard = (1, ) + chunks_per_shard = (1,) self.chunks_per_shard = chunks_per_shard - self._num_chunks_per_shard = functools.reduce( - lambda x, y: x * y, chunks_per_shard, 1 - ) + self._num_chunks_per_shard = functools.reduce(lambda x, y: x * y, chunks_per_shard, 
1) self._dimension_separator = None self._data_key_prefix = None @@ -118,36 +112,33 @@ def _copy_for_array(self, array, inner_store): # The array shape might be longer when initialized with subdtypes. # subdtypes dimensions come last, therefore padding chunks_per_shard # with ones, effectively disabling sharding on the unlisted dimensions. - transformer_copy.chunks_per_shard += ( - (1, ) * (len(array._shape) - len(self.chunks_per_shard)) + transformer_copy.chunks_per_shard += (1,) * ( + len(array._shape) - len(self.chunks_per_shard) ) return transformer_copy @property def dimension_separator(self) -> str: - assert self._dimension_separator is not None, ( - "dimension_separator is not initialized, first get a copy via _copy_for_array." - ) + assert ( + self._dimension_separator is not None + ), "dimension_separator is not initialized, first get a copy via _copy_for_array." return self._dimension_separator def _is_data_key(self, key: str) -> bool: - assert self._data_key_prefix is not None, ( - "data_key_prefix is not initialized, first get a copy via _copy_for_array." - ) + assert ( + self._data_key_prefix is not None + ), "data_key_prefix is not initialized, first get a copy via _copy_for_array." return key.startswith(self._data_key_prefix) def _key_to_shard(self, chunk_key: str) -> Tuple[str, Tuple[int, ...]]: prefix, _, chunk_string = chunk_key.rpartition("c") - chunk_subkeys = tuple( - map(int, chunk_string.split(self.dimension_separator)) - ) if chunk_string else (0, ) - shard_key_tuple = ( - subkey // shard_i - for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard) + chunk_subkeys = ( + tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,) ) - shard_key = ( - prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple)) + shard_key_tuple = ( + subkey // shard_i for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard) ) + shard_key = prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple)) return shard_key, chunk_subkeys def _get_index_from_store(self, shard_key: str) -> _ShardIndex: @@ -164,16 +155,14 @@ def _get_index_from_store(self, shard_key: str) -> _ShardIndex: def _get_index_from_buffer(self, buffer: Union[bytes, bytearray]) -> _ShardIndex: # At the end of each shard 2*64bit per chunk for offset and length define the index: - return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard:], self) + return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard :], self) def _get_chunks_in_shard(self, shard_key: str) -> Iterator[Tuple[int, ...]]: _, _, chunk_string = shard_key.rpartition("c") - shard_key_tuple = tuple( - map(int, chunk_string.split(self.dimension_separator)) - ) if chunk_string else (0, ) - for chunk_offset in itertools.product( - *(range(i) for i in self.chunks_per_shard) - ): + shard_key_tuple = ( + tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,) + ) + for chunk_offset in itertools.product(*(range(i) for i in self.chunks_per_shard)): yield tuple( shard_key_i * shards_i + offset_i for shard_key_i, offset_i, shards_i in zip( @@ -250,9 +239,7 @@ def __setitem__(self, key, value): for _, chunk_slice in valid_chunk_slices ] ) - for chunk_value, (chunk_to_read, _) in zip( - chunk_values, valid_chunk_slices - ): + for chunk_value, (chunk_to_read, _) in zip(chunk_values, valid_chunk_slices): new_content[chunk_to_read] = chunk_value else: if full_shard_value is None: @@ -263,9 +250,7 @@ def __setitem__(self, key, value): shard_content = b"" 
for chunk_subkey, chunk_content in new_content.items(): - chunk_slice = slice( - len(shard_content), len(shard_content) + len(chunk_content) - ) + chunk_slice = slice(len(shard_content), len(shard_content) + len(chunk_content)) index.set_chunk_slice(chunk_subkey, chunk_slice) shard_content += chunk_content # Appending the index at the end of the shard: @@ -298,9 +283,7 @@ def _shard_key_to_original_keys(self, key: str) -> Iterator[str]: prefix, _, _ = key.rpartition("c") for chunk_tuple in self._get_chunks_in_shard(key): if index.get_chunk_slice(chunk_tuple) is not None: - yield prefix + "c" + self.dimension_separator.join( - map(str, chunk_tuple) - ) + yield prefix + "c" + self.dimension_separator.join(map(str, chunk_tuple)) else: yield key diff --git a/zarr/attrs.py b/zarr/attrs.py index 60dd7f1d79..01fc617b3c 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -25,10 +25,9 @@ class Attributes(MutableMapping): """ - def __init__(self, store, key='.zattrs', read_only=False, cache=True, - synchronizer=None): + def __init__(self, store, key=".zattrs", read_only=False, cache=True, synchronizer=None): - self._version = getattr(store, '_store_version', 2) + self._version = getattr(store, "_store_version", 2) _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) self.key = key @@ -43,7 +42,7 @@ def _get_nosync(self): except KeyError: d = dict() if self._version > 2: - d['attributes'] = {} + d["attributes"] = {} else: d = self.store._metadata_class.parse_metadata(data) return d @@ -54,7 +53,7 @@ def asdict(self): return self._cached_asdict d = self._get_nosync() if self._version == 3: - d = d['attributes'] + d = d["attributes"] if self.cache: self._cached_asdict = d return d @@ -65,7 +64,7 @@ def refresh(self): if self._version == 2: self._cached_asdict = self._get_nosync() else: - self._cached_asdict = self._get_nosync()['attributes'] + self._cached_asdict = self._get_nosync()["attributes"] def __contains__(self, x): return x in self.asdict() @@ -77,7 +76,7 @@ def _write_op(self, f, *args, **kwargs): # guard condition if self.read_only: - raise PermissionError('attributes are read-only') + raise PermissionError("attributes are read-only") # synchronization if self.synchronizer is None: @@ -98,7 +97,7 @@ def _setitem_nosync(self, item, value): if self._version == 2: d[item] = value else: - d['attributes'][item] = value + d["attributes"][item] = value # _put modified data self._put_nosync(d) @@ -115,7 +114,7 @@ def _delitem_nosync(self, key): if self._version == 2: del d[key] else: - del d['attributes'][key] + del d["attributes"][key] # _put modified data self._put_nosync(d) @@ -137,8 +136,8 @@ def _put_nosync(self, d): warnings.warn( "only attribute keys of type 'string' will be allowed in the future", DeprecationWarning, - stacklevel=2 - ) + stacklevel=2, + ) try: d_to_check = {str(k): v for k, v in d_to_check.items()} @@ -163,15 +162,15 @@ def _put_nosync(self, d): # Note: this changes the store.counter result in test_caching_on! 
meta = self.store._metadata_class.parse_metadata(self.store[self.key]) - if 'attributes' in meta and 'filters' in meta['attributes']: + if "attributes" in meta and "filters" in meta["attributes"]: # need to preserve any existing "filters" attribute - d['attributes']['filters'] = meta['attributes']['filters'] - meta['attributes'] = d['attributes'] + d["attributes"]["filters"] = meta["attributes"]["filters"] + meta["attributes"] = d["attributes"] else: meta = d self.store[self.key] = json_dumps(meta) if self.cache: - self._cached_asdict = d['attributes'] + self._cached_asdict = d["attributes"] # noinspection PyMethodOverriding def update(self, *args, **kwargs): @@ -187,7 +186,7 @@ def _update_nosync(self, *args, **kwargs): if self._version == 2: d.update(*args, **kwargs) else: - d['attributes'].update(*args, **kwargs) + d["attributes"].update(*args, **kwargs) # _put modified data self._put_nosync(d) diff --git a/zarr/context.py b/zarr/context.py index 83fbaafa9b..3dd7dda4ac 100644 --- a/zarr/context.py +++ b/zarr/context.py @@ -1,11 +1,10 @@ - from typing import TypedDict from numcodecs.compat import NDArrayLike class Context(TypedDict, total=False): - """ A context for component specific information + """A context for component specific information All keys are optional. Any component reading the context must provide a default implementation in the case a key cannot be found. @@ -16,4 +15,5 @@ class Context(TypedDict, total=False): An array-like instance to use for determining the preferred output array type. """ + meta_array: NDArrayLike diff --git a/zarr/convenience.py b/zarr/convenience.py index 9a0eae20a3..ff236d0df2 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -14,8 +14,14 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import (_get_metadata_suffix, contains_array, contains_group, - normalize_store_arg, BaseStore, ConsolidatedMetadataStore) +from zarr.storage import ( + _get_metadata_suffix, + contains_array, + contains_group, + normalize_store_arg, + BaseStore, + ConsolidatedMetadataStore, +) from zarr._storage.v3 import ConsolidatedMetadataStoreV3 from zarr.util import TreeViewer, buffer_size, normalize_storage_path @@ -25,7 +31,7 @@ def _check_and_update_path(store: BaseStore, path): - if getattr(store, '_store_version', 2) > 2 and not path: + if getattr(store, "_store_version", 2) > 2 and not path: raise ValueError("path must be provided for v3 stores") return normalize_storage_path(path) @@ -94,15 +100,17 @@ def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=No # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL _store: BaseStore = normalize_store_arg( - store, storage_options=kwargs.pop("storage_options", {}), mode=mode, + store, + storage_options=kwargs.pop("storage_options", {}), + mode=mode, zarr_version=zarr_version, ) # path = _check_and_update_path(_store, path) path = normalize_storage_path(path) - kwargs['path'] = path + kwargs["path"] = path - if mode in {'w', 'w-', 'x'}: - if 'shape' in kwargs: + if mode in {"w", "w-", "x"}: + if "shape" in kwargs: return open_array(_store, mode=mode, **kwargs) else: return open_group(_store, mode=mode, **kwargs) @@ -167,8 +175,9 @@ def save_array(store: StoreLike, arr, *, zarr_version=None, path=None, **kwargs) _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) path = 
_check_and_update_path(_store, path) try: - _create_array(arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, - **kwargs) + _create_array( + arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, **kwargs + ) finally: if may_need_closing: # needed to ensure zip file records are written @@ -240,7 +249,7 @@ def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): """ if len(args) == 0 and len(kwargs) == 0: - raise ValueError('at least one array must be provided') + raise ValueError("at least one array must be provided") # handle polymorphic store arg may_need_closing = _might_close(store) _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) @@ -248,7 +257,7 @@ def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): try: grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version) for i, arr in enumerate(args): - k = 'arr_{}'.format(i) + k = "arr_{}".format(i) grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) for k, arr in kwargs.items(): grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) @@ -337,16 +346,14 @@ def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): """ if len(args) == 0 and len(kwargs) == 0: - raise ValueError('at least one array must be provided') + raise ValueError("at least one array must be provided") if len(args) == 1 and len(kwargs) == 0: save_array(store, args[0], zarr_version=zarr_version, path=path) else: - save_group(store, *args, zarr_version=zarr_version, path=path, - **kwargs) + save_group(store, *args, zarr_version=zarr_version, path=path, **kwargs) class LazyLoader(Mapping): - def __init__(self, grp): self.grp = grp self.cache = dict() @@ -369,9 +376,9 @@ def __contains__(self, item): return item in self.grp def __repr__(self): - r = ' ' + dest_key + descr = descr + " -> " + dest_key # decide what to do do_copy = True - if if_exists != 'replace': + if if_exists != "replace": if dest_key in dest: - if if_exists == 'raise': - raise CopyError('key {!r} exists in destination' - .format(dest_key)) - elif if_exists == 'skip': + if if_exists == "raise": + raise CopyError("key {!r} exists in destination".format(dest_key)) + elif if_exists == "skip": do_copy = False # take action if do_copy: - log('copy {}'.format(descr)) + log("copy {}".format(descr)) if not dry_run: data = source[source_key] n_bytes_copied += buffer_size(data) dest[dest_key] = data n_copied += 1 else: - log('skip {}'.format(descr)) + log("skip {}".format(descr)) n_skipped += 1 # log a final message with a summary of what happened @@ -727,12 +743,21 @@ def copy_store(source, dest, source_path='', dest_path='', excludes=None, def _check_dest_is_group(dest): - if not hasattr(dest, 'create_dataset'): - raise ValueError('dest must be a group, got {!r}'.format(dest)) - - -def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None, - if_exists='raise', dry_run=False, **create_kws): + if not hasattr(dest, "create_dataset"): + raise ValueError("dest must be a group, got {!r}".format(dest)) + + +def copy( + source, + dest, + name=None, + shallow=False, + without_attrs=False, + log=None, + if_exists="raise", + dry_run=False, + **create_kws +): """Copy the `source` array or group into the `dest` group. 
Parameters @@ -855,8 +880,15 @@ def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None, # do the copying n_copied, n_skipped, n_bytes_copied = _copy( - log, source, dest, name=name, root=True, shallow=shallow, - without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, + log, + source, + dest, + name=name, + root=True, + shallow=shallow, + without_attrs=without_attrs, + if_exists=if_exists, + dry_run=dry_run, **create_kws ) @@ -866,47 +898,49 @@ def copy(source, dest, name=None, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, - dry_run, **create_kws): +def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_run, **create_kws): # N.B., if this is a dry run, dest may be None # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 # are we copying to/from h5py? - source_h5py = source.__module__.startswith('h5py.') - dest_h5py = dest is not None and dest.__module__.startswith('h5py.') + source_h5py = source.__module__.startswith("h5py.") + dest_h5py = dest is not None and dest.__module__.startswith("h5py.") # check if_exists parameter - valid_if_exists = ['raise', 'replace', 'skip', 'skip_initialized'] + valid_if_exists = ["raise", "replace", "skip", "skip_initialized"] if if_exists not in valid_if_exists: - raise ValueError('if_exists must be one of {!r}; found {!r}' - .format(valid_if_exists, if_exists)) - if dest_h5py and if_exists == 'skip_initialized': - raise ValueError('{!r} can only be used when copying to zarr' - .format(if_exists)) + raise ValueError( + "if_exists must be one of {!r}; found {!r}".format(valid_if_exists, if_exists) + ) + if dest_h5py and if_exists == "skip_initialized": + raise ValueError("{!r} can only be used when copying to zarr".format(if_exists)) # determine name to copy to if name is None: - name = source.name.split('/')[-1] + name = source.name.split("/")[-1] if not name: # this can happen if source is the root group - raise TypeError('source has no name, please provide the `name` ' - 'parameter to indicate a name to copy to') + raise TypeError( + "source has no name, please provide the `name` " + "parameter to indicate a name to copy to" + ) - if hasattr(source, 'shape'): + if hasattr(source, "shape"): # copy a dataset/array # check if already exists, decide what to do do_copy = True exists = dest is not None and name in dest if exists: - if if_exists == 'raise': - raise CopyError('an object {!r} already exists in destination ' - '{!r}'.format(name, dest.name)) - elif if_exists == 'skip': + if if_exists == "raise": + raise CopyError( + "an object {!r} already exists in destination " "{!r}".format(name, dest.name) + ) + elif if_exists == "skip": do_copy = False - elif if_exists == 'skip_initialized': + elif if_exists == "skip_initialized": ds = dest[name] if ds.nchunks_initialized == ds.nchunks: do_copy = False @@ -915,7 +949,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, if do_copy: # log a message about what we're going to do - log('copy {} {} {}'.format(source.name, source.shape, source.dtype)) + log("copy {} {} {}".format(source.name, source.shape, source.dtype)) if not dry_run: @@ -927,38 +961,37 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, kws = create_kws.copy() # setup chunks option, preserve by default - kws.setdefault('chunks', source.chunks) + kws.setdefault("chunks", source.chunks) # setup 
compression options if source_h5py: if dest_h5py: # h5py -> h5py; preserve compression options by default - kws.setdefault('compression', source.compression) - kws.setdefault('compression_opts', source.compression_opts) - kws.setdefault('shuffle', source.shuffle) - kws.setdefault('fletcher32', source.fletcher32) - kws.setdefault('fillvalue', source.fillvalue) + kws.setdefault("compression", source.compression) + kws.setdefault("compression_opts", source.compression_opts) + kws.setdefault("shuffle", source.shuffle) + kws.setdefault("fletcher32", source.fletcher32) + kws.setdefault("fillvalue", source.fillvalue) else: # h5py -> zarr; use zarr default compression options - kws.setdefault('fill_value', source.fillvalue) + kws.setdefault("fill_value", source.fillvalue) else: if dest_h5py: # zarr -> h5py; use some vaguely sensible defaults - kws.setdefault('chunks', True) - kws.setdefault('compression', 'gzip') - kws.setdefault('compression_opts', 1) - kws.setdefault('shuffle', False) - kws.setdefault('fillvalue', source.fill_value) + kws.setdefault("chunks", True) + kws.setdefault("compression", "gzip") + kws.setdefault("compression_opts", 1) + kws.setdefault("shuffle", False) + kws.setdefault("fillvalue", source.fill_value) else: # zarr -> zarr; preserve compression options by default - kws.setdefault('compressor', source.compressor) - kws.setdefault('filters', source.filters) - kws.setdefault('order', source.order) - kws.setdefault('fill_value', source.fill_value) + kws.setdefault("compressor", source.compressor) + kws.setdefault("filters", source.filters) + kws.setdefault("order", source.order) + kws.setdefault("fill_value", source.fill_value) # create new dataset in destination - ds = dest.create_dataset(name, shape=source.shape, - dtype=source.dtype, **kws) + ds = dest.create_dataset(name, shape=source.shape, dtype=source.dtype, **kws) # copy data - N.B., go chunk by chunk to avoid loading # everything into memory @@ -966,19 +999,18 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, chunks = ds.chunks chunk_offsets = [range(0, s, c) for s, c in zip(shape, chunks)] for offset in itertools.product(*chunk_offsets): - sel = tuple(slice(o, min(s, o + c)) - for o, s, c in zip(offset, shape, chunks)) + sel = tuple(slice(o, min(s, o + c)) for o, s, c in zip(offset, shape, chunks)) ds[sel] = source[sel] n_bytes_copied += ds.size * ds.dtype.itemsize # copy attributes if not without_attrs: - if dest_h5py and 'filters' in source.attrs: + if dest_h5py and "filters" in source.attrs: # No filters key in v3 metadata so it was stored in the # attributes instead. We cannot copy this key to # HDF5 attrs, though! 
source_attrs = source.attrs.asdict().copy() - source_attrs.pop('filters', None) + source_attrs.pop("filters", None) else: source_attrs = source.attrs ds.attrs.update(source_attrs) @@ -986,7 +1018,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, n_copied += 1 else: - log('skip {} {} {}'.format(source.name, source.shape, source.dtype)) + log("skip {} {} {}".format(source.name, source.shape, source.dtype)) n_skipped += 1 elif root or not shallow: @@ -994,21 +1026,20 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # check if an array is in the way do_copy = True - exists_array = (dest is not None and - name in dest and - hasattr(dest[name], 'shape')) + exists_array = dest is not None and name in dest and hasattr(dest[name], "shape") if exists_array: - if if_exists == 'raise': - raise CopyError('an array {!r} already exists in destination ' - '{!r}'.format(name, dest.name)) - elif if_exists == 'skip': + if if_exists == "raise": + raise CopyError( + "an array {!r} already exists in destination " "{!r}".format(name, dest.name) + ) + elif if_exists == "skip": do_copy = False # take action if do_copy: # log action - log('copy {}'.format(source.name)) + log("copy {}".format(source.name)) if not dry_run: @@ -1035,9 +1066,17 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, # recurse for k in source.keys(): c, s, b = _copy( - log, source[k], grp, name=k, root=False, shallow=shallow, - without_attrs=without_attrs, if_exists=if_exists, - dry_run=dry_run, **create_kws) + log, + source[k], + grp, + name=k, + root=False, + shallow=shallow, + without_attrs=without_attrs, + if_exists=if_exists, + dry_run=dry_run, + **create_kws + ) n_copied += c n_skipped += s n_bytes_copied += b @@ -1045,14 +1084,22 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, n_copied += 1 else: - log('skip {}'.format(source.name)) + log("skip {}".format(source.name)) n_skipped += 1 return n_copied, n_skipped, n_bytes_copied -def copy_all(source, dest, shallow=False, without_attrs=False, log=None, - if_exists='raise', dry_run=False, **create_kws): +def copy_all( + source, + dest, + shallow=False, + without_attrs=False, + log=None, + if_exists="raise", + dry_run=False, + **create_kws +): """Copy all children of the `source` group into the `dest` group. Parameters @@ -1137,16 +1184,24 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, # setup counting variables n_copied = n_skipped = n_bytes_copied = 0 - zarr_version = getattr(source, '_version', 2) + zarr_version = getattr(source, "_version", 2) # setup logging with _LogWriter(log) as log: for k in source.keys(): c, s, b = _copy( - log, source[k], dest, name=k, root=False, shallow=shallow, - without_attrs=without_attrs, if_exists=if_exists, - dry_run=dry_run, **create_kws) + log, + source[k], + dest, + name=k, + root=False, + shallow=shallow, + without_attrs=without_attrs, + if_exists=if_exists, + dry_run=dry_run, + **create_kws + ) n_copied += c n_skipped += s n_bytes_copied += b @@ -1159,7 +1214,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path=''): +def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path=""): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. 
@@ -1203,8 +1258,7 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path='' if version == 2: def is_zarr_key(key): - return (key.endswith('.zarray') or key.endswith('.zgroup') or - key.endswith('.zattrs')) + return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs") else: @@ -1213,23 +1267,21 @@ def is_zarr_key(key): sfx = _get_metadata_suffix(store) # type: ignore def is_zarr_key(key): - return (key.endswith('.array' + sfx) or key.endswith('.group' + sfx) or - key == 'zarr.json') + return ( + key.endswith(".array" + sfx) or key.endswith(".group" + sfx) or key == "zarr.json" + ) # cannot create a group without a path in v3 # so create /meta/root/consolidated group to store the metadata - if 'consolidated' not in store: - _create_group(store, path='consolidated') - if not metadata_key.startswith('meta/root/'): - metadata_key = 'meta/root/consolidated/' + metadata_key + if "consolidated" not in store: + _create_group(store, path="consolidated") + if not metadata_key.startswith("meta/root/"): + metadata_key = "meta/root/consolidated/" + metadata_key # path = 'consolidated' out = { - 'zarr_consolidated_format': 1, - 'metadata': { - key: json_loads(store[key]) - for key in store if is_zarr_key(key) - } + "zarr_consolidated_format": 1, + "metadata": {key: json_loads(store[key]) for key in store if is_zarr_key(key)}, } store[metadata_key] = json_dumps(out) return open_consolidated(store, metadata_key=metadata_key, path=path) @@ -1278,26 +1330,26 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", ** """ # normalize parameters - zarr_version = kwargs.get('zarr_version') - store = normalize_store_arg(store, storage_options=kwargs.get("storage_options"), mode=mode, - zarr_version=zarr_version) - if mode not in {'r', 'r+'}: - raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}" - .format(mode)) - - path = kwargs.pop('path', None) + zarr_version = kwargs.get("zarr_version") + store = normalize_store_arg( + store, storage_options=kwargs.get("storage_options"), mode=mode, zarr_version=zarr_version + ) + if mode not in {"r", "r+"}: + raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}".format(mode)) + + path = kwargs.pop("path", None) if store._store_version == 2: ConsolidatedStoreClass = ConsolidatedMetadataStore else: assert_zarr_v3_api_available() ConsolidatedStoreClass = ConsolidatedMetadataStoreV3 # default is to store within 'consolidated' group on v3 - if not metadata_key.startswith('meta/root/'): - metadata_key = 'meta/root/consolidated/' + metadata_key + if not metadata_key.startswith("meta/root/"): + metadata_key = "meta/root/consolidated/" + metadata_key # setup metadata store meta_store = ConsolidatedStoreClass(store, metadata_key=metadata_key) # pass through - chunk_store = kwargs.pop('chunk_store', None) or store + chunk_store = kwargs.pop("chunk_store", None) or store return open(store=meta_store, chunk_store=chunk_store, mode=mode, path=path, **kwargs) diff --git a/zarr/core.py b/zarr/core.py index 80f424bafc..43ccdbaf7d 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -193,17 +193,16 @@ def __init__( assert_zarr_v3_api_available() if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, - zarr_version=zarr_version) + chunk_store = normalize_store_arg(chunk_store, zarr_version=zarr_version) self._store = store self._chunk_store = chunk_store self._transformed_chunk_store = None self._path = normalize_storage_path(path) if self._path: - 
self._key_prefix = self._path + '/' + self._key_prefix = self._path + "/" else: - self._key_prefix = '' + self._key_prefix = "" self._read_only = bool(read_only) self._synchronizer = synchronizer self._cache_metadata = cache_metadata @@ -216,18 +215,19 @@ def __init__( self._meta_array = np.empty(()) self._version = zarr_version if self._version == 3: - self._data_key_prefix = 'data/root/' + self._key_prefix - self._data_path = 'data/root/' + self._path + self._data_key_prefix = "data/root/" + self._key_prefix + self._data_path = "data/root/" + self._path self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) - self._metadata_key_suffix = self._hierarchy_metadata['metadata_key_suffix'] + self._metadata_key_suffix = self._hierarchy_metadata["metadata_key_suffix"] # initialize metadata self._load_metadata() # initialize attributes akey = _prefix_to_attrs_key(self._store, self._key_prefix) - self._attrs = Attributes(store, key=akey, read_only=read_only, - synchronizer=synchronizer, cache=cache_attrs) + self._attrs = Attributes( + store, key=akey, read_only=read_only, synchronizer=synchronizer, cache=cache_attrs + ) # initialize info reporter self._info_reporter = InfoReporter(self) @@ -257,13 +257,13 @@ def _load_metadata_nosync(self): # decode and store metadata as instance members meta = self._store._metadata_class.decode_array_metadata(meta_bytes) self._meta = meta - self._shape = meta['shape'] - self._fill_value = meta['fill_value'] - dimension_separator = meta.get('dimension_separator', None) + self._shape = meta["shape"] + self._fill_value = meta["fill_value"] + dimension_separator = meta.get("dimension_separator", None) if self._version == 2: - self._chunks = meta['chunks'] - self._dtype = meta['dtype'] - self._order = meta['order'] + self._chunks = meta["chunks"] + self._dtype = meta["dtype"] + self._order = meta["order"] if dimension_separator is None: try: dimension_separator = self._store._dimension_separator @@ -274,17 +274,17 @@ def _load_metadata_nosync(self): if dimension_separator is None: dimension_separator = "." else: - self._chunks = meta['chunk_grid']['chunk_shape'] - self._dtype = meta['data_type'] - self._order = meta['chunk_memory_layout'] - chunk_separator = meta['chunk_grid']['separator'] + self._chunks = meta["chunk_grid"]["chunk_shape"] + self._dtype = meta["data_type"] + self._order = meta["chunk_memory_layout"] + chunk_separator = meta["chunk_grid"]["separator"] if dimension_separator is None: - dimension_separator = meta.get('dimension_separator', chunk_separator) + dimension_separator = meta.get("dimension_separator", chunk_separator) self._dimension_separator = dimension_separator # setup compressor - compressor = meta.get('compressor', None) + compressor = meta.get("compressor", None) if compressor is None: self._compressor = None elif self._version == 2: @@ -294,17 +294,17 @@ def _load_metadata_nosync(self): # setup filters if self._version == 2: - filters = meta.get('filters', []) + filters = meta.get("filters", []) else: # TODO: storing filters under attributes for now since the v3 # array metadata does not have a 'filters' attribute. 
- filters = meta['attributes'].get('filters', []) + filters = meta["attributes"].get("filters", []) if filters: filters = [get_codec(config) for config in filters] self._filters = filters if self._version == 3: - storage_transformers = meta.get('storage_transformers', []) + storage_transformers = meta.get("storage_transformers", []) if storage_transformers: transformed_store = self._chunk_store or self._store for storage_transformer in storage_transformers[::-1]: @@ -323,7 +323,7 @@ def _refresh_metadata_nosync(self): def _flush_metadata_nosync(self): if self._is_view: - raise PermissionError('operation not permitted for views') + raise PermissionError("operation not permitted for views") if self._compressor: compressor_config = self._compressor.get_config() @@ -334,20 +334,26 @@ def _flush_metadata_nosync(self): else: filters_config = None _compressor = compressor_config if self._version == 2 else self._compressor - meta = dict(shape=self._shape, compressor=_compressor, - fill_value=self._fill_value, filters=filters_config) - if getattr(self._store, '_store_version', 2) == 2: - meta.update( - dict(chunks=self._chunks, dtype=self._dtype, order=self._order) - ) + meta = dict( + shape=self._shape, + compressor=_compressor, + fill_value=self._fill_value, + filters=filters_config, + ) + if getattr(self._store, "_store_version", 2) == 2: + meta.update(dict(chunks=self._chunks, dtype=self._dtype, order=self._order)) else: meta.update( - dict(chunk_grid=dict(type='regular', - chunk_shape=self._chunks, - separator=self._dimension_separator), - data_type=self._dtype, - chunk_memory_layout=self._order, - attributes=self.attrs.asdict()) + dict( + chunk_grid=dict( + type="regular", + chunk_shape=self._chunks, + separator=self._dimension_separator, + ), + data_type=self._dtype, + chunk_memory_layout=self._order, + attributes=self.attrs.asdict(), + ) ) mkey = _prefix_to_array_key(self._store, self._key_prefix) self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) @@ -368,8 +374,8 @@ def name(self): if self.path: # follow h5py convention: add leading slash name = self.path - if name[0] != '/': - name = '/' + name + if name[0] != "/": + name = "/" + name return name return None @@ -377,7 +383,7 @@ def name(self): def basename(self): """Final component of name.""" if self.name is not None: - return self.name.split('/')[-1] + return self.name.split("/")[-1] return None @property @@ -513,10 +519,9 @@ def nbytes_stored(self): @property def _cdata_shape(self): if self._shape == (): - return 1, + return (1,) else: - return tuple(math.ceil(s / c) - for s, c in zip(self._shape, self._chunks)) + return tuple(math.ceil(s / c) for s, c in zip(self._shape, self._chunks)) @property def cdata_shape(self): @@ -550,14 +555,14 @@ def nchunks_initialized(self): # return sum(1 for k in members if prog.match(k)) # key pattern for chunk keys - prog = re.compile(self._data_key_prefix + r'c\d+') # TODO: ndim == 0 case? + prog = re.compile(self._data_key_prefix + r"c\d+") # TODO: ndim == 0 case? 
# get chunk keys, excluding the prefix members = self.chunk_store.list_prefix(self._data_path) # count the chunk keys return sum(1 for k in members if prog.match(k)) else: # key pattern for chunk keys - prog = re.compile(r'\.'.join([r'\d+'] * min(1, self.ndim))) + prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) # count chunk keys return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) @@ -605,11 +610,11 @@ def meta_array(self): def __eq__(self, other): return ( - isinstance(other, Array) and - self.store == other.store and - self.read_only == other.read_only and - self.path == other.path and - not self._is_view + isinstance(other, Array) + and self.store == other.store + and self.read_only == other.read_only + and self.path == other.path + and not self._is_view # N.B., no need to compare other properties, should be covered by # store comparison ) @@ -664,10 +669,10 @@ def islice(self, start=None, end=None): end = self.shape[0] if not isinstance(start, int) or start < 0: - raise ValueError('start must be a nonnegative integer') + raise ValueError("start must be a nonnegative integer") if not isinstance(end, int) or end < 0: - raise ValueError('end must be a nonnegative integer') + raise ValueError("end must be a nonnegative integer") # Avoid repeatedly decompressing chunks by iterating over the chunks # in the first dimension. @@ -675,7 +680,7 @@ def islice(self, start=None, end=None): chunk = None for j in range(start, end): if j % chunk_size == 0: - chunk = self[j: j + chunk_size] + chunk = self[j : j + chunk_size] # init chunk if we start offset of chunk borders elif chunk is None: chunk_start = j - j % chunk_size @@ -691,7 +696,7 @@ def __len__(self): return self.shape[0] else: # 0-dimensional array, same error message as numpy - raise TypeError('len() of unsized object') + raise TypeError("len() of unsized object") def __getitem__(self, selection): """Retrieve data for an item or region of the array. 
@@ -960,11 +965,9 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): # handle zero-dimensional arrays if self._shape == (): - return self._get_basic_selection_zd(selection=selection, out=out, - fields=fields) + return self._get_basic_selection_zd(selection=selection, out=out, fields=fields) else: - return self._get_basic_selection_nd(selection=selection, out=out, - fields=fields) + return self._get_basic_selection_nd(selection=selection, out=out, fields=fields) def _get_basic_selection_zd(self, selection, out=None, fields=None): # special case basic selection for zero-dimensional array @@ -1371,10 +1374,11 @@ def _get_selection(self, indexer, out=None, fields=None): # setup output array if out is None: - out = np.empty_like(self._meta_array, shape=out_shape, - dtype=out_dtype, order=self._order) + out = np.empty_like( + self._meta_array, shape=out_shape, dtype=out_dtype, order=self._order + ) else: - check_array_shape('out', out, out_shape) + check_array_shape("out", out, out_shape) # iterate over chunks @@ -1382,8 +1386,12 @@ def _get_selection(self, indexer, out=None, fields=None): # allow storage to get multiple items at once lchunk_coords, lchunk_selection, lout_selection = zip(*indexer) self._chunk_getitems( - lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=indexer.drop_axes, fields=fields + lchunk_coords, + lchunk_selection, + out, + lout_selection, + drop_axes=indexer.drop_axes, + fields=fields, ) if out.shape: return out @@ -1753,7 +1761,7 @@ def set_coordinate_selection(self, selection, value, fields=None): except TypeError: # Handle types like `list` or `tuple` value = np.array(value, like=self._meta_array) - if hasattr(value, 'shape') and len(value.shape) > 1: + if hasattr(value, "shape") and len(value.shape) > 1: value = value.reshape(-1) self._set_selection(indexer, value, fields=fields) @@ -1998,13 +2006,16 @@ def _set_selection(self, indexer, value, fields=None): # setting a scalar value pass else: - if not hasattr(value, 'shape'): + if not hasattr(value, "shape"): value = np.asanyarray(value, like=self._meta_array) - check_array_shape('value', value, sel_shape) + check_array_shape("value", value, sel_shape) # iterate over chunks in range - if not hasattr(self.chunk_store, "setitems") or self._synchronizer is not None \ - or any(map(lambda x: x == 0, self.shape)): + if ( + not hasattr(self.chunk_store, "setitems") + or self._synchronizer is not None + or any(map(lambda x: x == 0, self.shape)) + ): # iterative approach for chunk_coords, chunk_selection, out_selection in indexer: @@ -2044,8 +2055,7 @@ def _set_selection(self, indexer, value, fields=None): cv = chunk_value[item] chunk_values.append(cv) - self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, - fields=fields) + self._chunk_setitems(lchunk_coords, lchunk_selection, chunk_values, fields=fields) def _process_chunk( self, @@ -2059,23 +2069,22 @@ def _process_chunk( partial_read_decode=False, ): """Take binary data from storage and fill output array""" - if (out_is_ndarray and - not fields and - is_contiguous_selection(out_selection) and - is_total_slice(chunk_selection, self._chunks) and - not self._filters and - self._dtype != object): + if ( + out_is_ndarray + and not fields + and is_contiguous_selection(out_selection) + and is_total_slice(chunk_selection, self._chunks) + and not self._filters + and self._dtype != object + ): dest = out[out_selection] # Assume that array-like objects that doesn't have a # `writeable` flag is writable. 
dest_is_writable = getattr(dest, "writeable", True) - write_direct = ( - dest_is_writable and - ( - (self._order == 'C' and dest.flags.c_contiguous) or - (self._order == 'F' and dest.flags.f_contiguous) - ) + write_direct = dest_is_writable and ( + (self._order == "C" and dest.flags.c_contiguous) + or (self._order == "F" and dest.flags.f_contiguous) ) if write_direct: @@ -2104,9 +2113,7 @@ def _process_chunk( index_selection = PartialChunkIterator(chunk_selection, self.chunks) for start, nitems, partial_out_selection in index_selection: expected_shape = [ - len( - range(*partial_out_selection[i].indices(self.chunks[0] + 1)) - ) + len(range(*partial_out_selection[i].indices(self.chunks[0] + 1))) if i < len(partial_out_selection) else dim for i, dim in enumerate(self.chunks) @@ -2143,8 +2150,9 @@ def _process_chunk( # store selected data in output out[out_selection] = tmp - def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, - drop_axes=None, fields=None): + def _chunk_getitems( + self, lchunk_coords, lchunk_selection, out, lout_selection, drop_axes=None, fields=None + ): """Obtain part or whole of chunks. Parameters @@ -2238,8 +2246,10 @@ def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection, def _chunk_setitems(self, lchunk_coords, lchunk_selection, values, fields=None): ckeys = map(self._chunk_key, lchunk_coords) - cdatas = {key: self._process_for_setitem(key, sel, val, fields=fields) - for key, sel, val in zip(ckeys, lchunk_selection, values)} + cdatas = { + key: self._process_for_setitem(key, sel, val, fields=fields) + for key, sel, val in zip(ckeys, lchunk_selection, values) + } to_store = {} if not self.write_empty_chunks: empty_chunks = {k: v for k, v in cdatas.items() if all_equal(self.fill_value, v)} @@ -2291,8 +2301,7 @@ def _chunk_setitem(self, chunk_coords, chunk_selection, value, fields=None): lock = self._synchronizer[ckey] with lock: - self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, - fields=fields) + self._chunk_setitem_nosync(chunk_coords, chunk_selection, value, fields=fields) def _chunk_setitem_nosync(self, chunk_coords, chunk_selection, value, fields=None): ckey = self._chunk_key(chunk_coords) @@ -2354,7 +2363,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): # decode chunk chunk = self._decode_chunk(cdata) if not chunk.flags.writeable: - chunk = chunk.copy(order='K') + chunk = chunk.copy(order="K") # modify if fields: @@ -2372,8 +2381,12 @@ def _chunk_key(self, chunk_coords): # where P = self._key_prefix, i, j, ... = chunk_coords # e.g. 
c0/2/3 for 3d array with chunk index (0, 2, 3) # https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids - return ("data/root/" + self._key_prefix + - "c" + self._dimension_separator.join(map(str, chunk_coords))) + return ( + "data/root/" + + self._key_prefix + + "c" + + self._dimension_separator.join(map(str, chunk_coords)) + ) else: return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) @@ -2382,8 +2395,7 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): if self._compressor: # only decode requested items if ( - all(x is not None for x in [start, nitems]) - and self._compressor.codec_id == "blosc" + all(x is not None for x in [start, nitems]) and self._compressor.codec_id == "blosc" ) and hasattr(self._compressor, "decode_partial"): chunk = self._compressor.decode_partial(cdata, start, nitems) else: @@ -2408,10 +2420,10 @@ def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # codec in the filter chain, i.e., a filter that converts from object # array to something else during encoding, and converts back to object # array during decoding. - raise RuntimeError('cannot read object array without object codec') + raise RuntimeError("cannot read object array without object codec") # ensure correct chunk shape - chunk = chunk.reshape(-1, order='A') + chunk = chunk.reshape(-1, order="A") chunk = chunk.reshape(expected_shape or self._chunks, order=self._order) return chunk @@ -2425,7 +2437,7 @@ def _encode_chunk(self, chunk): # check object encoding if ensure_ndarray_like(chunk).dtype == object: - raise RuntimeError('cannot write object array without object codec') + raise RuntimeError("cannot write object array without object codec") # compress if self._compressor: @@ -2434,24 +2446,21 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if ( - isinstance(self.chunk_store, KVStore) - or isinstance(self._chunk_store, KVStore) - ): + if isinstance(self.chunk_store, KVStore) or isinstance(self._chunk_store, KVStore): cdata = ensure_bytes(cdata) return cdata def __repr__(self): t = type(self) - r = '<{}.{}'.format(t.__module__, t.__name__) + r = "<{}.{}".format(t.__module__, t.__name__) if self.name: - r += ' %r' % self.name - r += ' %s' % str(self.shape) - r += ' %s' % self.dtype + r += " %r" % self.name + r += " %s" % str(self.shape) + r += " %s" % self.dtype if self._read_only: - r += ' read-only' - r += '>' + r += " read-only" + r += ">" return r @property @@ -2483,13 +2492,12 @@ def info_items(self): return self._synchronized_op(self._info_items_nosync) def _info_items_nosync(self): - def typestr(o): - return '{}.{}'.format(type(o).__module__, type(o).__name__) + return "{}.{}".format(type(o).__module__, type(o).__name__) def bytestr(n): if n > 2**10: - return '{} ({})'.format(n, human_readable_size(n)) + return "{} ({})".format(n, human_readable_size(n)) else: return str(n) @@ -2497,41 +2505,39 @@ def bytestr(n): # basic info if self.name is not None: - items += [('Name', self.name)] + items += [("Name", self.name)] items += [ - ('Type', typestr(self)), - ('Data type', '%s' % self.dtype), - ('Shape', str(self.shape)), - ('Chunk shape', str(self.chunks)), - ('Order', self.order), - ('Read-only', str(self.read_only)), + ("Type", typestr(self)), + ("Data type", "%s" % self.dtype), + ("Shape", str(self.shape)), + ("Chunk shape", str(self.chunks)), + ("Order", self.order), + ("Read-only", str(self.read_only)), ] # filters 
if self.filters: for i, f in enumerate(self.filters): - items += [('Filter [%s]' % i, repr(f))] + items += [("Filter [%s]" % i, repr(f))] # compressor - items += [('Compressor', repr(self.compressor))] + items += [("Compressor", repr(self.compressor))] # synchronizer if self._synchronizer is not None: - items += [('Synchronizer type', typestr(self._synchronizer))] + items += [("Synchronizer type", typestr(self._synchronizer))] # storage info - items += [('Store type', typestr(self._store))] + items += [("Store type", typestr(self._store))] if self._chunk_store is not None: - items += [('Chunk store type', typestr(self._chunk_store))] - items += [('No. bytes', bytestr(self.nbytes))] + items += [("Chunk store type", typestr(self._chunk_store))] + items += [("No. bytes", bytestr(self.nbytes))] if self.nbytes_stored > 0: items += [ - ('No. bytes stored', bytestr(self.nbytes_stored)), - ('Storage ratio', '%.1f' % (self.nbytes / self.nbytes_stored)), + ("No. bytes stored", bytestr(self.nbytes_stored)), + ("Storage ratio", "%.1f" % (self.nbytes / self.nbytes_stored)), ] - items += [ - ('Chunks initialized', '{}/{}'.format(self.nchunks_initialized, self.nchunks)) - ] + items += [("Chunks initialized", "{}/{}".format(self.nchunks_initialized, self.nchunks))] return items @@ -2590,7 +2596,7 @@ def hexdigest(self, hashname="sha1"): # This is a bytes object on Python 3 and we want a str. if type(checksum) is not str: - checksum = checksum.decode('utf8') + checksum = checksum.decode("utf8") return checksum @@ -2682,8 +2688,7 @@ def _resize_nosync(self, *args): # determine the new number and arrangement of chunks chunks = self._chunks - new_cdata_shape = tuple(math.ceil(s / c) - for s, c in zip(new_shape, chunks)) + new_cdata_shape = tuple(math.ceil(s / c) for s, c in zip(new_shape, chunks)) # remove any chunks not within range # The idea is that, along each dimension, @@ -2752,18 +2757,18 @@ def append(self, data, axis=0): def _append_nosync(self, data, axis=0): # ensure data is array-like - if not hasattr(data, 'shape'): + if not hasattr(data, "shape"): data = np.asanyarray(data, like=self._meta_array) # ensure shapes are compatible for non-append dimensions - self_shape_preserved = tuple(s for i, s in enumerate(self._shape) - if i != axis) - data_shape_preserved = tuple(s for i, s in enumerate(data.shape) - if i != axis) + self_shape_preserved = tuple(s for i, s in enumerate(self._shape) if i != axis) + data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) if self_shape_preserved != data_shape_preserved: - raise ValueError('shape of data to append is not compatible with the array; ' - 'all dimensions must match except for the dimension being ' - 'appended') + raise ValueError( + "shape of data to append is not compatible with the array; " + "all dimensions must match except for the dimension being " + "appended" + ) # remember old shape old_shape = self._shape @@ -2787,9 +2792,16 @@ def _append_nosync(self, data, axis=0): return new_shape - def view(self, shape=None, chunks=None, dtype=None, - fill_value=None, filters=None, read_only=None, - synchronizer=None): + def view( + self, + shape=None, + chunks=None, + dtype=None, + fill_value=None, + filters=None, + read_only=None, + synchronizer=None, + ): """Return an array sharing the same data. 
Parameters @@ -2904,8 +2916,15 @@ def view(self, shape=None, chunks=None, dtype=None, read_only = self._read_only if synchronizer is None: synchronizer = self._synchronizer - a = Array(store=store, path=path, chunk_store=chunk_store, read_only=read_only, - synchronizer=synchronizer, cache_metadata=True, zarr_version=self._version) + a = Array( + store=store, + path=path, + chunk_store=chunk_store, + read_only=read_only, + synchronizer=synchronizer, + cache_metadata=True, + zarr_version=self._version, + ) a._is_view = True # allow override of some properties diff --git a/zarr/creation.py b/zarr/creation.py index dc8b8a157d..726d0b5932 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -11,18 +11,42 @@ ContainsArrayError, ContainsGroupError, ) -from zarr.storage import (contains_array, contains_group, default_compressor, - init_array, normalize_storage_path, - normalize_store_arg) +from zarr.storage import ( + contains_array, + contains_group, + default_compressor, + init_array, + normalize_storage_path, + normalize_store_arg, +) from zarr.util import normalize_dimension_separator -def create(shape, chunks=True, dtype=None, compressor='default', - fill_value: Optional[int] = 0, order='C', store=None, synchronizer=None, - overwrite=False, path=None, chunk_store=None, filters=None, - cache_metadata=True, cache_attrs=True, read_only=False, - object_codec=None, dimension_separator=None, write_empty_chunks=True, - *, zarr_version=None, meta_array=None, storage_transformers=(), **kwargs): +def create( + shape, + chunks=True, + dtype=None, + compressor="default", + fill_value: Optional[int] = 0, + order="C", + store=None, + synchronizer=None, + overwrite=False, + path=None, + chunk_store=None, + filters=None, + cache_metadata=True, + cache_attrs=True, + read_only=False, + object_codec=None, + dimension_separator=None, + write_empty_chunks=True, + *, + zarr_version=None, + meta_array=None, + storage_transformers=(), + **kwargs, +): """Create an array. 
Parameters @@ -150,11 +174,11 @@ def create(shape, chunks=True, dtype=None, compressor='default', """ if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) # handle polymorphic store arg store = normalize_store_arg(store, zarr_version=zarr_version, mode="w") - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) @@ -168,22 +192,43 @@ def create(shape, chunks=True, dtype=None, compressor='default', raise ValueError( f"Specified dimension_separator: {dimension_separator}" f"conflicts with store's separator: " - f"{store_separator}") + f"{store_separator}" + ) dimension_separator = normalize_dimension_separator(dimension_separator) if zarr_version > 2 and path is None: - path = '/' + path = "/" # initialize array metadata - init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, - fill_value=fill_value, order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters, object_codec=object_codec, - dimension_separator=dimension_separator, storage_transformers=storage_transformers) + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + overwrite=overwrite, + path=path, + chunk_store=chunk_store, + filters=filters, + object_codec=object_codec, + dimension_separator=dimension_separator, + storage_transformers=storage_transformers, + ) # instantiate array - z = Array(store, path=path, chunk_store=chunk_store, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, read_only=read_only, - write_empty_chunks=write_empty_chunks, meta_array=meta_array) + z = Array( + store, + path=path, + chunk_store=chunk_store, + synchronizer=synchronizer, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + read_only=read_only, + write_empty_chunks=write_empty_chunks, + meta_array=meta_array, + ) return z @@ -193,7 +238,7 @@ def _kwargs_compat(compressor, fill_value, kwargs): # to be compatible with h5py, as well as backwards-compatible with Zarr # 1.x, accept 'compression' and 'compression_opts' keyword arguments - if compressor != 'default': + if compressor != "default": # 'compressor' overrides 'compression' if "compression" in kwargs: warn( @@ -208,14 +253,14 @@ def _kwargs_compat(compressor, fill_value, kwargs): ) del kwargs["compression_opts"] - elif 'compression' in kwargs: - compression = kwargs.pop('compression') - compression_opts = kwargs.pop('compression_opts', None) + elif "compression" in kwargs: + compression = kwargs.pop("compression") + compression_opts = kwargs.pop("compression_opts", None) - if compression is None or compression == 'none': + if compression is None or compression == "none": compressor = None - elif compression == 'default': + elif compression == "default": compressor = default_compressor elif isinstance(compression, str): @@ -233,21 +278,21 @@ def _kwargs_compat(compressor, fill_value, kwargs): compressor = codec_cls(compression_opts) # be lenient here if user gives compressor as 'compression' - elif hasattr(compression, 'get_config'): + elif hasattr(compression, "get_config"): compressor = compression else: - raise ValueError('bad value for compression: %r' % compression) + raise 
ValueError("bad value for compression: %r" % compression) # handle 'fillvalue' - if 'fillvalue' in kwargs: + if "fillvalue" in kwargs: # to be compatible with h5py, accept 'fillvalue' instead of # 'fill_value' - fill_value = kwargs.pop('fillvalue') + fill_value = kwargs.pop("fillvalue") # ignore other keyword arguments for k in kwargs: - warn('ignoring keyword argument %r' % k) + warn("ignoring keyword argument %r" % k) return compressor, fill_value @@ -334,16 +379,13 @@ def _get_shape_chunks(a): shape = None chunks = None - if hasattr(a, 'shape') and \ - isinstance(a.shape, tuple): + if hasattr(a, "shape") and isinstance(a.shape, tuple): shape = a.shape - if hasattr(a, 'chunks') and \ - isinstance(a.chunks, tuple) and \ - (len(a.chunks) == len(a.shape)): + if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): chunks = a.chunks - elif hasattr(a, 'chunklen'): + elif hasattr(a, "chunklen"): # bcolz carray chunks = (a.chunklen,) + a.shape[1:] @@ -368,27 +410,27 @@ def array(data, **kwargs): """ # ensure data is array-like - if not hasattr(data, 'shape') or not hasattr(data, 'dtype'): + if not hasattr(data, "shape") or not hasattr(data, "dtype"): data = np.asanyarray(data) # setup dtype - kw_dtype = kwargs.get('dtype') + kw_dtype = kwargs.get("dtype") if kw_dtype is None: - kwargs['dtype'] = data.dtype + kwargs["dtype"] = data.dtype else: - kwargs['dtype'] = kw_dtype + kwargs["dtype"] = kw_dtype # setup shape and chunks data_shape, data_chunks = _get_shape_chunks(data) - kwargs['shape'] = data_shape - kw_chunks = kwargs.get('chunks') + kwargs["shape"] = data_shape + kw_chunks = kwargs.get("chunks") if kw_chunks is None: - kwargs['chunks'] = data_chunks + kwargs["chunks"] = data_chunks else: - kwargs['chunks'] = kw_chunks + kwargs["chunks"] = kw_chunks # pop read-only to apply after storing the data - read_only = kwargs.pop('read_only', False) + read_only = kwargs.pop("read_only", False) # instantiate array z = create(**kwargs) @@ -425,7 +467,7 @@ def open_array( zarr_version=None, dimension_separator=None, meta_array=None, - **kwargs + **kwargs, ): """Open an array using file-mode-like semantics. @@ -539,27 +581,27 @@ def open_array( # a : read/write if exists, create otherwise (default) if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) # handle polymorphic store arg - store = normalize_store_arg(store, storage_options=storage_options, - mode=mode, zarr_version=zarr_version) - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + store = normalize_store_arg( + store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, - storage_options=storage_options, - mode=mode, - zarr_version=zarr_version) + chunk_store = normalize_store_arg( + chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) # respect the dimension separator specified in a store, if present if dimension_separator is None: - if hasattr(store, '_dimension_separator'): + if hasattr(store, "_dimension_separator"): dimension_separator = store._dimension_separator else: - dimension_separator = '.' if zarr_version == 2 else '/' + dimension_separator = "." 
if zarr_version == 2 else "/" if zarr_version == 3 and path is None: - path = 'array' # TODO: raise ValueError instead? + path = "array" # TODO: raise ValueError instead? path = normalize_storage_path(path) @@ -572,48 +614,84 @@ def open_array( # ensure store is initialized - if mode in ['r', 'r+']: + if mode in ["r", "r+"]: if not contains_array(store, path=path): if contains_group(store, path=path): raise ContainsGroupError(path) raise ArrayNotFoundError(path) - elif mode == 'w': - init_array(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, filters=filters, overwrite=True, path=path, - object_codec=object_codec, chunk_store=chunk_store, - dimension_separator=dimension_separator) - - elif mode == 'a': + elif mode == "w": + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + filters=filters, + overwrite=True, + path=path, + object_codec=object_codec, + chunk_store=chunk_store, + dimension_separator=dimension_separator, + ) + + elif mode == "a": if not contains_array(store, path=path): if contains_group(store, path=path): raise ContainsGroupError(path) - init_array(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, filters=filters, path=path, - object_codec=object_codec, chunk_store=chunk_store, - dimension_separator=dimension_separator) + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + filters=filters, + path=path, + object_codec=object_codec, + chunk_store=chunk_store, + dimension_separator=dimension_separator, + ) - elif mode in ['w-', 'x']: + elif mode in ["w-", "x"]: if contains_group(store, path=path): raise ContainsGroupError(path) elif contains_array(store, path=path): raise ContainsArrayError(path) else: - init_array(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, filters=filters, path=path, - object_codec=object_codec, chunk_store=chunk_store, - dimension_separator=dimension_separator) + init_array( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + filters=filters, + path=path, + object_codec=object_codec, + chunk_store=chunk_store, + dimension_separator=dimension_separator, + ) # determine read only status - read_only = mode == 'r' + read_only = mode == "r" # instantiate array - z = Array(store, read_only=read_only, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, path=path, - chunk_store=chunk_store, write_empty_chunks=write_empty_chunks, meta_array=meta_array) + z = Array( + store, + read_only=read_only, + synchronizer=synchronizer, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + path=path, + chunk_store=chunk_store, + write_empty_chunks=write_empty_chunks, + meta_array=meta_array, + ) return z @@ -622,21 +700,21 @@ def _like_args(a, kwargs): shape, chunks = _get_shape_chunks(a) if shape is not None: - kwargs.setdefault('shape', shape) + kwargs.setdefault("shape", shape) if chunks is not None: - kwargs.setdefault('chunks', chunks) + kwargs.setdefault("chunks", chunks) - if hasattr(a, 'dtype'): - kwargs.setdefault('dtype', a.dtype) + if hasattr(a, "dtype"): + kwargs.setdefault("dtype", a.dtype) if isinstance(a, Array): - kwargs.setdefault('compressor', a.compressor) - kwargs.setdefault('order', a.order) - 
kwargs.setdefault('filters', a.filters) - kwargs.setdefault('zarr_version', a._version) + kwargs.setdefault("compressor", a.compressor) + kwargs.setdefault("order", a.order) + kwargs.setdefault("filters", a.filters) + kwargs.setdefault("zarr_version", a._version) else: - kwargs.setdefault('compressor', 'default') - kwargs.setdefault('order', 'C') + kwargs.setdefault("compressor", "default") + kwargs.setdefault("order", "C") def empty_like(a, **kwargs): @@ -661,7 +739,7 @@ def full_like(a, **kwargs): """Create a filled array like `a`.""" _like_args(a, kwargs) if isinstance(a, Array): - kwargs.setdefault('fill_value', a.fill_value) + kwargs.setdefault("fill_value", a.fill_value) return full(**kwargs) @@ -669,5 +747,5 @@ def open_like(a, path, **kwargs): """Open a persistent array like `a`.""" _like_args(a, kwargs) if isinstance(a, Array): - kwargs.setdefault('fill_value', a.fill_value) + kwargs.setdefault("fill_value", a.fill_value) return open_array(path, **kwargs) diff --git a/zarr/errors.py b/zarr/errors.py index 808cbe99a4..30c9b13d39 100644 --- a/zarr/errors.py +++ b/zarr/errors.py @@ -67,8 +67,9 @@ def __init__(self): def err_too_many_indices(selection, shape): - raise IndexError('too many indices for array; expected {}, got {}' - .format(len(shape), len(selection))) + raise IndexError( + "too many indices for array; expected {}, got {}".format(len(shape), len(selection)) + ) class VindexInvalidSelectionError(_BaseZarrIndexError): diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 18e7ac7863..0f2ff850bd 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -3,12 +3,27 @@ import numpy as np -from zarr._storage.store import (_get_metadata_suffix, data_root, meta_root, - DEFAULT_ZARR_VERSION, assert_zarr_v3_api_available) +from zarr._storage.store import ( + _get_metadata_suffix, + data_root, + meta_root, + DEFAULT_ZARR_VERSION, + assert_zarr_v3_api_available, +) from zarr.attrs import Attributes from zarr.core import Array -from zarr.creation import (array, create, empty, empty_like, full, full_like, - ones, ones_like, zeros, zeros_like) +from zarr.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + zeros, + zeros_like, +) from zarr.errors import ( ContainsArrayError, ContainsGroupError, @@ -120,12 +135,21 @@ class Group(MutableMapping): """ - def __init__(self, store, path=None, read_only=False, chunk_store=None, - cache_attrs=True, synchronizer=None, zarr_version=None, *, - meta_array=None): + def __init__( + self, + store, + path=None, + read_only=False, + chunk_store=None, + cache_attrs=True, + synchronizer=None, + zarr_version=None, + *, + meta_array=None + ): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() @@ -136,9 +160,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, self._chunk_store = chunk_store self._path = normalize_storage_path(path) if self._path: - self._key_prefix = self._path + '/' + self._key_prefix = self._path + "/" else: - self._key_prefix = '' + self._key_prefix = "" self._read_only = read_only self._synchronizer = synchronizer if meta_array is not None: @@ -182,8 +206,9 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, # Note: mkey doesn't actually exist for implicit groups, but the # object can still be 
created. akey = mkey - self._attrs = Attributes(store, key=akey, read_only=read_only, - cache=cache_attrs, synchronizer=synchronizer) + self._attrs = Attributes( + store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer + ) # setup info self._info = InfoReporter(self) @@ -204,15 +229,15 @@ def name(self): if self._path: # follow h5py convention: add leading slash name = self._path - if name[0] != '/': - name = '/' + name + if name[0] != "/": + name = "/" + name return name - return '/' + return "/" @property def basename(self): """Final component of name.""" - return self.name.split('/')[-1] + return self.name.split("/")[-1] @property def read_only(self): @@ -252,10 +277,10 @@ def meta_array(self): def __eq__(self, other): return ( - isinstance(other, Group) and - self._store == other.store and - self._read_only == other.read_only and - self._path == other.path + isinstance(other, Group) + and self._store == other.store + and self._read_only == other.read_only + and self._path == other.path # N.B., no need to compare attributes, should be covered by # store comparison ) @@ -279,11 +304,10 @@ def __iter__(self): quux """ - if getattr(self._store, '_store_version', 2) == 2: + if getattr(self._store, "_store_version", 2) == 2: for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key - if (contains_array(self._store, path) or - contains_group(self._store, path)): + if contains_array(self._store, path) or contains_group(self._store, path): yield key else: # TODO: Should this iterate over data folders and/or metadata @@ -296,15 +320,15 @@ def __iter__(self): # yield any groups or arrays sfx = self._metadata_key_suffix for key in keys: - len_suffix = len('.group') + len(sfx) # same for .array - if key.endswith(('.group' + sfx, '.array' + sfx)): + len_suffix = len(".group") + len(sfx) # same for .array + if key.endswith((".group" + sfx, ".array" + sfx)): yield key[name_start:-len_suffix] # also yield any implicit groups for prefix in prefixes: - prefix = prefix.rstrip('/') + prefix = prefix.rstrip("/") # only implicit if there is no .group.sfx file - if not prefix + '.group' + sfx in self._store: + if not prefix + ".group" + sfx in self._store: yield prefix[name_start:] # Note: omit data/root/ to avoid duplicate listings @@ -316,12 +340,12 @@ def __len__(self): def __repr__(self): t = type(self) - r = '<{}.{}'.format(t.__module__, t.__name__) + r = "<{}.{}".format(t.__module__, t.__name__) if self.name: - r += ' %r' % self.name + r += " %r" % self.name if self._read_only: - r += ' read-only' - r += '>' + r += " read-only" + r += ">" return r def __enter__(self): @@ -333,39 +357,38 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.store.close() def info_items(self): - def typestr(o): - return '{}.{}'.format(type(o).__module__, type(o).__name__) + return "{}.{}".format(type(o).__module__, type(o).__name__) items = [] # basic info if self.name is not None: - items += [('Name', self.name)] + items += [("Name", self.name)] items += [ - ('Type', typestr(self)), - ('Read-only', str(self.read_only)), + ("Type", typestr(self)), + ("Read-only", str(self.read_only)), ] # synchronizer if self._synchronizer is not None: - items += [('Synchronizer type', typestr(self._synchronizer))] + items += [("Synchronizer type", typestr(self._synchronizer))] # storage info - items += [('Store type', typestr(self._store))] + items += [("Store type", typestr(self._store))] if self._chunk_store is not None: - items += [('Chunk store type', typestr(self._chunk_store))] 
+ items += [("Chunk store type", typestr(self._chunk_store))] # members - items += [('No. members', len(self))] + items += [("No. members", len(self))] array_keys = sorted(self.array_keys()) group_keys = sorted(self.group_keys()) - items += [('No. arrays', len(array_keys))] - items += [('No. groups', len(group_keys))] + items += [("No. arrays", len(array_keys))] + items += [("No. groups", len(group_keys))] if array_keys: - items += [('Arrays', ', '.join(array_keys))] + items += [("Arrays", ", ".join(array_keys))] if group_keys: - items += [('Groups', ', '.join(group_keys))] + items += [("Groups", ", ".join(group_keys))] return items @@ -385,7 +408,7 @@ def __setstate__(self, state): self.__init__(**state) def _item_path(self, item): - absolute = isinstance(item, str) and item and item[0] == '/' + absolute = isinstance(item, str) and item and item[0] == "/" path = normalize_storage_path(item) if not absolute and self._path: path = self._key_prefix + path @@ -409,8 +432,9 @@ def __contains__(self, item): """ path = self._item_path(item) - return contains_array(self._store, path) or \ - contains_group(self._store, path, explicit_only=False) + return contains_array(self._store, path) or contains_group( + self._store, path, explicit_only=False + ) def __getitem__(self, item): """Obtain a group member. @@ -435,23 +459,41 @@ def __getitem__(self, item): """ path = self._item_path(item) if contains_array(self._store, path): - return Array(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, - synchronizer=self._synchronizer, cache_attrs=self.attrs.cache, - zarr_version=self._version, meta_array=self._meta_array) + return Array( + self._store, + read_only=self._read_only, + path=path, + chunk_store=self._chunk_store, + synchronizer=self._synchronizer, + cache_attrs=self.attrs.cache, + zarr_version=self._version, + meta_array=self._meta_array, + ) elif contains_group(self._store, path, explicit_only=True): - return Group(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version, - meta_array=self._meta_array) + return Group( + self._store, + read_only=self._read_only, + path=path, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + meta_array=self._meta_array, + ) elif self._version == 3: - implicit_group = meta_root + path + '/' + implicit_group = meta_root + path + "/" # non-empty folder in the metadata path implies an implicit group if self._store.list_prefix(implicit_group): - return Group(self._store, read_only=self._read_only, path=path, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version, - meta_array=self._meta_array) + return Group( + self._store, + read_only=self._read_only, + path=path, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + meta_array=self._meta_array, + ) else: raise KeyError(item) else: @@ -465,8 +507,9 @@ def __delitem__(self, item): def _delitem_nosync(self, item): path = self._item_path(item) - if contains_array(self._store, path) or \ - contains_group(self._store, path, explicit_only=False): + if contains_array(self._store, path) or contains_group( + self._store, path, explicit_only=False + ): rmdir(self._store, path) else: raise KeyError(item) @@ -510,13 +553,13 @@ def group_keys(self): yield 
key else: dir_name = meta_root + self._path - group_sfx = '.group' + self._metadata_key_suffix + group_sfx = ".group" + self._metadata_key_suffix # The fact that we call sorted means this can't be a streaming generator. # The keys are already in memory. all_keys = sorted(listdir(self._store, dir_name)) for key in all_keys: if key.endswith(group_sfx): - key = key[:-len(group_sfx)] + key = key[: -len(group_sfx)] if key in all_keys: # otherwise we will double count this group continue @@ -555,7 +598,8 @@ def groups(self): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version) + zarr_version=self._version, + ) else: for key in self.group_keys(): @@ -567,7 +611,8 @@ def groups(self): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version) + zarr_version=self._version, + ) def array_keys(self, recurse=False): """Return an iterator over member names for arrays only. @@ -591,9 +636,7 @@ def array_keys(self, recurse=False): ['baz', 'quux'] """ - return self._array_iter(keys_only=True, - method='array_keys', - recurse=recurse) + return self._array_iter(keys_only=True, method="array_keys", recurse=recurse) def arrays(self, recurse=False): """Return an iterator over (name, value) pairs for arrays only. @@ -619,9 +662,7 @@ def arrays(self, recurse=False): quux """ - return self._array_iter(keys_only=False, - method='arrays', - recurse=recurse) + return self._array_iter(keys_only=False, method="arrays", recurse=recurse) def _array_iter(self, keys_only, method, recurse): if self._version == 2: @@ -635,12 +676,12 @@ def _array_iter(self, keys_only, method, recurse): yield from getattr(group, method)(recurse=recurse) else: dir_name = meta_root + self._path - array_sfx = '.array' + self._metadata_key_suffix - group_sfx = '.group' + self._metadata_key_suffix + array_sfx = ".array" + self._metadata_key_suffix + group_sfx = ".group" + self._metadata_key_suffix for key in sorted(listdir(self._store, dir_name)): if key.endswith(array_sfx): - key = key[:-len(array_sfx)] + key = key[: -len(array_sfx)] _key = key.rstrip("/") yield _key if keys_only else (_key, self[key]) @@ -794,8 +835,7 @@ def visit(self, func): return self.visitvalues(lambda o: func(o.name[base_len:].lstrip("/"))) def visitkeys(self, func): - """An alias for :py:meth:`~Group.visit`. 
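# Sketch of the member-iteration API touched above (in-memory store assumed):
import zarr

root = zarr.group()
root.create_group("grp")
root.zeros("arr", shape=(4,), chunks=(2,))
print(sorted(root.group_keys()))   # -> ['grp']
print(sorted(root.array_keys()))   # -> ['arr']
root.visit(print)                  # prints each member name: arr, grp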
- """ + """An alias for :py:meth:`~Group.visit`.""" return self.visit(func) @@ -924,12 +964,17 @@ def _create_group_nosync(self, name, overwrite=False): path = self._item_path(name) # create terminal group - init_group(self._store, path=path, chunk_store=self._chunk_store, - overwrite=overwrite) - - return Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version) + init_group(self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite) + + return Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + ) def create_groups(self, *names, **kwargs): """Convenience method to create multiple groups in a single call.""" @@ -960,20 +1005,26 @@ def require_group(self, name, overwrite=False): """ - return self._write_op(self._require_group_nosync, name, - overwrite=overwrite) + return self._write_op(self._require_group_nosync, name, overwrite=overwrite) def _require_group_nosync(self, name, overwrite=False): path = self._item_path(name) # create terminal group if necessary if not contains_group(self._store, path): - init_group(store=self._store, path=path, chunk_store=self._chunk_store, - overwrite=overwrite) + init_group( + store=self._store, path=path, chunk_store=self._chunk_store, overwrite=overwrite + ) - return Group(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, zarr_version=self._version) + return Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + ) def require_groups(self, *names): """Convenience method to require multiple groups in a single call.""" @@ -1048,17 +1099,15 @@ def _create_dataset_nosync(self, name, data=None, **kwargs): path = self._item_path(name) # determine synchronizer - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) # create array if data is None: - a = create(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + a = create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) else: - a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + a = array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) return a @@ -1084,11 +1133,11 @@ def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): """ - return self._write_op(self._require_dataset_nosync, name, shape=shape, - dtype=dtype, exact=exact, **kwargs) + return self._write_op( + self._require_dataset_nosync, name, shape=shape, dtype=dtype, exact=exact, **kwargs + ) - def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, - **kwargs): + def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs): path = self._item_path(name) @@ -1096,31 +1145,37 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, # array already exists at path, validate that it is the right shape and type - synchronizer = kwargs.get('synchronizer', self._synchronizer) - cache_metadata = 
kwargs.get('cache_metadata', True) - cache_attrs = kwargs.get('cache_attrs', self.attrs.cache) - a = Array(self._store, path=path, read_only=self._read_only, - chunk_store=self._chunk_store, synchronizer=synchronizer, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - meta_array=self._meta_array) + synchronizer = kwargs.get("synchronizer", self._synchronizer) + cache_metadata = kwargs.get("cache_metadata", True) + cache_attrs = kwargs.get("cache_attrs", self.attrs.cache) + a = Array( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + synchronizer=synchronizer, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + meta_array=self._meta_array, + ) shape = normalize_shape(shape) if shape != a.shape: - raise TypeError('shape do not match existing array; expected {}, got {}' - .format(a.shape, shape)) + raise TypeError( + "shape do not match existing array; expected {}, got {}".format(a.shape, shape) + ) dtype = np.dtype(dtype) if exact: if dtype != a.dtype: - raise TypeError('dtypes do not match exactly; expected {}, got {}' - .format(a.dtype, dtype)) + raise TypeError( + "dtypes do not match exactly; expected {}, got {}".format(a.dtype, dtype) + ) else: if not np.can_cast(dtype, a.dtype): - raise TypeError('dtypes ({}, {}) cannot be safely cast' - .format(dtype, a.dtype)) + raise TypeError("dtypes ({}, {}) cannot be safely cast".format(dtype, a.dtype)) return a else: - return self._create_dataset_nosync(name, shape=shape, dtype=dtype, - **kwargs) + return self._create_dataset_nosync(name, shape=shape, dtype=dtype, **kwargs) def create(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -1129,10 +1184,9 @@ def create(self, name, **kwargs): def _create_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return create(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return create(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def empty(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -1141,10 +1195,9 @@ def empty(self, name, **kwargs): def _empty_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return empty(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return empty(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def zeros(self, name, **kwargs): """Create an array. Keyword arguments as per @@ -1153,10 +1206,9 @@ def zeros(self, name, **kwargs): def _zeros_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return zeros(store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return zeros(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def ones(self, name, **kwargs): """Create an array. 
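# Worked example of the require_dataset checks above: a matching shape with a
# safely castable dtype returns the existing array, a mismatched shape raises.
import zarr

root = zarr.group()
root.require_dataset("data", shape=(10,), chunks=(5,), dtype="i4")
same = root.require_dataset("data", shape=(10,), dtype="i2")  # i2 casts to i4
assert same.shape == (10,)
try:
    root.require_dataset("data", shape=(20,), dtype="i4")
except TypeError:
    pass                                                      # shapes differ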
Keyword arguments as per @@ -1165,8 +1217,8 @@ def ones(self, name, **kwargs): def _ones_nosync(self, name, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) return ones(store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def full(self, name, fill_value, **kwargs): @@ -1176,10 +1228,15 @@ def full(self, name, fill_value, **kwargs): def _full_nosync(self, name, fill_value, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return full(store=self._store, path=path, chunk_store=self._chunk_store, - fill_value=fill_value, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return full( + store=self._store, + path=path, + chunk_store=self._chunk_store, + fill_value=fill_value, + **kwargs + ) def array(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1188,10 +1245,9 @@ def array(self, name, data, **kwargs): def _array_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return array(data, store=self._store, path=path, chunk_store=self._chunk_store, - **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return array(data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs) def empty_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1200,10 +1256,11 @@ def empty_like(self, name, data, **kwargs): def _empty_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return empty_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return empty_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def zeros_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1212,10 +1269,11 @@ def zeros_like(self, name, data, **kwargs): def _zeros_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return zeros_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return zeros_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def ones_like(self, name, data, **kwargs): """Create an array. 
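# Short sketch of the convenience creators above (zeros/full/*_like); the
# *_like variants take shape, chunks and dtype from the template array:
import zarr

root = zarr.group()
b = root.full("b", fill_value=7, shape=(3,), dtype="i4")
c = root.ones_like("c", b)
print(b[:], c[:])   # -> [7 7 7] [1 1 1]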
Keyword arguments as per @@ -1224,10 +1282,11 @@ def ones_like(self, name, data, **kwargs): def _ones_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return ones_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return ones_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def full_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per @@ -1236,10 +1295,11 @@ def full_like(self, name, data, **kwargs): def _full_like_nosync(self, name, data, **kwargs): path = self._item_path(name) - kwargs.setdefault('synchronizer', self._synchronizer) - kwargs.setdefault('cache_attrs', self.attrs.cache) - return full_like(data, store=self._store, path=path, - chunk_store=self._chunk_store, **kwargs) + kwargs.setdefault("synchronizer", self._synchronizer) + kwargs.setdefault("cache_attrs", self.attrs.cache) + return full_like( + data, store=self._store, path=path, chunk_store=self._chunk_store, **kwargs + ) def _move_nosync(self, path, new_path): rename(self._store, path, new_path) @@ -1261,11 +1321,14 @@ def move(self, source, dest): dest = self._item_path(dest) # Check that source exists. - if not (contains_array(self._store, source) or - contains_group(self._store, source, explicit_only=False)): + if not ( + contains_array(self._store, source) + or contains_group(self._store, source, explicit_only=False) + ): raise ValueError('The source, "%s", does not exist.' % source) - if (contains_array(self._store, dest) or - contains_group(self._store, dest, explicit_only=False)): + if contains_array(self._store, dest) or contains_group( + self._store, dest, explicit_only=False + ): raise ValueError('The dest, "%s", already exists.' % dest) # Ensure groups needed for `dest` exist. @@ -1275,23 +1338,30 @@ def move(self, source, dest): self._write_op(self._move_nosync, source, dest) -def _normalize_store_arg(store, *, storage_options=None, mode="r", - zarr_version=None): +def _normalize_store_arg(store, *, storage_options=None, mode="r", zarr_version=None): if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() if store is None: return MemoryStore() if zarr_version == 2 else MemoryStoreV3() - return normalize_store_arg(store, - storage_options=storage_options, mode=mode, - zarr_version=zarr_version) - - -def group(store=None, overwrite=False, chunk_store=None, - cache_attrs=True, synchronizer=None, path=None, *, zarr_version=None): + return normalize_store_arg( + store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) + + +def group( + store=None, + overwrite=False, + chunk_store=None, + cache_attrs=True, + synchronizer=None, + path=None, + *, + zarr_version=None +): """Create a group. 
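# Hedged sketch of Group.move as handled above, assuming a store that
# implements rename (the in-memory store does):
import zarr

root = zarr.group()
root.create_group("a")
root.move("a", "b")
assert "b" in root and "a" not in root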
Parameters @@ -1336,9 +1406,9 @@ def group(store=None, overwrite=False, chunk_store=None, """ # handle polymorphic store arg - store = _normalize_store_arg(store, zarr_version=zarr_version, mode='w') + store = _normalize_store_arg(store, zarr_version=zarr_version, mode="w") if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() @@ -1352,16 +1422,31 @@ def group(store=None, overwrite=False, chunk_store=None, requires_init = overwrite or not contains_group(store, path) if requires_init: - init_group(store, overwrite=overwrite, chunk_store=chunk_store, - path=path) - - return Group(store, read_only=False, chunk_store=chunk_store, - cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, - zarr_version=zarr_version) - - -def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=None, - chunk_store=None, storage_options=None, *, zarr_version=None, meta_array=None): + init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) + + return Group( + store, + read_only=False, + chunk_store=chunk_store, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + zarr_version=zarr_version, + ) + + +def open_group( + store=None, + mode="a", + cache_attrs=True, + synchronizer=None, + path=None, + chunk_store=None, + storage_options=None, + *, + zarr_version=None, + meta_array=None +): """Open a group using file-mode-like semantics. Parameters @@ -1414,44 +1499,41 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N # handle polymorphic store arg store = _normalize_store_arg( - store, storage_options=storage_options, mode=mode, - zarr_version=zarr_version) + store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) if zarr_version is None: - zarr_version = getattr(store, '_store_version', DEFAULT_ZARR_VERSION) + zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) if zarr_version != 2: assert_zarr_v3_api_available() if chunk_store is not None: - chunk_store = _normalize_store_arg(chunk_store, - storage_options=storage_options, - mode=mode, - zarr_version=zarr_version) - if getattr(chunk_store, '_store_version', DEFAULT_ZARR_VERSION) != zarr_version: - raise ValueError( # pragma: no cover - "zarr_version of store and chunk_store must match" - ) + chunk_store = _normalize_store_arg( + chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version + ) + if getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) != zarr_version: + raise ValueError("zarr_version of store and chunk_store must match") # pragma: no cover path = normalize_storage_path(path) # ensure store is initialized - if mode in ['r', 'r+']: + if mode in ["r", "r+"]: if not contains_group(store, path=path): if contains_array(store, path=path): raise ContainsArrayError(path) raise GroupNotFoundError(path) - elif mode == 'w': + elif mode == "w": init_group(store, overwrite=True, path=path, chunk_store=chunk_store) - elif mode == 'a': + elif mode == "a": if not contains_group(store, path=path): if contains_array(store, path=path): raise ContainsArrayError(path) init_group(store, path=path, chunk_store=chunk_store) - elif mode in ['w-', 'x']: + elif mode in ["w-", "x"]: if contains_array(store, path=path): raise ContainsArrayError(path) elif contains_group(store, path=path): @@ -1460,8 +1542,15 @@ def open_group(store=None, mode='a', 
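# Illustration of the file-mode semantics documented above; "example.zarr"
# is a hypothetical on-disk path:
import zarr
from zarr.errors import ContainsGroupError

g = zarr.open_group("example.zarr", mode="w")    # create, overwriting if present
g.create_group("foo")
g2 = zarr.open_group("example.zarr", mode="r")   # read-only view
assert "foo" in g2
try:
    zarr.open_group("example.zarr", mode="w-")   # "w-"/"x" refuse to overwrite
except ContainsGroupError:
    pass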
cache_attrs=True, synchronizer=None, path=N init_group(store, path=path, chunk_store=chunk_store) # determine read only status - read_only = mode == 'r' - - return Group(store, read_only=read_only, cache_attrs=cache_attrs, - synchronizer=synchronizer, path=path, chunk_store=chunk_store, - zarr_version=zarr_version, meta_array=meta_array) + read_only = mode == "r" + + return Group( + store, + read_only=read_only, + cache_attrs=cache_attrs, + synchronizer=synchronizer, + path=path, + chunk_store=chunk_store, + zarr_version=zarr_version, + meta_array=meta_array, + ) diff --git a/zarr/indexing.py b/zarr/indexing.py index bc2afba992..487cc8b9d9 100644 --- a/zarr/indexing.py +++ b/zarr/indexing.py @@ -34,17 +34,14 @@ def is_integer_list(x): def is_integer_array(x, ndim=None): - t = not np.isscalar(x) and \ - hasattr(x, 'shape') and \ - hasattr(x, 'dtype') and \ - x.dtype.kind in 'ui' + t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" if ndim is not None: t = t and len(x.shape) == ndim return t def is_bool_array(x, ndim=None): - t = hasattr(x, 'shape') and hasattr(x, 'dtype') and x.dtype == bool + t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool if ndim is not None: t = t and len(x.shape) == ndim return t @@ -80,24 +77,15 @@ def is_pure_fancy_indexing(selection, ndim): no_slicing = ( isinstance(selection, tuple) and len(selection) == ndim - and not ( - any(isinstance(elem, slice) or elem is Ellipsis - for elem in selection) - ) + and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) ) return ( - no_slicing and - all( - is_integer(elem) - or is_integer_list(elem) - or is_integer_array(elem) - for elem in selection - ) and - any( - is_integer_list(elem) - or is_integer_array(elem) + no_slicing + and all( + is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) for elem in selection ) + and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) ) @@ -112,12 +100,13 @@ def is_pure_orthogonal_indexing(selection, ndim): # Case two: selection contains either zero or one integer iterables. # All other selection elements are slices or integers return ( - isinstance(selection, tuple) and len(selection) == ndim and - sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 and - all( - is_integer_list(elem) or is_integer_array(elem) - or isinstance(elem, (int, slice)) for - elem in selection) + isinstance(selection, tuple) + and len(selection) == ndim + and sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 + and all( + is_integer_list(elem) or is_integer_array(elem) or isinstance(elem, (int, slice)) + for elem in selection + ) ) @@ -138,8 +127,7 @@ def normalize_integer_selection(dim_sel, dim_len): ChunkDimProjection = collections.namedtuple( - 'ChunkDimProjection', - ('dim_chunk_ix', 'dim_chunk_sel', 'dim_out_sel') + "ChunkDimProjection", ("dim_chunk_ix", "dim_chunk_sel", "dim_out_sel") ) """A mapping from chunk to output array for a single dimension. 
@@ -156,7 +144,6 @@ def normalize_integer_selection(dim_sel, dim_len): class IntDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): # normalize @@ -181,7 +168,6 @@ def ceildiv(a, b): class SliceDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): # normalize @@ -234,8 +220,7 @@ def __iter__(self): dim_chunk_sel_stop = self.stop - dim_offset dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) - dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), - self.step) + dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) # If there are no elements on the selection within this chunk, then skip if dim_chunk_nitems == 0: @@ -291,8 +276,7 @@ def replace_ellipsis(selection, shape): def replace_lists(selection): return tuple( - np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel - for dim_sel in selection + np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection ) @@ -303,8 +287,7 @@ def ensure_tuple(v): ChunkProjection = collections.namedtuple( - 'ChunkProjection', - ('chunk_coords', 'chunk_selection', 'out_selection') + "ChunkProjection", ("chunk_coords", "chunk_selection", "out_selection") ) """A mapping of items from chunk to output array. Can be used to extract items from the chunk array for loading into an output array. Can also be used to extract items from a @@ -336,10 +319,7 @@ def is_positive_slice(s): def is_contiguous_selection(selection): selection = ensure_tuple(selection) - return all( - (is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) - for s in selection - ) + return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) def is_basic_selection(selection): @@ -349,7 +329,6 @@ def is_basic_selection(selection): # noinspection PyProtectedMember class BasicIndexer: - def __init__(self, selection, array): # handle ellipsis @@ -357,8 +336,7 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in \ - zip(selection, array._shape, array._chunks): + for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -367,15 +345,15 @@ def __init__(self, selection, array): dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) else: - raise IndexError('unsupported selection item for basic indexing; ' - 'expected integer or slice, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for basic indexing; " + "expected integer or slice, got {!r}".format(type(dim_sel)) + ) dim_indexers.append(dim_indexer) self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers - if not isinstance(s, IntDimIndexer)) + self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) self.drop_axes = None def __iter__(self): @@ -383,25 +361,28 @@ def __iter__(self): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple(p.dim_out_sel for p in dim_projections - if p.dim_out_sel is not None) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) yield ChunkProjection(chunk_coords, chunk_selection, out_selection) class BoolArrayDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): # check number of dimensions if 
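# Minimal sketch (not the library code) of how a slice is projected onto
# chunks along one dimension, mirroring SliceDimIndexer/ceildiv above;
# step == 1 is assumed for brevity:
import math

dim_len, dim_chunk_len, start, stop = 10, 4, 3, 9
for chunk_ix in range(math.ceil(dim_len / dim_chunk_len)):
    offset = chunk_ix * dim_chunk_len
    sel_start = max(start, offset) - offset
    sel_stop = min(stop, offset + dim_chunk_len) - offset
    if sel_stop > sel_start:
        print(chunk_ix, slice(sel_start, sel_stop))
# -> 0 slice(3, 4)   1 slice(0, 4)   2 slice(0, 1)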
not is_bool_array(dim_sel, 1): - raise IndexError('Boolean arrays in an orthogonal selection must ' - 'be 1-dimensional only') + raise IndexError( + "Boolean arrays in an orthogonal selection must " "be 1-dimensional only" + ) # check shape if dim_sel.shape[0] != dim_len: - raise IndexError('Boolean array has the wrong length for dimension; ' - 'expected {}, got {}'.format(dim_len, dim_sel.shape[0])) + raise IndexError( + "Boolean array has the wrong length for dimension; " + "expected {}, got {}".format(dim_len, dim_sel.shape[0]) + ) # store attributes self.dim_sel = dim_sel @@ -410,11 +391,11 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) # precompute number of selected items for each chunk - self.chunk_nitems = np.zeros(self.nchunks, dtype='i8') + self.chunk_nitems = np.zeros(self.nchunks, dtype="i8") for dim_chunk_ix in range(self.nchunks): dim_offset = dim_chunk_ix * self.dim_chunk_len self.chunk_nitems[dim_chunk_ix] = np.count_nonzero( - self.dim_sel[dim_offset:dim_offset + self.dim_chunk_len] + self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] ) self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) self.nitems = self.chunk_nitems_cumsum[-1] @@ -427,12 +408,12 @@ def __iter__(self): # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel[dim_offset:dim_offset + self.dim_chunk_len] + dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] # pad out if final chunk if dim_chunk_sel.shape[0] < self.dim_chunk_len: tmp = np.zeros(self.dim_chunk_len, dtype=bool) - tmp[:dim_chunk_sel.shape[0]] = dim_chunk_sel + tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel dim_chunk_sel = tmp # find region in output @@ -482,14 +463,22 @@ def boundscheck_indices(x, dim_len): class IntArrayDimIndexer: """Integer array selection against a single dimension.""" - def __init__(self, dim_sel, dim_len, dim_chunk_len, wraparound=True, boundscheck=True, - order=Order.UNKNOWN): + def __init__( + self, + dim_sel, + dim_len, + dim_chunk_len, + wraparound=True, + boundscheck=True, + order=Order.UNKNOWN, + ): # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): - raise IndexError('integer arrays in an orthogonal selection must be ' - '1-dimensional only') + raise IndexError( + "integer arrays in an orthogonal selection must be " "1-dimensional only" + ) # handle wraparound if wraparound: @@ -570,10 +559,14 @@ def ix_(selection, shape): selection = replace_ellipsis(selection, shape) # replace slice and int as these are not supported by numpy.ix_ - selection = [slice_to_range(dim_sel, dim_len) if isinstance(dim_sel, slice) - else [dim_sel] if is_integer(dim_sel) - else dim_sel - for dim_sel, dim_len in zip(selection, shape)] + selection = [ + slice_to_range(dim_sel, dim_len) + if isinstance(dim_sel, slice) + else [dim_sel] + if is_integer(dim_sel) + else dim_sel + for dim_sel, dim_len in zip(selection, shape) + ] # now get numpy to convert to a coordinate selection selection = np.ix_(*selection) @@ -608,7 +601,6 @@ def oindex_set(a, selection, value): # noinspection PyProtectedMember class OrthogonalIndexer: - def __init__(self, selection, array): # handle ellipsis @@ -619,8 +611,7 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in \ - zip(selection, array._shape, array._chunks): + for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): if 
is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -635,21 +626,24 @@ def __init__(self, selection, array): dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) else: - raise IndexError('unsupported selection item for orthogonal indexing; ' - 'expected integer, slice, integer array or Boolean ' - 'array, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for orthogonal indexing; " + "expected integer, slice, integer array or Boolean " + "array, got {!r}".format(type(dim_sel)) + ) dim_indexers.append(dim_indexer) self.array = array self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers - if not isinstance(s, IntDimIndexer)) + self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) self.is_advanced = not is_basic_selection(selection) if self.is_advanced: - self.drop_axes = tuple(i for i, dim_indexer in enumerate(self.dim_indexers) - if isinstance(dim_indexer, IntDimIndexer)) + self.drop_axes = tuple( + i + for i, dim_indexer in enumerate(self.dim_indexers) + if isinstance(dim_indexer, IntDimIndexer) + ) else: self.drop_axes = None @@ -658,8 +652,9 @@ def __iter__(self): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple(p.dim_out_sel for p in dim_projections - if p.dim_out_sel is not None) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) # handle advanced indexing arrays orthogonally if self.is_advanced: @@ -678,7 +673,6 @@ def __iter__(self): class OIndex: - def __init__(self, array): self.array = array @@ -697,7 +691,6 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember class BlockIndexer: - def __init__(self, selection, array): # handle ellipsis @@ -708,8 +701,7 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] - for dim_sel, dim_len, dim_chunk_size in \ - zip(selection, array._shape, array._chunks): + for dim_sel, dim_len, dim_chunk_size in zip(selection, array._shape, array._chunks): dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) if is_integer(dim_sel): @@ -725,9 +717,10 @@ def __init__(self, selection, array): stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks if dim_sel.step not in {1, None}: - raise IndexError('unsupported selection item for block indexing; ' - 'expected integer or slice with step=1, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for block indexing; " + "expected integer or slice with step=1, got {!r}".format(type(dim_sel)) + ) # Can't reuse wraparound_indices because it expects a numpy array # We have integers here. 
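# Usage sketch for the two indexers above: .oindex applies each selection
# per dimension (outer product), .blocks addresses whole chunks by grid
# coordinates.  Data and chunk shape are illustrative only:
import numpy as np
import zarr

z = zarr.array(np.arange(16).reshape(4, 4), chunks=(2, 2))
print(z.oindex[[0, 2], [1, 3]])   # -> [[ 1  3] [ 9 11]]
print(z.blocks[1, 0])             # chunk covering rows 2:4, cols 0:2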
@@ -741,9 +734,10 @@ def __init__(self, selection, array): slice_ = slice(start, stop) else: - raise IndexError('unsupported selection item for block indexing; ' - 'expected integer or slice, got {!r}' - .format(type(dim_sel))) + raise IndexError( + "unsupported selection item for block indexing; " + "expected integer or slice, got {!r}".format(type(dim_sel)) + ) dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) dim_indexers.append(dim_indexer) @@ -759,14 +753,14 @@ def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple(p.dim_out_sel for p in dim_projections - if p.dim_out_sel is not None) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) yield ChunkProjection(chunk_coords, chunk_selection, out_selection) class BlockIndex: - def __init__(self, array): self.array = array @@ -785,25 +779,20 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember def is_coordinate_selection(selection, array): - return ( - (len(selection) == len(array._shape)) and - all(is_integer(dim_sel) or is_integer_array(dim_sel) - for dim_sel in selection) + return (len(selection) == len(array._shape)) and all( + is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection ) # noinspection PyProtectedMember def is_mask_selection(selection, array): return ( - len(selection) == 1 and - is_bool_array(selection[0]) and - selection[0].shape == array._shape + len(selection) == 1 and is_bool_array(selection[0]) and selection[0].shape == array._shape ) # noinspection PyProtectedMember class CoordinateIndexer: - def __init__(self, selection, array): # some initial normalization @@ -813,9 +802,11 @@ def __init__(self, selection, array): # validation if not is_coordinate_selection(selection, array): - raise IndexError('invalid coordinate selection; expected one integer ' - '(coordinate) array per dimension of the target array, ' - 'got {!r}'.format(selection)) + raise IndexError( + "invalid coordinate selection; expected one integer " + "(coordinate) array per dimension of the target array, " + "got {!r}".format(selection) + ) # handle wraparound, boundscheck for dim_sel, dim_len in zip(selection, array.shape): @@ -828,8 +819,7 @@ def __init__(self, selection, array): # compute chunk index for each point in the selection chunks_multi_index = tuple( - dim_sel // dim_chunk_len - for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) + dim_sel // dim_chunk_len for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) ) # broadcast selection - this will raise error if array dimensions don't match @@ -844,8 +834,7 @@ def __init__(self, selection, array): chunks_multi_index = [dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index] # ravel chunk indices - chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, - dims=array._cdata_shape) + chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, dims=array._cdata_shape) # group points by chunk if np.any(np.diff(chunks_raveled_indices) < 0): @@ -901,7 +890,6 @@ def __iter__(self): # noinspection PyProtectedMember class MaskIndexer(CoordinateIndexer): - def __init__(self, selection, array): # some initial normalization @@ -910,9 +898,10 @@ def __init__(self, selection, array): # validation if not is_mask_selection(selection, array): - raise IndexError('invalid mask selection; expected 
one Boolean (mask)' - 'array with the same shape as the target array, got {!r}' - .format(selection)) + raise IndexError( + "invalid mask selection; expected one Boolean (mask)" + "array with the same shape as the target array, got {!r}".format(selection) + ) # convert to indices selection = np.nonzero(selection[0]) @@ -922,7 +911,6 @@ def __init__(self, selection, array): class VIndex: - def __init__(self, array): self.array = array @@ -955,8 +943,10 @@ def check_fields(fields, dtype): return dtype # check type if not isinstance(fields, (str, list, tuple)): - raise IndexError("'fields' argument must be a string or list of strings; found " - "{!r}".format(type(fields))) + raise IndexError( + "'fields' argument must be a string or list of strings; found " + "{!r}".format(type(fields)) + ) if fields: if dtype.names is None: raise IndexError("invalid 'fields' argument, array does not have any fields") @@ -980,7 +970,7 @@ def check_no_multi_fields(fields): if len(fields) == 1: return fields[0] elif len(fields) > 1: - raise IndexError('multiple fields are not supported for this operation') + raise IndexError("multiple fields are not supported for this operation") return fields @@ -1009,11 +999,7 @@ def make_slice_selection(selection): ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) elif isinstance(dim_selection, np.ndarray): if len(dim_selection) == 1: - ls.append( - slice( - int(dim_selection[0]), int(dim_selection[0]) + 1, 1 - ) - ) + ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) else: raise ArrayIndexError() else: @@ -1108,10 +1094,10 @@ def __init__(self, selection, arr_shape): def __iter__(self): chunk1 = self.chunk_loc_slices[0] nitems = (chunk1[-1].stop - chunk1[-1].start) * np.prod( - self.arr_shape[len(chunk1):], dtype=int + self.arr_shape[len(chunk1) :], dtype=int ) for partial_out_selection in self.chunk_loc_slices: start = 0 for i, sl in enumerate(partial_out_selection): - start += sl.start * np.prod(self.arr_shape[i + 1:], dtype=int) + start += sl.start * np.prod(self.arr_shape[i + 1 :], dtype=int) yield start, nitems, partial_out_selection diff --git a/zarr/meta.py b/zarr/meta.py index aacffd7f77..48791ddf17 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -27,15 +27,11 @@ "extensions": [], } -_v3_core_types = set( - "".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8")) -) +_v3_core_types = set("".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8"))) _v3_core_types = {"bool", "i1", "u1"} | _v3_core_types # The set of complex types allowed ({"c8", ">c16"}) -_v3_complex_types = set( - f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("8", "16")) -) +_v3_complex_types = set(f"{end}c{_bytes}" for end, _bytes in itertools.product("<>", ("8", "16"))) # All dtype.str values corresponding to datetime64 and timedelta64 # see: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units @@ -43,7 +39,7 @@ _time_units = ["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] _v3_datetime_types = set( f"{end}{kind}8[{unit}]" - for end, unit, kind in itertools.product("<>", _date_units + _time_units, ('m', 'M')) + for end, unit, kind in itertools.product("<>", _date_units + _time_units, ("m", "M")) ) @@ -217,9 +213,7 @@ def encode_group_metadata(cls, meta=None) -> bytes: return json_dumps(meta) @classmethod - def decode_fill_value( - cls, v: Any, dtype: np.dtype, object_codec: Any = None - ) -> Any: + def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> 
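# Usage sketch for coordinate and mask selections as validated above,
# using a small in-memory array:
import numpy as np
import zarr

z = zarr.zeros((4, 4), chunks=(2, 2), dtype=int)
z[:] = np.arange(16).reshape(4, 4)
print(z.vindex[[0, 3], [1, 2]])   # one integer array per dimension -> [ 1 14]
mask = np.zeros((4, 4), dtype=bool)
mask[0, 1] = mask[3, 2] = True
print(z.vindex[mask])             # single boolean array, array-shaped -> [ 1 14]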
Any: # early out if v is None: return v @@ -267,9 +261,7 @@ def decode_fill_value( return np.array(v, dtype=dtype)[()] @classmethod - def encode_fill_value( - cls, v: Any, dtype: np.dtype, object_codec: Any = None - ) -> Any: + def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: # early out if v is None: return v @@ -318,11 +310,9 @@ def decode_dtype(cls, d, validate=True): if isinstance(d, dict): # extract the type from the extension info try: - d = d['type'] + d = d["type"] except KeyError: - raise KeyError( - "Extended dtype info must provide a key named 'type'." - ) + raise KeyError("Extended dtype info must provide a key named 'type'.") d = cls._decode_dtype_descr(d) dtype = np.dtype(d) if validate: @@ -389,9 +379,7 @@ def encode_hierarchy_metadata(cls, meta=None) -> bytes: return json_dumps(meta) @classmethod - def decode_hierarchy_metadata( - cls, s: Union[MappingType, bytes, str] - ) -> MappingType[str, Any]: + def decode_hierarchy_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: meta = cls.parse_metadata(s) # check metadata format # zarr_format = meta.get("zarr_format", None) @@ -414,7 +402,7 @@ def _encode_codec_metadata(cls, codec: Codec) -> Optional[Mapping]: # only support gzip for now config = codec.get_config() del config["id"] - uri = 'https://purl.org/zarr/spec/codec/' + uri = "https://purl.org/zarr/spec/codec/" if isinstance(codec, numcodecs.GZip): uri = uri + "gzip/1.0" elif isinstance(codec, numcodecs.Zlib): @@ -438,19 +426,19 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: if meta is None: return None - uri = 'https://purl.org/zarr/spec/codec/' - conf = meta['configuration'] - if meta['codec'].startswith(uri + 'gzip/'): + uri = "https://purl.org/zarr/spec/codec/" + conf = meta["configuration"] + if meta["codec"].startswith(uri + "gzip/"): conf["id"] = "gzip" - elif meta['codec'].startswith(uri + 'zlib/'): + elif meta["codec"].startswith(uri + "zlib/"): conf["id"] = "zlib" - elif meta['codec'].startswith(uri + 'blosc/'): + elif meta["codec"].startswith(uri + "blosc/"): conf["id"] = "blosc" - elif meta['codec'].startswith(uri + 'bz2/'): + elif meta["codec"].startswith(uri + "bz2/"): conf["id"] = "bz2" - elif meta['codec'].startswith(uri + 'lz4/'): + elif meta["codec"].startswith(uri + "lz4/"): conf["id"] = "lz4" - elif meta['codec'].startswith(uri + 'lzma/'): + elif meta["codec"].startswith(uri + "lzma/"): conf["id"] = "lzma" else: raise NotImplementedError @@ -461,8 +449,7 @@ def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: @classmethod def _encode_storage_transformer_metadata( - cls, - storage_transformer: "StorageTransformer" + cls, storage_transformer: "StorageTransformer" ) -> Optional[Mapping]: return { "extension": storage_transformer.extension_uri, @@ -478,9 +465,9 @@ def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransfor # This might be changed to a proper registry in the future KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer, ShardingStorageTransformer] - conf = meta.get('configuration', {}) - extension_uri = meta['extension'] - transformer_type = meta['type'] + conf = meta.get("configuration", {}) + extension_uri = meta["extension"] + transformer_type = meta["type"] for StorageTransformerCls in KNOWN_STORAGE_TRANSFORMERS: if StorageTransformerCls.extension_uri == extension_uri: @@ -527,9 +514,9 @@ def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType ) # compressor field should be 
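# Hedged sketch of the v3 codec mapping above: a numcodecs GZip codec is
# keyed by a spec URI, and decoding restores the numcodecs config:
import numcodecs

config = numcodecs.GZip(level=5).get_config()   # {'id': 'gzip', 'level': 5}
config.pop("id")
meta = {
    "codec": "https://purl.org/zarr/spec/codec/gzip/1.0",
    "configuration": config,
}
print(numcodecs.get_codec(dict(meta["configuration"], id="gzip")))  # GZip(level=5)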
absent when there is no compression if compressor: - meta['compressor'] = compressor + meta["compressor"] = compressor if storage_transformers: - meta['storage_transformers'] = storage_transformers + meta["storage_transformers"] = storage_transformers except Exception as e: raise MetadataError("error decoding metadata: %s" % e) diff --git a/zarr/meta_v1.py b/zarr/meta_v1.py index bc4ae12228..4ac381f2ca 100644 --- a/zarr/meta_v1.py +++ b/zarr/meta_v1.py @@ -6,24 +6,24 @@ def decode_metadata(b): - s = str(b, 'ascii') + s = str(b, "ascii") meta = json.loads(s) - zarr_format = meta.get('zarr_format', None) + zarr_format = meta.get("zarr_format", None) if zarr_format != 1: - raise MetadataError('unsupported zarr format: %s' % zarr_format) + raise MetadataError("unsupported zarr format: %s" % zarr_format) try: meta = dict( - zarr_format=meta['zarr_format'], - shape=tuple(meta['shape']), - chunks=tuple(meta['chunks']), - dtype=decode_dtype(meta['dtype']), - compression=meta['compression'], - compression_opts=meta['compression_opts'], - fill_value=meta['fill_value'], - order=meta['order'], + zarr_format=meta["zarr_format"], + shape=tuple(meta["shape"]), + chunks=tuple(meta["chunks"]), + dtype=decode_dtype(meta["dtype"]), + compression=meta["compression"], + compression_opts=meta["compression_opts"], + fill_value=meta["fill_value"], + order=meta["order"], ) except Exception as e: - raise MetadataError('error decoding metadata: %s' % e) + raise MetadataError("error decoding metadata: %s" % e) else: return meta @@ -31,16 +31,16 @@ def decode_metadata(b): def encode_metadata(meta): meta = dict( zarr_format=1, - shape=meta['shape'], - chunks=meta['chunks'], - dtype=encode_dtype(meta['dtype']), - compression=meta['compression'], - compression_opts=meta['compression_opts'], - fill_value=meta['fill_value'], - order=meta['order'], + shape=meta["shape"], + chunks=meta["chunks"], + dtype=encode_dtype(meta["dtype"]), + compression=meta["compression"], + compression_opts=meta["compression_opts"], + fill_value=meta["fill_value"], + order=meta["order"], ) s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True) - b = s.encode('ascii') + b = s.encode("ascii") return b diff --git a/zarr/n5.py b/zarr/n5.py index 1eb6ef2b33..7e73905527 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -18,16 +18,16 @@ from .storage import attrs_key as zarr_attrs_key from .storage import group_meta_key as zarr_group_meta_key -N5_FORMAT = '2.0.0' +N5_FORMAT = "2.0.0" zarr_to_n5_keys = [ - ('chunks', 'blockSize'), - ('dtype', 'dataType'), - ('compressor', 'compression'), - ('shape', 'dimensions') + ("chunks", "blockSize"), + ("dtype", "dataType"), + ("compressor", "compression"), + ("shape", "dimensions"), ] -n5_attrs_key = 'attributes.json' -n5_keywords = ['n5', 'dataType', 'dimensions', 'blockSize', 'compression'] +n5_attrs_key = "attributes.json" +n5_keywords = ["n5", "dataType", "dimensions", "blockSize", "compression"] class N5Store(NestedDirectoryStore): @@ -173,13 +173,13 @@ def __contains__(self, key): if key_new not in self: return False # group if not a dataset (attributes do not contain 'dimensions') - return 'dimensions' not in self._load_n5_attrs(key_new) + return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): key_new = key.replace(zarr_array_meta_key, n5_attrs_key) # array if attributes contain 'dimensions' - return 'dimensions' in self._load_n5_attrs(key_new) + return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): @@ -195,10 +195,7 @@ def 
__contains__(self, key): return super().__contains__(key_new) def __eq__(self, other): - return ( - isinstance(other, N5Store) and - self.path == other.path - ) + return isinstance(other, N5Store) and self.path == other.path def listdir(self, path: Optional[str] = None): @@ -229,7 +226,7 @@ def listdir(self, path: Optional[str] = None): for file_name in file_names: file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path + os.path.sep)[1] - new_child = rel_path.replace(os.path.sep, '.') + new_child = rel_path.replace(os.path.sep, ".") new_children.append(invert_chunk_coords(new_child)) else: new_children.append(entry) @@ -265,7 +262,7 @@ def _is_group(self, path: str): attrs_key = os.path.join(path, n5_attrs_key) n5_attrs = self._load_n5_attrs(attrs_key) - return len(n5_attrs) > 0 and 'dimensions' not in n5_attrs + return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: str): @@ -274,7 +271,7 @@ def _is_array(self, path: str): else: attrs_key = os.path.join(path, n5_attrs_key) - return 'dimensions' in self._load_n5_attrs(attrs_key) + return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: str): @@ -340,27 +337,28 @@ class N5FSStore(FSStore): dimensions, hence the Zarr arrays targeting N5 have the deceptive "." dimension separator. """ - _array_meta_key = 'attributes.json' - _group_meta_key = 'attributes.json' - _attrs_key = 'attributes.json' + + _array_meta_key = "attributes.json" + _group_meta_key = "attributes.json" + _attrs_key = "attributes.json" def __init__(self, *args, **kwargs): - if 'dimension_separator' in kwargs: - kwargs.pop('dimension_separator') - warnings.warn('Keyword argument `dimension_separator` will be ignored') + if "dimension_separator" in kwargs: + kwargs.pop("dimension_separator") + warnings.warn("Keyword argument `dimension_separator` will be ignored") dimension_separator = "." 
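# Usage sketch for the N5 store logic above; "example.n5" is a hypothetical
# path.  Every node gets an attributes.json, and the presence of a
# "dimensions" entry is what distinguishes arrays from groups:
import zarr

store = zarr.N5Store("example.n5")
root = zarr.group(store=store, overwrite=True)
z = root.zeros("data", shape=(4, 6), chunks=(2, 3), dtype="u2")
z[:] = 1
print(root["data"].shape)   # -> (4, 6)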
super().__init__(*args, dimension_separator=dimension_separator, **kwargs) @staticmethod def _swap_separator(key: str): - segments = list(key.split('/')) + segments = list(key.split("/")) if segments: last_segment = segments[-1] if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '/'.join(coords[::-1]) + coords = list(last_segment.split(".")) + last_segment = "/".join(coords[::-1]) segments = segments[:-1] + [last_segment] - key = '/'.join(segments) + key = "/".join(segments) return key def _normalize_key(self, key: str): @@ -527,7 +525,7 @@ def listdir(self, path: Optional[str] = None): for file_name in self.fs.find(entry_path): file_path = os.path.join(root_path, file_name) rel_path = file_path.split(root_path)[1] - new_child = rel_path.lstrip('/').replace('/', ".") + new_child = rel_path.lstrip("/").replace("/", ".") new_children.append(invert_chunk_coords(new_child)) else: new_children.append(entry) @@ -586,7 +584,7 @@ def _contains_attrs(self, path: Optional[str]): def is_chunk_key(key: str): rv = False - segments = list(key.split('/')) + segments = list(key.split("/")) if segments: last_segment = segments[-1] rv = bool(_prog_ckey.match(last_segment)) @@ -594,118 +592,116 @@ def is_chunk_key(key: str): def invert_chunk_coords(key: str): - segments = list(key.split('/')) + segments = list(key.split("/")) if segments: last_segment = segments[-1] if _prog_ckey.match(last_segment): - coords = list(last_segment.split('.')) - last_segment = '/'.join(coords[::-1]) + coords = list(last_segment.split(".")) + last_segment = "/".join(coords[::-1]) segments = segments[:-1] + [last_segment] - key = '/'.join(segments) + key = "/".join(segments) return key def group_metadata_to_n5(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - '''Convert group metadata from zarr to N5 format.''' - del group_metadata['zarr_format'] + """Convert group metadata from zarr to N5 format.""" + del group_metadata["zarr_format"] # TODO: This should only exist at the top-level - group_metadata['n5'] = N5_FORMAT + group_metadata["n5"] = N5_FORMAT return group_metadata def group_metadata_to_zarr(group_metadata: Dict[str, Any]) -> Dict[str, Any]: - '''Convert group metadata from N5 to zarr format.''' + """Convert group metadata from N5 to zarr format.""" # This only exists at the top level - group_metadata.pop('n5', None) - group_metadata['zarr_format'] = ZARR_FORMAT + group_metadata.pop("n5", None) + group_metadata["zarr_format"] = ZARR_FORMAT return group_metadata def array_metadata_to_n5(array_metadata: Dict[str, Any], top_level=False) -> Dict[str, Any]: - '''Convert array metadata from zarr to N5 format. If the `top_level` keyword argument is True, - then the `N5` : N5_FORMAT key : value pair will be inserted into the metadata.''' + """Convert array metadata from zarr to N5 format. 
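# Standalone sketch of the chunk-key flip performed by invert_chunk_coords
# above (N5 lays chunk coordinates out in the reverse order and nests them
# as directories); not the imported helper itself:
def invert_chunk_coords_sketch(key: str) -> str:
    *parents, last = key.split("/")
    if all(seg.isdigit() for seg in last.split(".")):
        last = "/".join(reversed(last.split(".")))
    return "/".join(parents + [last])

print(invert_chunk_coords_sketch("foo/2.3.4"))   # -> foo/4/3/2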
If the `top_level` keyword argument is True, + then the `N5` : N5_FORMAT key : value pair will be inserted into the metadata.""" for f, t in zarr_to_n5_keys: array_metadata[t] = array_metadata.pop(f) - del array_metadata['zarr_format'] + del array_metadata["zarr_format"] if top_level: - array_metadata['n5'] = N5_FORMAT + array_metadata["n5"] = N5_FORMAT try: - dtype = np.dtype(array_metadata['dataType']) + dtype = np.dtype(array_metadata["dataType"]) except TypeError: - raise TypeError( - f"Data type {array_metadata['dataType']} is not supported by N5") + raise TypeError(f"Data type {array_metadata['dataType']} is not supported by N5") - array_metadata['dataType'] = dtype.name - array_metadata['dimensions'] = array_metadata['dimensions'][::-1] - array_metadata['blockSize'] = array_metadata['blockSize'][::-1] + array_metadata["dataType"] = dtype.name + array_metadata["dimensions"] = array_metadata["dimensions"][::-1] + array_metadata["blockSize"] = array_metadata["blockSize"][::-1] - if 'fill_value' in array_metadata: - if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None: + if "fill_value" in array_metadata: + if array_metadata["fill_value"] != 0 and array_metadata["fill_value"] is not None: raise ValueError( - f'''Received fill_value = {array_metadata['fill_value']}, - but N5 only supports fill_value = 0''' - ) - del array_metadata['fill_value'] + f"""Received fill_value = {array_metadata['fill_value']}, + but N5 only supports fill_value = 0""" + ) + del array_metadata["fill_value"] - if 'order' in array_metadata: - if array_metadata['order'] != 'C': + if "order" in array_metadata: + if array_metadata["order"] != "C": raise ValueError( f"Received order = {array_metadata['order']}, but N5 only supports order = C" - ) - del array_metadata['order'] + ) + del array_metadata["order"] - if 'filters' in array_metadata: - if array_metadata['filters'] != [] and array_metadata['filters'] is not None: - raise ValueError( - "Received filters, but N5 storage does not support zarr filters" - ) - del array_metadata['filters'] + if "filters" in array_metadata: + if array_metadata["filters"] != [] and array_metadata["filters"] is not None: + raise ValueError("Received filters, but N5 storage does not support zarr filters") + del array_metadata["filters"] - assert 'compression' in array_metadata - compressor_config = array_metadata['compression'] + assert "compression" in array_metadata + compressor_config = array_metadata["compression"] compressor_config = compressor_config_to_n5(compressor_config) - array_metadata['compression'] = compressor_config + array_metadata["compression"] = compressor_config - if 'dimension_separator' in array_metadata: - del array_metadata['dimension_separator'] + if "dimension_separator" in array_metadata: + del array_metadata["dimension_separator"] return array_metadata -def array_metadata_to_zarr(array_metadata: Dict[str, Any], - top_level: bool = False) -> Dict[str, Any]: - '''Convert array metadata from N5 to zarr format. - If the `top_level` keyword argument is True, then the `N5` key will be removed from metadata''' +def array_metadata_to_zarr( + array_metadata: Dict[str, Any], top_level: bool = False +) -> Dict[str, Any]: + """Convert array metadata from N5 to zarr format. 
+ If the `top_level` keyword argument is True, then the `N5` key will be removed from metadata""" for t, f in zarr_to_n5_keys: array_metadata[t] = array_metadata.pop(f) if top_level: - array_metadata.pop('n5') - array_metadata['zarr_format'] = ZARR_FORMAT - - array_metadata['shape'] = array_metadata['shape'][::-1] - array_metadata['chunks'] = array_metadata['chunks'][::-1] - array_metadata['fill_value'] = 0 # also if None was requested - array_metadata['order'] = 'C' - array_metadata['filters'] = [] - array_metadata['dimension_separator'] = '.' - array_metadata['dtype'] = np.dtype(array_metadata['dtype']).str - - compressor_config = array_metadata['compressor'] + array_metadata.pop("n5") + array_metadata["zarr_format"] = ZARR_FORMAT + + array_metadata["shape"] = array_metadata["shape"][::-1] + array_metadata["chunks"] = array_metadata["chunks"][::-1] + array_metadata["fill_value"] = 0 # also if None was requested + array_metadata["order"] = "C" + array_metadata["filters"] = [] + array_metadata["dimension_separator"] = "." + array_metadata["dtype"] = np.dtype(array_metadata["dtype"]).str + + compressor_config = array_metadata["compressor"] compressor_config = compressor_config_to_zarr(compressor_config) - array_metadata['compressor'] = { - 'id': N5ChunkWrapper.codec_id, - 'compressor_config': compressor_config, - 'dtype': array_metadata['dtype'], - 'chunk_shape': array_metadata['chunks'] + array_metadata["compressor"] = { + "id": N5ChunkWrapper.codec_id, + "compressor_config": compressor_config, + "dtype": array_metadata["dtype"], + "chunk_shape": array_metadata["chunks"], } return array_metadata def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: - '''Get all zarr attributes from an N5 attributes dictionary (i.e., - all non-keyword attributes).''' + """Get all zarr attributes from an N5 attributes dictionary (i.e., + all non-keyword attributes).""" # remove all N5 keywords for n5_key in n5_keywords: @@ -718,134 +714,133 @@ def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: if compressor_config is None: - return {'type': 'raw'} + return {"type": "raw"} else: _compressor_config = compressor_config # peel wrapper, if present - if _compressor_config['id'] == N5ChunkWrapper.codec_id: - _compressor_config = _compressor_config['compressor_config'] + if _compressor_config["id"] == N5ChunkWrapper.codec_id: + _compressor_config = _compressor_config["compressor_config"] - codec_id = _compressor_config['id'] - n5_config = {'type': codec_id} + codec_id = _compressor_config["id"] + n5_config = {"type": codec_id} - if codec_id == 'bz2': + if codec_id == "bz2": - n5_config['type'] = 'bzip2' - n5_config['blockSize'] = _compressor_config['level'] + n5_config["type"] = "bzip2" + n5_config["blockSize"] = _compressor_config["level"] - elif codec_id == 'blosc': + elif codec_id == "blosc": - n5_config['cname'] = _compressor_config['cname'] - n5_config['clevel'] = _compressor_config['clevel'] - n5_config['shuffle'] = _compressor_config['shuffle'] - n5_config['blocksize'] = _compressor_config['blocksize'] + n5_config["cname"] = _compressor_config["cname"] + n5_config["clevel"] = _compressor_config["clevel"] + n5_config["shuffle"] = _compressor_config["shuffle"] + n5_config["blocksize"] = _compressor_config["blocksize"] - elif codec_id == 'lzma': + elif codec_id == "lzma": # Switch to XZ for N5 if we are using the default XZ format. # Note: 4 is the default, which is lzma.CHECK_CRC64. 
- if _compressor_config['format'] == 1 and _compressor_config['check'] in [-1, 4]: - n5_config['type'] = 'xz' + if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: + n5_config["type"] = "xz" else: warnings.warn( "Not all N5 implementations support lzma compression (yet). You " "might not be able to open the dataset with another N5 library.", - RuntimeWarning + RuntimeWarning, ) - n5_config['format'] = _compressor_config['format'] - n5_config['check'] = _compressor_config['check'] - n5_config['filters'] = _compressor_config['filters'] + n5_config["format"] = _compressor_config["format"] + n5_config["check"] = _compressor_config["check"] + n5_config["filters"] = _compressor_config["filters"] # The default is lzma.PRESET_DEFAULT, which is 6. - if _compressor_config['preset']: - n5_config['preset'] = _compressor_config['preset'] + if _compressor_config["preset"]: + n5_config["preset"] = _compressor_config["preset"] else: - n5_config['preset'] = 6 + n5_config["preset"] = 6 - elif codec_id == 'zlib': + elif codec_id == "zlib": - n5_config['type'] = 'gzip' - n5_config['level'] = _compressor_config['level'] - n5_config['useZlib'] = True + n5_config["type"] = "gzip" + n5_config["level"] = _compressor_config["level"] + n5_config["useZlib"] = True - elif codec_id == 'gzip': + elif codec_id == "gzip": - n5_config['type'] = 'gzip' - n5_config['level'] = _compressor_config['level'] - n5_config['useZlib'] = False + n5_config["type"] = "gzip" + n5_config["level"] = _compressor_config["level"] + n5_config["useZlib"] = False else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != 'type'}) + n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) return n5_config def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config['type'] - zarr_config = {'id': codec_id} + codec_id = compressor_config["type"] + zarr_config = {"id": codec_id} - if codec_id == 'bzip2': + if codec_id == "bzip2": - zarr_config['id'] = 'bz2' - zarr_config['level'] = compressor_config['blockSize'] + zarr_config["id"] = "bz2" + zarr_config["level"] = compressor_config["blockSize"] - elif codec_id == 'blosc': + elif codec_id == "blosc": - zarr_config['cname'] = compressor_config['cname'] - zarr_config['clevel'] = compressor_config['clevel'] - zarr_config['shuffle'] = compressor_config['shuffle'] - zarr_config['blocksize'] = compressor_config['blocksize'] + zarr_config["cname"] = compressor_config["cname"] + zarr_config["clevel"] = compressor_config["clevel"] + zarr_config["shuffle"] = compressor_config["shuffle"] + zarr_config["blocksize"] = compressor_config["blocksize"] - elif codec_id == 'lzma': + elif codec_id == "lzma": - zarr_config['format'] = compressor_config['format'] - zarr_config['check'] = compressor_config['check'] - zarr_config['preset'] = compressor_config['preset'] - zarr_config['filters'] = compressor_config['filters'] + zarr_config["format"] = compressor_config["format"] + zarr_config["check"] = compressor_config["check"] + zarr_config["preset"] = compressor_config["preset"] + zarr_config["filters"] = compressor_config["filters"] - elif codec_id == 'xz': + elif codec_id == "xz": - zarr_config['id'] = 'lzma' - zarr_config['format'] = 1 # lzma.FORMAT_XZ - zarr_config['check'] = -1 - zarr_config['preset'] = compressor_config['preset'] - zarr_config['filters'] = None + zarr_config["id"] = "lzma" + zarr_config["format"] = 1 # lzma.FORMAT_XZ + zarr_config["check"] = -1 + zarr_config["preset"] 
= compressor_config["preset"] + zarr_config["filters"] = None - elif codec_id == 'gzip': + elif codec_id == "gzip": - if 'useZlib' in compressor_config and compressor_config['useZlib']: - zarr_config['id'] = 'zlib' - zarr_config['level'] = compressor_config['level'] + if "useZlib" in compressor_config and compressor_config["useZlib"]: + zarr_config["id"] = "zlib" + zarr_config["level"] = compressor_config["level"] else: - zarr_config['id'] = 'gzip' - zarr_config['level'] = compressor_config['level'] + zarr_config["id"] = "gzip" + zarr_config["level"] = compressor_config["level"] - elif codec_id == 'raw': + elif codec_id == "raw": return None else: - zarr_config.update({k: v for k, v in compressor_config.items() if k != 'type'}) + zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"}) return zarr_config class N5ChunkWrapper(Codec): - codec_id = 'n5_wrapper' + codec_id = "n5_wrapper" def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): self.dtype = np.dtype(dtype) self.chunk_shape = tuple(chunk_shape) # is the dtype a little endian format? - self._little_endian = ( - self.dtype.byteorder == '<' or - (self.dtype.byteorder == '=' and sys.byteorder == 'little') + self._little_endian = self.dtype.byteorder == "<" or ( + self.dtype.byteorder == "=" and sys.byteorder == "little" ) if compressor: @@ -853,9 +848,7 @@ def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): raise ValueError("Only one of compressor_config or compressor should be given.") compressor_config = compressor.get_config() - if ( - compressor_config is None and compressor is None or - compressor_config['id'] == 'raw'): + if compressor_config is None and compressor is None or compressor_config["id"] == "raw": self.compressor_config = None self._compressor = None else: @@ -863,10 +856,7 @@ def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): self.compressor_config = self._compressor.get_config() def get_config(self): - config = { - 'id': self.codec_id, - 'compressor_config': self.compressor_config - } + config = {"id": self.codec_id, "compressor_config": self.compressor_config} return config def encode(self, chunk): @@ -879,7 +869,7 @@ def encode(self, chunk): if self._compressor: return header + self._compressor.encode(chunk) else: - return header + chunk.tobytes(order='A') + return header + chunk.tobytes(order="A") def decode(self, chunk, out=None) -> bytes: @@ -889,10 +879,9 @@ def decode(self, chunk, out=None) -> bytes: if out is not None: # out should only be used if we read a complete chunk - assert chunk_shape == self.chunk_shape, ( - "Expected chunk of shape {}, found {}".format( - self.chunk_shape, - chunk_shape)) + assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( + self.chunk_shape, chunk_shape + ) if self._compressor: self._compressor.decode(chunk, out) @@ -927,25 +916,21 @@ def decode(self, chunk, out=None) -> bytes: @staticmethod def _create_header(chunk): - mode = struct.pack('>H', 0) - num_dims = struct.pack('>H', len(chunk.shape)) - shape = b''.join( - struct.pack('>I', d) - for d in chunk.shape[::-1] - ) + mode = struct.pack(">H", 0) + num_dims = struct.pack(">H", len(chunk.shape)) + shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) return mode + num_dims + shape @staticmethod def _read_header(chunk): - num_dims = struct.unpack('>H', chunk[2:4])[0] + num_dims = struct.unpack(">H", chunk[2:4])[0] shape = tuple( - struct.unpack('>I', chunk[i:i+4])[0] - for i 
in range(4, num_dims*4 + 4, 4) + struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) )[::-1] - len_header = 4 + num_dims*4 + len_header = 4 + num_dims * 4 return len_header, shape @@ -962,7 +947,7 @@ def _from_big_endian(self, data): if not self._little_endian: return data - a = np.frombuffer(data, self.dtype.newbyteorder('>')) + a = np.frombuffer(data, self.dtype.newbyteorder(">")) return a.astype(self.dtype) diff --git a/zarr/storage.py b/zarr/storage.py index ef1bd64955..ffd8fba45c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -36,11 +36,7 @@ import time from numcodecs.abc import Codec -from numcodecs.compat import ( - ensure_bytes, - ensure_text, - ensure_contiguous_ndarray_like -) +from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry from zarr.context import Context @@ -53,45 +49,58 @@ ReadOnlyError, ) from zarr.meta import encode_array_metadata, encode_group_metadata -from zarr.util import (buffer_size, json_loads, nolock, normalize_chunks, - normalize_dimension_separator, - normalize_dtype, normalize_fill_value, normalize_order, - normalize_shape, normalize_storage_path, retry_call, - ensure_contiguous_ndarray_or_bytes) +from zarr.util import ( + buffer_size, + json_loads, + nolock, + normalize_chunks, + normalize_dimension_separator, + normalize_dtype, + normalize_fill_value, + normalize_order, + normalize_shape, + normalize_storage_path, + retry_call, + ensure_contiguous_ndarray_or_bytes, +) from zarr._storage.absstore import ABSStore # noqa: F401 -from zarr._storage.store import (_get_hierarchy_metadata, # noqa: F401 - _get_metadata_suffix, - _listdir_from_keys, - _rename_from_keys, - _rename_metadata_v3, - _rmdir_from_keys, - _rmdir_from_keys_v3, - _path_to_prefix, - _prefix_to_array_key, - _prefix_to_group_key, - array_meta_key, - attrs_key, - data_root, - group_meta_key, - meta_root, - DEFAULT_ZARR_VERSION, - BaseStore, - Store) +from zarr._storage.store import ( + _get_hierarchy_metadata, # noqa: F401 + _get_metadata_suffix, + _listdir_from_keys, + _rename_from_keys, + _rename_metadata_v3, + _rmdir_from_keys, + _rmdir_from_keys_v3, + _path_to_prefix, + _prefix_to_array_key, + _prefix_to_group_key, + array_meta_key, + attrs_key, + data_root, + group_meta_key, + meta_root, + DEFAULT_ZARR_VERSION, + BaseStore, + Store, +) __doctest_requires__ = { - ('RedisStore', 'RedisStore.*'): ['redis'], - ('MongoDBStore', 'MongoDBStore.*'): ['pymongo'], - ('LRUStoreCache', 'LRUStoreCache.*'): ['s3fs'], + ("RedisStore", "RedisStore.*"): ["redis"], + ("MongoDBStore", "MongoDBStore.*"): ["pymongo"], + ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"], } try: # noinspection PyUnresolvedReferences from zarr.codecs import Blosc + default_compressor = Blosc() except ImportError: # pragma: no cover from zarr.codecs import Zlib + default_compressor = Zlib() @@ -113,7 +122,7 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b path = normalize_storage_path(path) prefix = _path_to_prefix(path) key = _prefix_to_group_key(store, prefix) - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if store_version == 2 or explicit_only: return key in store else: @@ -122,9 +131,9 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b # for v3, need to also handle implicit groups sfx = _get_metadata_suffix(store) # type: ignore - implicit_prefix = key.replace('.group' + sfx, '') - if not 
implicit_prefix.endswith('/'): - implicit_prefix += '/' + implicit_prefix = key.replace(".group" + sfx, "") + if not implicit_prefix.endswith("/"): + implicit_prefix += "/" if store.list_prefix(implicit_prefix): # type: ignore return True return False @@ -132,7 +141,7 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseStore: # default to v2 store for backward compatibility - zarr_version = getattr(store, '_store_version', 2) + zarr_version = getattr(store, "_store_version", 2) if zarr_version != 2: raise ValueError("store must be a version 2 store") if store is None: @@ -142,23 +151,27 @@ def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseS store = os.fspath(store) if FSStore._fsspec_installed(): import fsspec + if isinstance(store, fsspec.FSMap): - return FSStore(store.root, - fs=store.fs, - mode=mode, - check=store.check, - create=store.create, - missing_exceptions=store.missing_exceptions, - **(storage_options or {})) + return FSStore( + store.root, + fs=store.fs, + mode=mode, + check=store.check, + create=store.create, + missing_exceptions=store.missing_exceptions, + **(storage_options or {}), + ) if isinstance(store, str): if "://" in store or "::" in store: return FSStore(store, mode=mode, **(storage_options or {})) elif storage_options: raise ValueError("storage_options passed with non-fsspec path") - if store.endswith('.zip'): + if store.endswith(".zip"): return ZipStore(store, mode=mode) - elif store.endswith('.n5'): + elif store.endswith(".n5"): from zarr.n5 import N5Store + return N5Store(store) else: return DirectoryStore(store) @@ -167,8 +180,9 @@ def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseS return store -def normalize_store_arg(store: Any, storage_options=None, mode="r", *, - zarr_version=None) -> BaseStore: +def normalize_store_arg( + store: Any, storage_options=None, mode="r", *, zarr_version=None +) -> BaseStore: if zarr_version is None: # default to v2 store for backward compatibility zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) @@ -176,6 +190,7 @@ def normalize_store_arg(store: Any, storage_options=None, mode="r", *, normalize_store = _normalize_store_arg_v2 elif zarr_version == 3: from zarr._storage.v3 import _normalize_store_arg_v3 + normalize_store = _normalize_store_arg_v3 else: raise ValueError("zarr_version must be either 2 or 3") @@ -187,7 +202,7 @@ def rmdir(store: StoreLike, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through store.rmdir(path) # type: ignore @@ -205,7 +220,7 @@ def rename(store: Store, src_path: Path, dst_path: Path): `Store` interface.""" src_path = normalize_storage_path(src_path) dst_path = normalize_storage_path(dst_path) - if hasattr(store, 'rename'): + if hasattr(store, "rename"): # pass through store.rename(src_path, dst_path) else: @@ -218,7 +233,7 @@ def listdir(store: BaseStore, path: Path = None): method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" path = normalize_storage_path(path) - if hasattr(store, 'listdir'): + if hasattr(store, "listdir"): # pass through return store.listdir(path) # type: 
ignore else: @@ -237,14 +252,14 @@ def _getsize(store: BaseStore, path: Path = None) -> int: v = store[path] size = buffer_size(v) else: - path = '' if path is None else normalize_storage_path(path) + path = "" if path is None else normalize_storage_path(path) size = 0 - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if store_version == 3: - if path == '': + if path == "": # have to list the root folders without trailing / in this case - members = store.list_prefix(data_root.rstrip('/')) # type: ignore - members += store.list_prefix(meta_root.rstrip('/')) # type: ignore + members = store.list_prefix(data_root.rstrip("/")) # type: ignore + members += store.list_prefix(meta_root.rstrip("/")) # type: ignore else: members = store.list_prefix(data_root + path) # type: ignore members += store.list_prefix(meta_root + path) # type: ignore @@ -270,7 +285,7 @@ def _getsize(store: BaseStore, path: Path = None) -> int: def getsize(store: BaseStore, path: Path = None) -> int: """Compute size of stored items for a given path. If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" - if hasattr(store, 'getsize'): + if hasattr(store, "getsize"): # pass through path = normalize_storage_path(path) return store.getsize(path) # type: ignore @@ -288,12 +303,11 @@ def _require_parent_group( ): # assume path is normalized if path: - segments = path.split('/') + segments = path.split("/") for i in range(len(segments)): - p = '/'.join(segments[:i]) + p = "/".join(segments[:i]) if contains_array(store, p): - _init_group_metadata(store, path=p, chunk_store=chunk_store, - overwrite=overwrite) + _init_group_metadata(store, path=p, chunk_store=chunk_store, overwrite=overwrite) elif not contains_group(store, p): _init_group_metadata(store, path=p, chunk_store=chunk_store) @@ -425,23 +439,31 @@ def init_array( # ensure parent group initialized store_version = getattr(store, "_store_version", 2) if store_version < 3: - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - if store_version == 3 and 'zarr.json' not in store: + if store_version == 3 and "zarr.json" not in store: # initialize with default zarr.json entry level metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore if not compressor: # compatibility with legacy tests using compressor=[] compressor = None - _init_array_metadata(store, shape=shape, chunks=chunks, dtype=dtype, - compressor=compressor, fill_value=fill_value, - order=order, overwrite=overwrite, path=path, - chunk_store=chunk_store, filters=filters, - object_codec=object_codec, - dimension_separator=dimension_separator, - storage_transformers=storage_transformers) + _init_array_metadata( + store, + shape=shape, + chunks=chunks, + dtype=dtype, + compressor=compressor, + fill_value=fill_value, + order=order, + overwrite=overwrite, + path=path, + chunk_store=chunk_store, + filters=filters, + object_codec=object_codec, + dimension_separator=dimension_separator, + storage_transformers=storage_transformers, + ) def _init_array_metadata( @@ -461,7 +483,7 @@ def _init_array_metadata( storage_transformers=(), ): - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ 
-486,11 +508,11 @@ def _init_array_metadata( if chunk_store is not None: chunk_store.erase_prefix(data_prefix) # type: ignore - if '/' in path: + if "/" in path: # path is a subfolder of an existing array, remove that array - parent_path = '/'.join(path.split('/')[:-1]) + parent_path = "/".join(path.split("/")[:-1]) sfx = _get_metadata_suffix(store) # type: ignore - array_key = meta_root + parent_path + '.array' + sfx + array_key = meta_root + parent_path + ".array" + sfx if array_key in store: store.erase(array_key) # type: ignore @@ -500,9 +522,9 @@ def _init_array_metadata( elif contains_group(store, path, explicit_only=False): raise ContainsGroupError(path) elif store_version == 3: - if '/' in path: + if "/" in path: # cannot create an array within an existing array path - parent_path = '/'.join(path.split('/')[:-1]) + parent_path = "/".join(path.split("/")[:-1]) if contains_array(store, parent_path): raise ContainsArrayError(path) @@ -523,10 +545,10 @@ def _init_array_metadata( if shape == (): # no point in compressing a 0-dimensional array, only a single value compressor = None - elif compressor == 'none': + elif compressor == "none": # compatibility compressor = None - elif compressor == 'default': + elif compressor == "default": compressor = default_compressor # obtain compressor config @@ -556,16 +578,19 @@ def _init_array_metadata( if object_codec is None: if not filters: # there are no filters so we can be sure there is no object codec - raise ValueError('missing object_codec for object array') + raise ValueError("missing object_codec for object array") else: # one of the filters may be an object codec, issue a warning rather # than raise an error to maintain backwards-compatibility - warnings.warn('missing object_codec for object array; this will raise a ' - 'ValueError in version 3.0', FutureWarning) + warnings.warn( + "missing object_codec for object array; this will raise a " + "ValueError in version 3.0", + FutureWarning, + ) else: filters_config.insert(0, object_codec.get_config()) elif object_codec is not None: - warnings.warn('an object_codec is only needed for object arrays') + warnings.warn("an object_codec is only needed for object arrays") # use null to indicate no filters if not filters_config: @@ -574,32 +599,34 @@ def _init_array_metadata( # initialize metadata # TODO: don't store redundant dimension_separator for v3? 
_compressor = compressor_config if store_version == 2 else compressor - meta = dict(shape=shape, compressor=_compressor, - fill_value=fill_value, - dimension_separator=dimension_separator) + meta = dict( + shape=shape, + compressor=_compressor, + fill_value=fill_value, + dimension_separator=dimension_separator, + ) if store_version < 3: - meta.update(dict(chunks=chunks, dtype=dtype, order=order, - filters=filters_config)) + meta.update(dict(chunks=chunks, dtype=dtype, order=order, filters=filters_config)) assert not storage_transformers else: if dimension_separator is None: dimension_separator = "/" if filters_config: - attributes = {'filters': filters_config} + attributes = {"filters": filters_config} else: attributes = {} meta.update( - dict(chunk_grid=dict(type="regular", - chunk_shape=chunks, - separator=dimension_separator), - chunk_memory_layout=order, - data_type=dtype, - attributes=attributes, - storage_transformers=storage_transformers) + dict( + chunk_grid=dict(type="regular", chunk_shape=chunks, separator=dimension_separator), + chunk_memory_layout=order, + data_type=dtype, + attributes=attributes, + storage_transformers=storage_transformers, + ) ) key = _prefix_to_array_key(store, _path_to_prefix(path)) - if hasattr(store, '_metadata_class'): + if hasattr(store, "_metadata_class"): store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore else: store[key] = encode_array_metadata(meta) @@ -635,19 +662,17 @@ def init_group( # normalize path path = normalize_storage_path(path) - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) if store_version < 3: # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, - overwrite=overwrite) + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - if store_version == 3 and 'zarr.json' not in store: + if store_version == 3 and "zarr.json" not in store: # initialize with default zarr.json entry level metadata - store['zarr.json'] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore # initialise metadata - _init_group_metadata(store=store, overwrite=overwrite, path=path, - chunk_store=chunk_store) + _init_group_metadata(store=store, overwrite=overwrite, path=path, chunk_store=chunk_store) if store_version == 3: # TODO: Should initializing a v3 group also create a corresponding @@ -663,7 +688,7 @@ def _init_group_metadata( chunk_store: Optional[StoreLike] = None, ): - store_version = getattr(store, '_store_version', 2) + store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) # guard conditions @@ -694,9 +719,9 @@ def _init_group_metadata( raise ContainsArrayError(path) elif contains_group(store, path): raise ContainsGroupError(path) - elif store_version == 3 and '/' in path: + elif store_version == 3 and "/" in path: # cannot create a group overlapping with an existing array name - parent_path = '/'.join(path.split('/')[:-1]) + parent_path = "/".join(path.split("/")[:-1]) if contains_array(store, parent_path): raise ContainsArrayError(path) @@ -704,11 +729,11 @@ def _init_group_metadata( # N.B., currently no metadata properties are needed, however there may # be in future if store_version == 3: - meta = {'attributes': {}} # type: ignore + meta = {"attributes": {}} # type: ignore else: meta = {} # type: ignore key = _prefix_to_group_key(store, 
_path_to_prefix(path)) - if hasattr(store, '_metadata_class'): + if hasattr(store, "_metadata_class"): store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore else: store[key] = encode_group_metadata(meta) @@ -718,7 +743,7 @@ def _dict_store_keys(d: Dict, prefix="", cls=dict): for k in d.keys(): v = d[k] if isinstance(v, cls): - yield from _dict_store_keys(v, prefix + k + '/', cls) + yield from _dict_store_keys(v, prefix + k + "/", cls) else: yield prefix + k @@ -814,7 +839,7 @@ def __setstate__(self, state): def _get_parent(self, item: str): parent = self.root # split the item - segments = item.split('/') + segments = item.split("/") # find the parent container for k in segments[:-1]: parent = parent[k] @@ -825,7 +850,7 @@ def _get_parent(self, item: str): def _require_parent(self, item): parent = self.root # split the item - segments = item.split('/') + segments = item.split("/") # require the parent container for k in segments[:-1]: try: @@ -874,11 +899,7 @@ def __contains__(self, item: str): # type: ignore[override] return not isinstance(value, self.cls) def __eq__(self, other): - return ( - isinstance(other, MemoryStore) and - self.root == other.root and - self.cls == other.cls - ) + return isinstance(other, MemoryStore) and self.root == other.root and self.cls == other.cls def keys(self): yield from _dict_store_keys(self.root, cls=self.cls) @@ -963,12 +984,13 @@ def clear(self): class DictStore(MemoryStore): - def __init__(self, *args, **kwargs): - warnings.warn("DictStore has been renamed to MemoryStore in 2.4.0 and " - "will be removed in the future. Please use MemoryStore.", - DeprecationWarning, - stacklevel=2) + warnings.warn( + "DictStore has been renamed to MemoryStore in 2.4.0 and " + "will be removed in the future. Please use MemoryStore.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(*args, **kwargs) @@ -1048,7 +1070,7 @@ def _normalize_key(self, key): @staticmethod def _fromfile(fn): - """ Read data from a file + """Read data from a file Parameters ---------- @@ -1060,12 +1082,12 @@ def _fromfile(fn): Subclasses should overload this method to specify any custom file reading logic. """ - with open(fn, 'rb') as f: + with open(fn, "rb") as f: return f.read() @staticmethod def _tofile(a, fn): - """ Write data to a file + """Write data to a file Parameters ---------- @@ -1079,7 +1101,7 @@ def _tofile(a, fn): Subclasses should overload this method to specify any custom file writing logic. """ - with open(fn, mode='wb') as f: + with open(fn, mode="wb") as f: f.write(a) def __getitem__(self, key): @@ -1116,7 +1138,7 @@ def __setitem__(self, key, value): # write to temporary file # note we're not using tempfile.NamedTemporaryFile to avoid restrictive file permissions - temp_name = file_name + '.' + uuid.uuid4().hex + '.partial' + temp_name = file_name + "." 
+ uuid.uuid4().hex + ".partial" temp_path = os.path.join(dir_path, temp_name) try: self._tofile(value, temp_path) @@ -1149,10 +1171,7 @@ def __contains__(self, key): return os.path.isfile(file_path) def __eq__(self, other): - return ( - isinstance(other, DirectoryStore) and - self.path == other.path - ) + return isinstance(other, DirectoryStore) and self.path == other.path def keys(self): if os.path.exists(self.path): @@ -1184,8 +1203,11 @@ def dir_path(self, path=None): return dir_path def listdir(self, path=None): - return self._nested_listdir(path) if self._dimension_separator == "/" else \ - self._flat_listdir(path) + return ( + self._nested_listdir(path) + if self._dimension_separator == "/" + else self._flat_listdir(path) + ) def _flat_listdir(self, path=None): dir_path = self.dir_path(path) @@ -1208,9 +1230,9 @@ def _nested_listdir(self, path=None): for file_name in file_names: file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path + os.path.sep)[1] - new_children.append(rel_path.replace( - os.path.sep, - self._dimension_separator or '.')) + new_children.append( + rel_path.replace(os.path.sep, self._dimension_separator or ".") + ) else: new_children.append(entry) return sorted(new_children) @@ -1256,21 +1278,21 @@ def clear(self): shutil.rmtree(self.path) -def atexit_rmtree(path, - isdir=os.path.isdir, - rmtree=shutil.rmtree): # pragma: no cover +def atexit_rmtree(path, isdir=os.path.isdir, rmtree=shutil.rmtree): # pragma: no cover """Ensure directory removal at interpreter exit.""" if isdir(path): rmtree(path) # noinspection PyShadowingNames -def atexit_rmglob(path, - glob=glob.glob, - isdir=os.path.isdir, - isfile=os.path.isfile, - remove=os.remove, - rmtree=shutil.rmtree): # pragma: no cover +def atexit_rmglob( + path, + glob=glob.glob, + isdir=os.path.isdir, + isfile=os.path.isfile, + remove=os.remove, + rmtree=shutil.rmtree, +): # pragma: no cover """Ensure removal of multiple files at interpreter exit.""" for p in glob(path): if isfile(p): @@ -1316,19 +1338,25 @@ class FSStore(Store): storage_options : passed to the fsspec implementation. Cannot be used together with fs. """ + _array_meta_key = array_meta_key _group_meta_key = group_meta_key _attrs_key = attrs_key - def __init__(self, url, normalize_keys=False, key_separator=None, - mode='w', - exceptions=(KeyError, PermissionError, IOError), - dimension_separator=None, - fs=None, - check=False, - create=False, - missing_exceptions=None, - **storage_options): + def __init__( + self, + url, + normalize_keys=False, + key_separator=None, + mode="w", + exceptions=(KeyError, PermissionError, IOError), + dimension_separator=None, + fs=None, + check=False, + create=False, + missing_exceptions=None, + **storage_options, + ): if not self._fsspec_installed(): # pragma: no cover raise ImportError("`fsspec` is required to use zarr's FSStore") import fsspec @@ -1374,13 +1402,13 @@ def _default_key_separator(self): self.key_separator = "." 
def _normalize_key(self, key): - key = normalize_storage_path(key).lstrip('/') + key = normalize_storage_path(key).lstrip("/") if key: - *bits, end = key.split('/') + *bits, end = key.split("/") if end not in (self._array_meta_key, self._group_meta_key, self._attrs_key): - end = end.replace('.', self.key_separator) - key = '/'.join(bits + [end]) + end = end.replace(".", self.key_separator) + key = "/".join(bits + [end]) return key.lower() if self.normalize_keys else key @@ -1402,7 +1430,7 @@ def __getitem__(self, key): raise KeyError(key) from e def setitems(self, values): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() # Normalize keys and make sure the values are bytes @@ -1413,7 +1441,7 @@ def setitems(self, values): self.map.setitems(values) def __setitem__(self, key, value): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() key = self._normalize_key(key) value = ensure_contiguous_ndarray_or_bytes(value) @@ -1427,7 +1455,7 @@ def __setitem__(self, key, value): raise KeyError(key) from e def __delitem__(self, key): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() key = self._normalize_key(key) path = self.dir_path(key) @@ -1437,7 +1465,7 @@ def __delitem__(self, key): del self.map[key] def delitems(self, keys): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() # only remove the keys that exist in the store nkeys = [self._normalize_key(key) for key in keys if key in self] @@ -1450,8 +1478,7 @@ def __contains__(self, key): return key in self.map def __eq__(self, other): - return (type(self) is type(other) and self.map == other.map - and self.mode == other.mode) + return type(self) is type(other) and self.map == other.map and self.mode == other.mode def keys(self): return iter(self.map) @@ -1469,8 +1496,9 @@ def dir_path(self, path=None): def listdir(self, path=None): dir_path = self.dir_path(path) try: - children = sorted(p.rstrip('/').rsplit('/', 1)[-1] - for p in self.fs.ls(dir_path, detail=False)) + children = sorted( + p.rstrip("/").rsplit("/", 1)[-1] for p in self.fs.ls(dir_path, detail=False) + ) if self.key_separator != "/": return children else: @@ -1485,8 +1513,8 @@ def listdir(self, path=None): for file_name in self.fs.find(entry_path): file_path = os.path.join(dir_path, file_name) rel_path = file_path.split(root_path)[1] - rel_path = rel_path.lstrip('/') - new_children.append(rel_path.replace('/', '.')) + rel_path = rel_path.lstrip("/") + new_children.append(rel_path.replace("/", ".")) else: new_children.append(entry) return sorted(new_children) @@ -1496,7 +1524,7 @@ def listdir(self, path=None): return [] def rmdir(self, path=None): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() store_path = self.dir_path(path) if self.fs.isdir(store_path): @@ -1507,7 +1535,7 @@ def getsize(self, path=None): return self.fs.du(store_path, True, True) def clear(self): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() self.map.clear() @@ -1540,15 +1568,16 @@ class TempStore(DirectoryStore): """ # noinspection PyShadowingBuiltins - def __init__(self, suffix='', prefix='zarr', dir=None, normalize_keys=False, - dimension_separator=None): + def __init__( + self, suffix="", prefix="zarr", dir=None, normalize_keys=False, dimension_separator=None + ): path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) atexit.register(atexit_rmtree, path) super().__init__(path, normalize_keys=normalize_keys) -_prog_ckey = re.compile(r'^(\d+)(\.\d+)+$') -_prog_number = re.compile(r'^\d+$') 
+_prog_ckey = re.compile(r"^(\d+)(\.\d+)+$") +_prog_number = re.compile(r"^\d+$") class NestedDirectoryStore(DirectoryStore): @@ -1629,15 +1658,11 @@ def __init__(self, path, normalize_keys=False, dimension_separator="/"): if dimension_separator is None: dimension_separator = "/" elif dimension_separator != "/": - raise ValueError( - "NestedDirectoryStore only supports '/' as dimension_separator") + raise ValueError("NestedDirectoryStore only supports '/' as dimension_separator") self._dimension_separator = dimension_separator def __eq__(self, other): - return ( - isinstance(other, NestedDirectoryStore) and - self.path == other.path - ) + return isinstance(other, NestedDirectoryStore) and self.path == other.path # noinspection PyPep8Naming @@ -1735,8 +1760,14 @@ class also supports the context manager protocol, which ensures the ``close()`` _erasable = False - def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode='a', - dimension_separator=None): + def __init__( + self, + path, + compression=zipfile.ZIP_STORED, + allowZip64=True, + mode="a", + dimension_separator=None, + ): # store properties path = os.path.abspath(path) @@ -1752,8 +1783,7 @@ def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode=' self.mutex = RLock() # open zip file - self.zf = zipfile.ZipFile(path, mode=mode, compression=compression, - allowZip64=allowZip64) + self.zf = zipfile.ZipFile(path, mode=mode, compression=compression, allowZip64=allowZip64) def __getstate__(self): self.flush() @@ -1763,10 +1793,9 @@ def __setstate__(self, state): path, compression, allowZip64, mode = state # if initially opened with mode 'w' or 'x', re-open in mode 'a' so file doesn't # get clobbered - if mode in 'wx': - mode = 'a' - self.__init__(path=path, compression=compression, allowZip64=allowZip64, - mode=mode) + if mode in "wx": + mode = "a" + self.__init__(path=path, compression=compression, allowZip64=allowZip64, mode=mode) def close(self): """Closes the underlying zip file, ensuring all records are written.""" @@ -1776,14 +1805,14 @@ def close(self): def flush(self): """Closes the underlying zip file, ensuring all records are written, then re-opens the file for further modifications.""" - if self.mode != 'r': + if self.mode != "r": with self.mutex: self.zf.close() # N.B., re-open with mode 'a' regardless of initial mode so we don't wipe # what's been written - self.zf = zipfile.ZipFile(self.path, mode='a', - compression=self.compression, - allowZip64=self.allowZip64) + self.zf = zipfile.ZipFile( + self.path, mode="a", compression=self.compression, allowZip64=self.allowZip64 + ) def __enter__(self): return self @@ -1797,21 +1826,20 @@ def __getitem__(self, key): return f.read() def __setitem__(self, key, value): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() value = ensure_contiguous_ndarray_like(value).view("u1") with self.mutex: # writestr(key, value) writes with default permissions from # zipfile (600) that are too restrictive, build ZipInfo for # the key to work around limitation - keyinfo = zipfile.ZipInfo(filename=key, - date_time=time.localtime(time.time())[:6]) + keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) keyinfo.compress_type = self.compression if keyinfo.filename[-1] == os.sep: - keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x - keyinfo.external_attr |= 0x10 # MS-DOS directory flag + keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x + keyinfo.external_attr |= 0x10 # MS-DOS directory flag else: - 
keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- + keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- self.zf.writestr(keyinfo, value) @@ -1820,10 +1848,10 @@ def __delitem__(self, key): def __eq__(self, other): return ( - isinstance(other, ZipStore) and - self.path == other.path and - self.compression == other.compression and - self.allowZip64 == other.allowZip64 + isinstance(other, ZipStore) + and self.path == other.path + and self.compression == other.compression + and self.allowZip64 == other.allowZip64 ) def keylist(self): @@ -1860,7 +1888,7 @@ def getsize(self, path=None): size = 0 for child in children: if path: - name = path + '/' + child + name = path + "/" + child else: name = child try: @@ -1880,14 +1908,14 @@ def getsize(self, path=None): return 0 def clear(self): - if self.mode == 'r': + if self.mode == "r": raise ReadOnlyError() with self.mutex: self.close() os.remove(self.path) - self.zf = zipfile.ZipFile(self.path, mode=self.mode, - compression=self.compression, - allowZip64=self.allowZip64) + self.zf = zipfile.ZipFile( + self.path, mode=self.mode, compression=self.compression, allowZip64=self.allowZip64 + ) def migrate_1to2(store): @@ -1909,37 +1937,38 @@ def migrate_1to2(store): # migrate metadata from zarr import meta_v1 - meta = meta_v1.decode_metadata(store['meta']) - del store['meta'] + + meta = meta_v1.decode_metadata(store["meta"]) + del store["meta"] # add empty filters - meta['filters'] = None + meta["filters"] = None # migration compression metadata - compression = meta['compression'] - if compression is None or compression == 'none': + compression = meta["compression"] + if compression is None or compression == "none": compressor_config = None else: - compression_opts = meta['compression_opts'] + compression_opts = meta["compression_opts"] codec_cls = codec_registry[compression] if isinstance(compression_opts, dict): compressor = codec_cls(**compression_opts) else: compressor = codec_cls(compression_opts) compressor_config = compressor.get_config() - meta['compressor'] = compressor_config - del meta['compression'] - del meta['compression_opts'] + meta["compressor"] = compressor_config + del meta["compression"] + del meta["compression_opts"] # store migrated metadata - if hasattr(store, '_metadata_class'): + if hasattr(store, "_metadata_class"): store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) else: store[array_meta_key] = encode_array_metadata(meta) # migrate user attributes - store[attrs_key] = store['attrs'] - del store['attrs'] + store[attrs_key] = store["attrs"] + del store["attrs"] # noinspection PyShadowingBuiltins @@ -2024,11 +2053,19 @@ class DBMStore(Store): """ - def __init__(self, path, flag='c', mode=0o666, open=None, write_lock=True, - dimension_separator=None, - **open_kwargs): + def __init__( + self, + path, + flag="c", + mode=0o666, + open=None, + write_lock=True, + dimension_separator=None, + **open_kwargs, + ): if open is None: import dbm + open = dbm.open path = os.path.abspath(path) # noinspection PyArgumentList @@ -2053,27 +2090,25 @@ def __getstate__(self): except Exception: # flush may fail if db has already been closed pass - return (self.path, self.flag, self.mode, self.open, self.write_lock, - self.open_kwargs) + return (self.path, self.flag, self.mode, self.open, self.write_lock, self.open_kwargs) def __setstate__(self, state): path, flag, mode, open, write_lock, open_kws = state - if flag[0] == 'n': - flag = 'c' + flag[1:] # don't clobber an existing database - self.__init__(path=path, flag=flag, mode=mode, 
open=open, - write_lock=write_lock, **open_kws) + if flag[0] == "n": + flag = "c" + flag[1:] # don't clobber an existing database + self.__init__(path=path, flag=flag, mode=mode, open=open, write_lock=write_lock, **open_kws) def close(self): """Closes the underlying database file.""" - if hasattr(self.db, 'close'): + if hasattr(self.db, "close"): with self.write_mutex: self.db.close() def flush(self): """Synchronizes data to the underlying database file.""" - if self.flag[0] != 'r': + if self.flag[0] != "r": with self.write_mutex: - if hasattr(self.db, 'sync'): + if hasattr(self.db, "sync"): self.db.sync() else: # pragma: no cover # we don't cover this branch anymore as ndbm (oracle) is not packaged @@ -2081,8 +2116,8 @@ def flush(self): # https://github.com/conda-forge/staged-recipes/issues/4476 # fall-back, close and re-open, needed for ndbm flag = self.flag - if flag[0] == 'n': - flag = 'c' + flag[1:] # don't clobber an existing database + if flag[0] == "n": + flag = "c" + flag[1:] # don't clobber an existing database self.db.close() # noinspection PyArgumentList self.db = self.open(self.path, flag, self.mode, **self.open_kwargs) @@ -2113,11 +2148,12 @@ def __delitem__(self, key): def __eq__(self, other): return ( - isinstance(other, DBMStore) and - self.path == other.path and + isinstance(other, DBMStore) + and self.path == other.path + and # allow flag and mode to differ - self.open == other.open and - self.open_kwargs == other.open_kwargs + self.open == other.open + and self.open_kwargs == other.open_kwargs ) def keys(self): @@ -2200,28 +2236,28 @@ def __init__(self, path, buffers=True, dimension_separator=None, **kwargs): # set default memory map size to something larger than the lmdb default, which is # very likely to be too small for any moderate array (logic copied from zict) - map_size = (2**40 if sys.maxsize >= 2**32 else 2**28) - kwargs.setdefault('map_size', map_size) + map_size = 2**40 if sys.maxsize >= 2**32 else 2**28 + kwargs.setdefault("map_size", map_size) # don't initialize buffers to zero by default, shouldn't be necessary - kwargs.setdefault('meminit', False) + kwargs.setdefault("meminit", False) # decide whether to use the writemap option based on the operating system's # support for sparse files - writemap requires sparse file support otherwise # the whole# `map_size` may be reserved up front on disk (logic copied from zict) - writemap = sys.platform.startswith('linux') - kwargs.setdefault('writemap', writemap) + writemap = sys.platform.startswith("linux") + kwargs.setdefault("writemap", writemap) # decide options for when data are flushed to disk - choose to delay syncing # data to filesystem, otherwise pay a large performance penalty (zict also does # this) - kwargs.setdefault('metasync', False) - kwargs.setdefault('sync', False) - kwargs.setdefault('map_async', False) + kwargs.setdefault("metasync", False) + kwargs.setdefault("sync", False) + kwargs.setdefault("map_async", False) # set default option for number of cached transactions max_spare_txns = multiprocessing.cpu_count() - kwargs.setdefault('max_spare_txns', max_spare_txns) + kwargs.setdefault("max_spare_txns", max_spare_txns) # normalize path path = os.path.abspath(path) @@ -2312,7 +2348,7 @@ def __iter__(self): return self.keys() def __len__(self): - return self.db.stat()['entries'] + return self.db.stat()["entries"] class LRUStoreCache(Store): @@ -2364,14 +2400,30 @@ def __init__(self, store: StoreLike, max_size: int): self.hits = self.misses = 0 def __getstate__(self): - return (self._store, 
self._max_size, self._current_size, self._keys_cache, - self._contains_cache, self._listdir_cache, self._values_cache, self.hits, - self.misses) + return ( + self._store, + self._max_size, + self._current_size, + self._keys_cache, + self._contains_cache, + self._listdir_cache, + self._values_cache, + self.hits, + self.misses, + ) def __setstate__(self, state): - (self._store, self._max_size, self._current_size, self._keys_cache, - self._contains_cache, self._listdir_cache, self._values_cache, self.hits, - self.misses) = state + ( + self._store, + self._max_size, + self._current_size, + self._keys_cache, + self._contains_cache, + self._listdir_cache, + self._values_cache, + self.hits, + self.misses, + ) = state self._mutex = Lock() def __len__(self): @@ -2536,7 +2588,7 @@ def __init__(self, path, dimension_separator=None, **kwargs): self._dimension_separator = dimension_separator # normalize path - if path != ':memory:': + if path != ":memory:": path = os.path.abspath(path) # store properties @@ -2560,7 +2612,7 @@ def __init__(self, path, dimension_separator=None, **kwargs): detect_types=0, isolation_level=None, check_same_thread=check_same_thread, - **self.kwargs + **self.kwargs, ) # handle keys as `str`s @@ -2571,13 +2623,11 @@ def __init__(self, path, dimension_separator=None, **kwargs): # initialize database with our table if missing with self.lock: - self.cursor.execute( - 'CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)' - ) + self.cursor.execute("CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)") def __getstate__(self): - if self.path == ':memory:': - raise PicklingError('Cannot pickle in-memory SQLite databases') + if self.path == ":memory:": + raise PicklingError("Cannot pickle in-memory SQLite databases") return self.path, self.kwargs def __setstate__(self, state): @@ -2592,8 +2642,8 @@ def close(self): self.db.close() def __getitem__(self, key): - value = self.cursor.execute('SELECT v FROM zarr WHERE (k = ?)', (key,)) - for v, in value: + value = self.cursor.execute("SELECT v FROM zarr WHERE (k = ?)", (key,)) + for (v,) in value: return v raise KeyError(key) @@ -2602,38 +2652,36 @@ def __setitem__(self, key, value): def __delitem__(self, key): with self.lock: - self.cursor.execute('DELETE FROM zarr WHERE (k = ?)', (key,)) + self.cursor.execute("DELETE FROM zarr WHERE (k = ?)", (key,)) if self.cursor.rowcount < 1: raise KeyError(key) def __contains__(self, key): - cs = self.cursor.execute( - 'SELECT COUNT(*) FROM zarr WHERE (k = ?)', (key,) - ) - for has, in cs: + cs = self.cursor.execute("SELECT COUNT(*) FROM zarr WHERE (k = ?)", (key,)) + for (has,) in cs: has = bool(has) return has def items(self): - kvs = self.cursor.execute('SELECT k, v FROM zarr') + kvs = self.cursor.execute("SELECT k, v FROM zarr") yield from kvs def keys(self): - ks = self.cursor.execute('SELECT k FROM zarr') - for k, in ks: + ks = self.cursor.execute("SELECT k FROM zarr") + for (k,) in ks: yield k def values(self): - vs = self.cursor.execute('SELECT v FROM zarr') - for v, in vs: + vs = self.cursor.execute("SELECT v FROM zarr") + for (v,) in vs: yield v def __iter__(self): return self.keys() def __len__(self): - cs = self.cursor.execute('SELECT COUNT(*) FROM zarr') - for c, in cs: + cs = self.cursor.execute("SELECT COUNT(*) FROM zarr") + for (c,) in cs: return c def update(self, *args, **kwargs): @@ -2648,19 +2696,21 @@ def update(self, *args, **kwargs): kv_list.append((k, v)) with self.lock: - self.cursor.executemany('REPLACE INTO zarr VALUES (?, ?)', kv_list) + 
self.cursor.executemany("REPLACE INTO zarr VALUES (?, ?)", kv_list) def listdir(self, path=None): path = normalize_storage_path(path) - sep = '_' if path == '' else '/' + sep = "_" if path == "" else "/" keys = self.cursor.execute( - ''' + """ SELECT DISTINCT SUBSTR(m, 0, INSTR(m, "/")) AS l FROM ( SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m FROM zarr WHERE k LIKE (? || "{sep}%") ) ORDER BY l ASC - '''.format(sep=sep), - (path, path) + """.format( + sep=sep + ), + (path, path), ) keys = list(map(operator.itemgetter(0), keys)) return keys @@ -2668,35 +2718,33 @@ def listdir(self, path=None): def getsize(self, path=None): path = normalize_storage_path(path) size = self.cursor.execute( - ''' + """ SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr WHERE k LIKE (? || "%") AND 0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/") - ''', - (path, path) + """, + (path, path), ) - for s, in size: + for (s,) in size: return s def rmdir(self, path=None): path = normalize_storage_path(path) if path: with self.lock: - self.cursor.execute( - 'DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,) - ) + self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (path,)) else: self.clear() def clear(self): with self.lock: self.cursor.executescript( - ''' + """ BEGIN TRANSACTION; DROP TABLE zarr; CREATE TABLE zarr(k TEXT PRIMARY KEY, v BLOB); COMMIT TRANSACTION; - ''' + """ ) @@ -2725,11 +2773,16 @@ class MongoDBStore(Store): """ - _key = 'key' - _value = 'value' + _key = "key" + _value = "value" - def __init__(self, database='mongodb_zarr', collection='zarr_collection', - dimension_separator=None, **kwargs): + def __init__( + self, + database="mongodb_zarr", + collection="zarr_collection", + dimension_separator=None, + **kwargs, + ): import pymongo self._database = database @@ -2751,9 +2804,9 @@ def __getitem__(self, key): def __setitem__(self, key, value): value = ensure_bytes(value) - self.collection.replace_one({self._key: key}, - {self._key: key, self._value: value}, - upsert=True) + self.collection.replace_one( + {self._key: key}, {self._key: key, self._value: value}, upsert=True + ) def __delitem__(self, key): result = self.collection.delete_many({self._key: key}) @@ -2801,8 +2854,10 @@ class RedisStore(Store): Keyword arguments passed through to the `redis.Redis` function. 
""" - def __init__(self, prefix='zarr', dimension_separator=None, **kwargs): + + def __init__(self, prefix="zarr", dimension_separator=None, **kwargs): import redis + self._prefix = prefix self._kwargs = kwargs self._dimension_separator = dimension_separator @@ -2810,7 +2865,7 @@ def __init__(self, prefix='zarr', dimension_separator=None, **kwargs): self.client = redis.Redis(**kwargs) def _key(self, key): - return '{prefix}:{key}'.format(prefix=self._prefix, key=key) + return "{prefix}:{key}".format(prefix=self._prefix, key=key) def __getitem__(self, key): return self.client[self._key(key)] @@ -2825,9 +2880,8 @@ def __delitem__(self, key): raise KeyError(key) def keylist(self): - offset = len(self._key('')) # length of prefix - return [key[offset:].decode('utf-8') - for key in self.client.keys(self._key('*'))] + offset = len(self._key("")) # length of prefix + return [key[offset:].decode("utf-8") for key in self.client.keys(self._key("*"))] def keys(self): yield from self.keylist() @@ -2893,10 +2947,11 @@ def __init__(self, store: StoreLike, metadata_key=".zmetadata"): meta = json_loads(self.store[metadata_key]) # check format of consolidated metadata - consolidated_format = meta.get('zarr_consolidated_format', None) + consolidated_format = meta.get("zarr_consolidated_format", None) if consolidated_format != 1: - raise MetadataError('unsupported zarr consolidated metadata format: %s' % - consolidated_format) + raise MetadataError( + "unsupported zarr consolidated metadata format: %s" % consolidated_format + ) # decode metadata self.meta_store: Store = KVStore(meta["metadata"]) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index d6151b4f29..7dd5b340a2 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -24,31 +24,30 @@ def _init_store(version): return KVStoreV3(dict()) -class TestAttributes(): - +class TestAttributes: def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): - root = '.z' if zarr_version == 2 else meta_root - return Attributes(store, key=root + 'attrs', read_only=read_only, cache=cache) + root = ".z" if zarr_version == 2 else meta_root + return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) def test_storage(self, zarr_version): store = _init_store(zarr_version) - root = '.z' if zarr_version == 2 else meta_root - attrs_key = root + 'attrs' + root = ".z" if zarr_version == 2 else meta_root + attrs_key = root + "attrs" a = Attributes(store=store, key=attrs_key) assert isinstance(a.store, KVStore) - assert 'foo' not in a - assert 'bar' not in a + assert "foo" not in a + assert "bar" not in a assert dict() == a.asdict() - a['foo'] = 'bar' - a['baz'] = 42 + a["foo"] = "bar" + a["baz"] = 42 assert attrs_key in store assert isinstance(store[attrs_key], bytes) - d = json.loads(str(store[attrs_key], 'utf-8')) + d = json.loads(str(store[attrs_key], "utf-8")) if zarr_version == 3: - d = d['attributes'] - assert dict(foo='bar', baz=42) == d + d = d["attributes"] + assert dict(foo="bar", baz=42) == d def test_utf8_encoding(self, zarr_version): @@ -65,42 +64,42 @@ def test_utf8_encoding(self, zarr_version): # fixture data fixture = group(store=DirectoryStore(str(fixdir))) - assert fixture['utf8attrs'].attrs.asdict() == dict(foo='た') + assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") def test_get_set_del_contains(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) - assert 'foo' not in a - a['foo'] = 'bar' - a['baz'] = 42 - assert 
'foo' in a - assert 'baz' in a - assert 'bar' == a['foo'] - assert 42 == a['baz'] - del a['foo'] - assert 'foo' not in a + assert "foo" not in a + a["foo"] = "bar" + a["baz"] = 42 + assert "foo" in a + assert "baz" in a + assert "bar" == a["foo"] + assert 42 == a["baz"] + del a["foo"] + assert "foo" not in a with pytest.raises(KeyError): # noinspection PyStatementEffect - a['foo'] + a["foo"] def test_update_put(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) - assert 'foo' not in a - assert 'bar' not in a - assert 'baz' not in a + assert "foo" not in a + assert "bar" not in a + assert "baz" not in a - a.update(foo='spam', bar=42, baz=4.2) - assert a['foo'] == 'spam' - assert a['bar'] == 42 - assert a['baz'] == 4.2 + a.update(foo="spam", bar=42, baz=4.2) + assert a["foo"] == "spam" + assert a["bar"] == 42 + assert a["baz"] == 4.2 - a.put(dict(foo='eggs', bar=84)) - assert a['foo'] == 'eggs' - assert a['bar'] == 84 - assert 'baz' not in a + a.put(dict(foo="eggs", bar=84)) + assert a["foo"] == "eggs" + assert a["bar"] == 84 + assert "baz" not in a def test_iterators(self, zarr_version): @@ -112,182 +111,182 @@ def test_iterators(self, zarr_version): assert set() == set(a.values()) assert set() == set(a.items()) - a['foo'] = 'bar' - a['baz'] = 42 + a["foo"] = "bar" + a["baz"] = 42 assert 2 == len(a) - assert {'foo', 'baz'} == set(a) - assert {'foo', 'baz'} == set(a.keys()) - assert {'bar', 42} == set(a.values()) - assert {('foo', 'bar'), ('baz', 42)} == set(a.items()) + assert {"foo", "baz"} == set(a) + assert {"foo", "baz"} == set(a.keys()) + assert {"bar", 42} == set(a.values()) + assert {("foo", "bar"), ("baz", 42)} == set(a.items()) def test_read_only(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, read_only=True, zarr_version=zarr_version) if zarr_version == 2: - store['.zattrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii') + store[".zattrs"] = json.dumps(dict(foo="bar", baz=42)).encode("ascii") else: - store['meta/root/attrs'] = json.dumps( - dict(attributes=dict(foo='bar', baz=42)) - ).encode('ascii') - assert a['foo'] == 'bar' - assert a['baz'] == 42 + store["meta/root/attrs"] = json.dumps(dict(attributes=dict(foo="bar", baz=42))).encode( + "ascii" + ) + assert a["foo"] == "bar" + assert a["baz"] == 42 with pytest.raises(PermissionError): - a['foo'] = 'quux' + a["foo"] = "quux" with pytest.raises(PermissionError): - del a['foo'] + del a["foo"] with pytest.raises(PermissionError): - a.update(foo='quux') + a.update(foo="quux") def test_key_completions(self, zarr_version): store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) d = a._ipython_key_completions_() - assert 'foo' not in d - assert '123' not in d - assert 'baz' not in d - assert 'asdf;' not in d - a['foo'] = 42 - a['123'] = 4.2 - a['asdf;'] = 'ghjkl;' + assert "foo" not in d + assert "123" not in d + assert "baz" not in d + assert "asdf;" not in d + a["foo"] = 42 + a["123"] = 4.2 + a["asdf;"] = "ghjkl;" d = a._ipython_key_completions_() - assert 'foo' in d - assert '123' in d - assert 'asdf;' in d - assert 'baz' not in d + assert "foo" in d + assert "123" in d + assert "asdf;" in d + assert "baz" not in d def test_caching_on(self, zarr_version): # caching is turned on by default # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' - assert 0 == store.counter['__getitem__', attrs_key] - 
assert 0 == store.counter['__setitem__', attrs_key] + attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" + assert 0 == store.counter["__getitem__", attrs_key] + assert 0 == store.counter["__setitem__", attrs_key] if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert 0 == store.counter['__getitem__', attrs_key] - assert 1 == store.counter['__setitem__', attrs_key] + store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + assert 0 == store.counter["__getitem__", attrs_key] + assert 1 == store.counter["__setitem__", attrs_key] # setup attributes a = self.init_attributes(store, zarr_version=zarr_version) # test __getitem__ causes all attributes to be cached - assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', attrs_key] - assert a['bar'] == 42 - assert 1 == store.counter['__getitem__', attrs_key] - assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', attrs_key] + assert a["foo"] == "xxx" + assert 1 == store.counter["__getitem__", attrs_key] + assert a["bar"] == 42 + assert 1 == store.counter["__getitem__", attrs_key] + assert a["foo"] == "xxx" + assert 1 == store.counter["__getitem__", attrs_key] # test __setitem__ updates the cache - a['foo'] = 'yyy' + a["foo"] = "yyy" get_cnt = 2 if zarr_version == 2 else 3 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'yyy' - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "yyy" + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] # test update() updates the cache - a.update(foo='zzz', bar=84) + a.update(foo="zzz", bar=84) get_cnt = 3 if zarr_version == 2 else 5 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'zzz' - assert a['bar'] == 84 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "zzz" + assert a["bar"] == 84 + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] # test __contains__ uses the cache - assert 'foo' in a - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert 'spam' not in a - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert "foo" in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert "spam" not in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] # test __delitem__ updates the cache - del a['bar'] + del a["bar"] get_cnt = 4 if zarr_version == 2 else 7 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 4 == store.counter['__setitem__', attrs_key] - assert 'bar' not in a - 
assert get_cnt == store.counter['__getitem__', attrs_key] - assert 4 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 4 == store.counter["__setitem__", attrs_key] + assert "bar" not in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 4 == store.counter["__setitem__", attrs_key] # test refresh() if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert get_cnt == store.counter['__getitem__', attrs_key] + store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + assert get_cnt == store.counter["__getitem__", attrs_key] a.refresh() get_cnt = 5 if zarr_version == 2 else 8 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert a['foo'] == 'xxx' - assert get_cnt == store.counter['__getitem__', attrs_key] - assert a['bar'] == 42 - assert get_cnt == store.counter['__getitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert a["foo"] == "xxx" + assert get_cnt == store.counter["__getitem__", attrs_key] + assert a["bar"] == 42 + assert get_cnt == store.counter["__getitem__", attrs_key] def test_caching_off(self, zarr_version): # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = '.zattrs' if zarr_version == 2 else 'meta/root/attrs' - assert 0 == store.counter['__getitem__', attrs_key] - assert 0 == store.counter['__setitem__', attrs_key] + attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" + assert 0 == store.counter["__getitem__", attrs_key] + assert 0 == store.counter["__setitem__", attrs_key] if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii') + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii') - assert 0 == store.counter['__getitem__', attrs_key] - assert 1 == store.counter['__setitem__', attrs_key] + store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + assert 0 == store.counter["__getitem__", attrs_key] + assert 1 == store.counter["__setitem__", attrs_key] # setup attributes a = self.init_attributes(store, cache=False, zarr_version=zarr_version) # test __getitem__ - assert a['foo'] == 'xxx' - assert 1 == store.counter['__getitem__', attrs_key] - assert a['bar'] == 42 - assert 2 == store.counter['__getitem__', attrs_key] - assert a['foo'] == 'xxx' - assert 3 == store.counter['__getitem__', attrs_key] + assert a["foo"] == "xxx" + assert 1 == store.counter["__getitem__", attrs_key] + assert a["bar"] == 42 + assert 2 == store.counter["__getitem__", attrs_key] + assert a["foo"] == "xxx" + assert 3 == store.counter["__getitem__", attrs_key] # test __setitem__ - a['foo'] = 'yyy' + a["foo"] = "yyy" get_cnt = 4 if zarr_version == 2 else 5 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'yyy' + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "yyy" get_cnt = 5 if zarr_version == 2 else 6 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 2 == store.counter['__setitem__', attrs_key] + assert get_cnt 
== store.counter["__getitem__", attrs_key] + assert 2 == store.counter["__setitem__", attrs_key] # test update() - a.update(foo='zzz', bar=84) + a.update(foo="zzz", bar=84) get_cnt = 6 if zarr_version == 2 else 8 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert a['foo'] == 'zzz' - assert a['bar'] == 84 + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert a["foo"] == "zzz" + assert a["bar"] == 84 get_cnt = 8 if zarr_version == 2 else 10 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] # test __contains__ - assert 'foo' in a + assert "foo" in a get_cnt = 9 if zarr_version == 2 else 11 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] - assert 'spam' not in a + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] + assert "spam" not in a get_cnt = 10 if zarr_version == 2 else 12 - assert get_cnt == store.counter['__getitem__', attrs_key] - assert 3 == store.counter['__setitem__', attrs_key] + assert get_cnt == store.counter["__getitem__", attrs_key] + assert 3 == store.counter["__setitem__", attrs_key] def test_wrong_keys(self, zarr_version): store = _init_store(zarr_version) diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 45ed9c3e11..389ce90a9d 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -45,17 +45,17 @@ ) from zarr.tests.util import have_fsspec -_VERSIONS = ((2, 3) if v3_api_available else (2, )) +_VERSIONS = (2, 3) if v3_api_available else (2,) def _init_creation_kwargs(zarr_version): - kwargs = {'zarr_version': zarr_version} + kwargs = {"zarr_version": zarr_version} if zarr_version == 3: - kwargs['path'] = 'dataset' + kwargs["path"] = "dataset" return kwargs -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_array(path_type, zarr_version): store = tempfile.mkdtemp() @@ -64,24 +64,24 @@ def test_open_array(path_type, zarr_version): kwargs = _init_creation_kwargs(zarr_version) # open array, create if doesn't exist - z = open(store, mode='a', shape=100, **kwargs) + z = open(store, mode="a", shape=100, **kwargs) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite - z = open(store, mode='w', shape=200, **kwargs) + z = open(store, mode="w", shape=200, **kwargs) assert isinstance(z, Array) assert z.shape == (200,) # open array, read-only - z = open(store, mode='r', **kwargs) + z = open(store, mode="r", **kwargs) assert isinstance(z, Array) assert z.shape == (200,) assert z.read_only # path not found with pytest.raises(ValueError): - open('doesnotexist', mode='r') + open("doesnotexist", mode="r") @pytest.mark.parametrize("zarr_version", _VERSIONS) @@ -93,18 +93,18 @@ def test_open_group(path_type, zarr_version): kwargs = _init_creation_kwargs(zarr_version) # open group, create if doesn't exist - g = open(store, mode='a', **kwargs) - g.create_group('foo') + g = open(store, mode="a", **kwargs) + g.create_group("foo") assert isinstance(g, Group) - assert 'foo' in g + assert "foo" in g # open group, overwrite - g = open(store, mode='w', **kwargs) + g = open(store, mode="w", **kwargs) assert 
isinstance(g, Group) - assert 'foo' not in g + assert "foo" not in g # open group, read-only - g = open(store, mode='r', **kwargs) + g = open(store, mode="r", **kwargs) assert isinstance(g, Group) assert g.read_only @@ -113,13 +113,13 @@ def test_open_group(path_type, zarr_version): def test_save_errors(zarr_version): with pytest.raises(ValueError): # no arrays provided - save_group('data/group.zarr', zarr_version=zarr_version) + save_group("data/group.zarr", zarr_version=zarr_version) with pytest.raises(TypeError): # no array provided - save_array('data/group.zarr', zarr_version=zarr_version) + save_array("data/group.zarr", zarr_version=zarr_version) with pytest.raises(ValueError): # no arrays provided - save('data/group.zarr', zarr_version=zarr_version) + save("data/group.zarr", zarr_version=zarr_version) @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @@ -128,12 +128,12 @@ def test_zarr_v3_save_multiple_unnamed(): y = np.zeros(8) store = KVStoreV3(dict()) # no path provided - save_group(store, x, y, path='dataset', zarr_version=3) + save_group(store, x, y, path="dataset", zarr_version=3) # names become arr_{i} for unnamed *args - assert data_root + 'dataset/arr_0/c0' in store - assert data_root + 'dataset/arr_1/c0' in store - assert meta_root + 'dataset/arr_0.array.json' in store - assert meta_root + 'dataset/arr_1.array.json' in store + assert data_root + "dataset/arr_0/c0" in store + assert data_root + "dataset/arr_1/c0" in store + assert meta_root + "dataset/arr_0.array.json" in store + assert meta_root + "dataset/arr_1.array.json" in store @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @@ -141,47 +141,47 @@ def test_zarr_v3_save_errors(): x = np.ones(8) with pytest.raises(ValueError): # no path provided - save_group('data/group.zr3', x, zarr_version=3) + save_group("data/group.zr3", x, zarr_version=3) with pytest.raises(ValueError): # no path provided - save_array('data/group.zr3', x, zarr_version=3) + save_array("data/group.zr3", x, zarr_version=3) with pytest.raises(ValueError): # no path provided - save('data/group.zr3', x, zarr_version=3) + save("data/group.zr3", x, zarr_version=3) @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_lazy_loader(zarr_version): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = 'data/group.zarr' if zarr_version == 2 else 'data/group.zr3' + store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" kwargs = _init_creation_kwargs(zarr_version) save(store, foo=foo, bar=bar, **kwargs) loader = load(store, **kwargs) - assert 'foo' in loader - assert 'bar' in loader - assert 'baz' not in loader + assert "foo" in loader + assert "bar" in loader + assert "baz" not in loader assert len(loader) == 2 - assert sorted(loader) == ['bar', 'foo'] - assert_array_equal(foo, loader['foo']) - assert_array_equal(bar, loader['bar']) - assert 'LazyLoader: ' in repr(loader) + assert sorted(loader) == ["bar", "foo"] + assert_array_equal(foo, loader["foo"]) + assert_array_equal(bar, loader["bar"]) + assert "LazyLoader: " in repr(loader) @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_load_array(zarr_version): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = 'data/group.zarr' if zarr_version == 2 else 'data/group.zr3' + store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" kwargs = _init_creation_kwargs(zarr_version) save(store, foo=foo, bar=bar, **kwargs) # can also load arrays directly into a numpy array - for array_name in ['foo', 'bar']: - array_path = 'dataset/' + 
array_name if zarr_version == 3 else array_name + for array_name in ["foo", "bar"]: + array_path = "dataset/" + array_name if zarr_version == 3 else array_name array = load(store, path=array_path, zarr_version=zarr_version) assert isinstance(array, np.ndarray) - if array_name == 'foo': + if array_name == "foo": assert_array_equal(foo, array) else: assert_array_equal(bar, array) @@ -191,27 +191,25 @@ def test_load_array(zarr_version): def test_tree(zarr_version): kwargs = _init_creation_kwargs(zarr_version) g1 = zarr.group(**kwargs) - g1.create_group('foo') - g3 = g1.create_group('bar') - g3.create_group('baz') - g5 = g3.create_group('qux') - g5.create_dataset('baz', shape=100, chunks=10) + g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("qux") + g5.create_dataset("baz", shape=100, chunks=10) assert repr(zarr.tree(g1)) == repr(g1.tree()) assert str(zarr.tree(g1)) == str(g1.tree()) -@pytest.mark.parametrize('zarr_version', _VERSIONS) -@pytest.mark.parametrize('stores_from_path', [False, True]) +@pytest.mark.parametrize("zarr_version", _VERSIONS) +@pytest.mark.parametrize("stores_from_path", [False, True]) @pytest.mark.parametrize( - 'with_chunk_store,listable', + "with_chunk_store,listable", [(False, True), (True, True), (False, False)], - ids=['default-listable', 'with_chunk_store-listable', 'default-unlistable'] + ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], ) -def test_consolidate_metadata(with_chunk_store, - zarr_version, - listable, - monkeypatch, - stores_from_path): +def test_consolidate_metadata( + with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path +): # setup initial data if stores_from_path: @@ -222,7 +220,7 @@ def test_consolidate_metadata(with_chunk_store, atexit.register(atexit_rmtree, chunk_store) else: chunk_store = None - version_kwarg = {'zarr_version': zarr_version} + version_kwarg = {"zarr_version": zarr_version} else: if zarr_version == 2: store = MemoryStore() @@ -231,19 +229,19 @@ def test_consolidate_metadata(with_chunk_store, store = MemoryStoreV3() chunk_store = MemoryStoreV3() if with_chunk_store else None version_kwarg = {} - path = 'dataset' if zarr_version == 3 else None + path = "dataset" if zarr_version == 3 else None z = group(store, chunk_store=chunk_store, path=path, **version_kwarg) # Reload the actual store implementation in case str store_to_copy = z.store - z.create_group('g1') - g2 = z.create_group('g2') - g2.attrs['hello'] = 'world' - arr = g2.create_dataset('arr', shape=(20, 20), chunks=(5, 5), dtype='f8') + z.create_group("g1") + g2 = z.create_group("g2") + g2.attrs["hello"] = "world" + arr = g2.create_dataset("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") assert 16 == arr.nchunks assert 0 == arr.nchunks_initialized - arr.attrs['data'] = 1 + arr.attrs["data"] = 1 arr[:] = 1.0 assert 16 == arr.nchunks_initialized @@ -259,31 +257,35 @@ def test_consolidate_metadata(with_chunk_store, consolidate_metadata(store_class, path=None) with pytest.raises(ValueError): - consolidate_metadata(store_class, path='') + consolidate_metadata(store_class, path="") # perform consolidation out = consolidate_metadata(store_class, path=path) assert isinstance(out, Group) - assert ['g1', 'g2'] == list(out) + assert ["g1", "g2"] == list(out) if not stores_from_path: if zarr_version == 2: assert isinstance(out._store, ConsolidatedMetadataStore) - assert '.zmetadata' in store - meta_keys = ['.zgroup', - 'g1/.zgroup', - 'g2/.zgroup', - 'g2/.zattrs', - 'g2/arr/.zarray', - 
'g2/arr/.zattrs'] + assert ".zmetadata" in store + meta_keys = [ + ".zgroup", + "g1/.zgroup", + "g2/.zgroup", + "g2/.zattrs", + "g2/arr/.zarray", + "g2/arr/.zattrs", + ] else: assert isinstance(out._store, ConsolidatedMetadataStoreV3) - assert 'meta/root/consolidated/.zmetadata' in store - meta_keys = ['zarr.json', - meta_root + 'dataset.group.json', - meta_root + 'dataset/g1.group.json', - meta_root + 'dataset/g2.group.json', - meta_root + 'dataset/g2/arr.array.json', - 'meta/root/consolidated.group.json'] + assert "meta/root/consolidated/.zmetadata" in store + meta_keys = [ + "zarr.json", + meta_root + "dataset.group.json", + meta_root + "dataset/g1.group.json", + meta_root + "dataset/g2.group.json", + meta_root + "dataset/g2/arr.array.json", + "meta/root/consolidated.group.json", + ] for key in meta_keys: del store[key] @@ -307,9 +309,9 @@ def test_consolidate_metadata(with_chunk_store, # open consolidated z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path, **version_kwarg) - assert ['g1', 'g2'] == list(z2) - assert 'world' == z2.g2.attrs['hello'] - assert 1 == z2.g2.arr.attrs['data'] + assert ["g1", "g2"] == list(z2) + assert "world" == z2.g2.attrs["hello"] + assert 1 == z2.g2.arr.attrs["data"] assert (z2.g2.arr[:] == 1.0).all() assert 16 == z2.g2.arr.nchunks if listable: @@ -332,32 +334,32 @@ def test_consolidate_metadata(with_chunk_store, if zarr_version == 2: cmd = ConsolidatedMetadataStore(store) with pytest.raises(PermissionError): - del cmd['.zgroup'] + del cmd[".zgroup"] with pytest.raises(PermissionError): - cmd['.zgroup'] = None + cmd[".zgroup"] = None else: cmd = ConsolidatedMetadataStoreV3(store) with pytest.raises(PermissionError): - del cmd[meta_root + 'dataset.group.json'] + del cmd[meta_root + "dataset.group.json"] with pytest.raises(PermissionError): - cmd[meta_root + 'dataset.group.json'] = None + cmd[meta_root + "dataset.group.json"] = None # test getsize on the store assert isinstance(getsize(cmd), Integral) # test new metadata are not writeable with pytest.raises(PermissionError): - z2.create_group('g3') + z2.create_group("g3") with pytest.raises(PermissionError): - z2.create_dataset('spam', shape=42, chunks=7, dtype='i4') + z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") with pytest.raises(PermissionError): - del z2['g2'] + del z2["g2"] # test consolidated metadata are not writeable with pytest.raises(PermissionError): - z2.g2.attrs['hello'] = 'universe' + z2.g2.attrs["hello"] = "universe" with pytest.raises(PermissionError): - z2.g2.arr.attrs['foo'] = 'bar' + z2.g2.arr.attrs["foo"] = "bar" # test the data are writeable z2.g2.arr[:] = 2 @@ -365,24 +367,31 @@ def test_consolidate_metadata(with_chunk_store, # test invalid modes with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode='a', path=path) + open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode='w', path=path) + open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) with pytest.raises(ValueError): - open_consolidated(store, chunk_store=chunk_store, mode='w-', path=path) + open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) # make sure keyword arguments are passed through without error open_consolidated( - store, chunk_store=chunk_store, path=path, cache_attrs=True, synchronizer=None, + store, + chunk_store=chunk_store, + path=path, + cache_attrs=True, + synchronizer=None, **version_kwarg, ) 
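For readers skimming this hunk: a minimal sketch, assuming only the public zarr v2 API, of the consolidation round-trip that test_consolidate_metadata exercises (the store contents and group/array names here are made up for illustration, not taken from the patch):

    # sketch: consolidate metadata, then reopen through the consolidated view
    import zarr
    from zarr.storage import MemoryStore

    store = MemoryStore()
    root = zarr.group(store=store)
    grp = root.create_group("g2")
    arr = grp.create_dataset("arr", shape=(20, 20), chunks=(5, 5), dtype="f8")
    arr.attrs["data"] = 1
    arr[:] = 1.0

    zarr.consolidate_metadata(store)    # writes a single ".zmetadata" document
    z2 = zarr.open_consolidated(store)  # reads hierarchy metadata from that one key
    assert z2.g2.arr.attrs["data"] == 1
    assert (z2.g2.arr[:] == 1.0).all()

Because open_consolidated resolves all metadata from the single consolidated key, the test above can delete the individual metadata keys from the store and still open and read the hierarchy.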
-@pytest.mark.parametrize("options", ( - {"dimension_separator": "/"}, - {"dimension_separator": "."}, - {"dimension_separator": None}, -)) +@pytest.mark.parametrize( + "options", + ( + {"dimension_separator": "/"}, + {"dimension_separator": "."}, + {"dimension_separator": None}, + ), +) def test_save_array_separator(tmpdir, options): data = np.arange(6).reshape((3, 2)) url = tmpdir.join("test.zarr") @@ -395,9 +404,9 @@ class TestCopyStore(unittest.TestCase): def setUp(self): source = dict() - source['foo'] = b'xxx' - source['bar/baz'] = b'yyy' - source['bar/qux'] = b'zzz' + source["foo"] = b"xxx" + source["bar/baz"] = b"yyy" + source["bar/qux"] = b"zzz" self.source = source def _get_dest_store(self): @@ -414,13 +423,13 @@ def test_no_paths(self): def test_source_path(self): source = self.source # paths should be normalized - for source_path in 'bar', 'bar/', '/bar', '/bar/': + for source_path in "bar", "bar/", "/bar", "/bar/": dest = self._get_dest_store() copy_store(source, dest, source_path=source_path) assert 2 == len(dest) for key in source: - if key.startswith('bar/'): - dest_key = key.split('bar/')[1] + if key.startswith("bar/"): + dest_key = key.split("bar/")[1] assert source[key] == dest[dest_key] else: assert key not in dest @@ -428,64 +437,63 @@ def test_source_path(self): def test_dest_path(self): source = self.source # paths should be normalized - for dest_path in 'new', 'new/', '/new', '/new/': + for dest_path in "new", "new/", "/new", "/new/": dest = self._get_dest_store() copy_store(source, dest, dest_path=dest_path) assert len(source) == len(dest) for key in source: if self._version == 3: - dest_key = key[:10] + 'new/' + key[10:] + dest_key = key[:10] + "new/" + key[10:] else: - dest_key = 'new/' + key + dest_key = "new/" + key assert source[key] == dest[dest_key] def test_source_dest_path(self): source = self.source # paths should be normalized - for source_path in 'bar', 'bar/', '/bar', '/bar/': - for dest_path in 'new', 'new/', '/new', '/new/': + for source_path in "bar", "bar/", "/bar", "/bar/": + for dest_path in "new", "new/", "/new", "/new/": dest = self._get_dest_store() - copy_store(source, dest, source_path=source_path, - dest_path=dest_path) + copy_store(source, dest, source_path=source_path, dest_path=dest_path) assert 2 == len(dest) for key in source: - if key.startswith('bar/'): - dest_key = 'new/' + key.split('bar/')[1] + if key.startswith("bar/"): + dest_key = "new/" + key.split("bar/")[1] assert source[key] == dest[dest_key] else: assert key not in dest - assert ('new/' + key) not in dest + assert ("new/" + key) not in dest def test_excludes_includes(self): source = self.source # single excludes dest = self._get_dest_store() - excludes = 'f.*' + excludes = "f.*" copy_store(source, dest, excludes=excludes) assert len(dest) == 2 - root = '' if self._version == 2 else meta_root - assert root + 'foo' not in dest + root = "" if self._version == 2 else meta_root + assert root + "foo" not in dest # multiple excludes dest = self._get_dest_store() - excludes = 'b.z', '.*x' + excludes = "b.z", ".*x" copy_store(source, dest, excludes=excludes) assert len(dest) == 1 - assert root + 'foo' in dest - assert root + 'bar/baz' not in dest - assert root + 'bar/qux' not in dest + assert root + "foo" in dest + assert root + "bar/baz" not in dest + assert root + "bar/qux" not in dest # excludes and includes dest = self._get_dest_store() - excludes = 'b.*' - includes = '.*x' + excludes = "b.*" + includes = ".*x" copy_store(source, dest, excludes=excludes, includes=includes) 
assert len(dest) == 2 - assert root + 'foo' in dest - assert root + 'bar/baz' not in dest - assert root + 'bar/qux' in dest + assert root + "foo" in dest + assert root + "bar/baz" not in dest + assert root + "bar/qux" in dest def test_dry_run(self): source = self.source @@ -496,8 +504,8 @@ def test_dry_run(self): def test_if_exists(self): source = self.source dest = self._get_dest_store() - root = '' if self._version == 2 else meta_root - dest[root + 'bar/baz'] = b'mmm' + root = "" if self._version == 2 else meta_root + dest[root + "bar/baz"] = b"mmm" # default ('raise') with pytest.raises(CopyError): @@ -505,25 +513,25 @@ def test_if_exists(self): # explicit 'raise' with pytest.raises(CopyError): - copy_store(source, dest, if_exists='raise') + copy_store(source, dest, if_exists="raise") # skip - copy_store(source, dest, if_exists='skip') + copy_store(source, dest, if_exists="skip") assert 3 == len(dest) - assert dest[root + 'foo'] == b'xxx' - assert dest[root + 'bar/baz'] == b'mmm' - assert dest[root + 'bar/qux'] == b'zzz' + assert dest[root + "foo"] == b"xxx" + assert dest[root + "bar/baz"] == b"mmm" + assert dest[root + "bar/qux"] == b"zzz" # replace - copy_store(source, dest, if_exists='replace') + copy_store(source, dest, if_exists="replace") assert 3 == len(dest) - assert dest[root + 'foo'] == b'xxx' - assert dest[root + 'bar/baz'] == b'yyy' - assert dest[root + 'bar/qux'] == b'zzz' + assert dest[root + "foo"] == b"xxx" + assert dest[root + "bar/baz"] == b"yyy" + assert dest[root + "bar/qux"] == b"zzz" # invalid option with pytest.raises(ValueError): - copy_store(source, dest, if_exists='foobar') + copy_store(source, dest, if_exists="foobar") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") @@ -533,9 +541,9 @@ class TestCopyStoreV3(TestCopyStore): def setUp(self): source = KVStoreV3(dict()) - source['meta/root/foo'] = b'xxx' - source['meta/root/bar/baz'] = b'yyy' - source['meta/root/bar/qux'] = b'zzz' + source["meta/root/foo"] = b"xxx" + source["meta/root/bar/baz"] = b"yyy" + source["meta/root/bar/qux"] = b"zzz" self.source = source def _get_dest_store(self): @@ -548,12 +556,11 @@ def test_mismatched_store_versions(self): copy_store(self.source, dest) -def check_copied_array(original, copied, without_attrs=False, - expect_props=None): +def check_copied_array(original, copied, without_attrs=False, expect_props=None): # setup - source_h5py = original.__module__.startswith('h5py.') - dest_h5py = copied.__module__.startswith('h5py.') + source_h5py = original.__module__.startswith("h5py.") + dest_h5py = copied.__module__.startswith("h5py.") zarr_to_zarr = not (source_h5py or dest_h5py) h5py_to_h5py = source_h5py and dest_h5py zarr_to_h5py = not source_h5py and dest_h5py @@ -564,25 +571,32 @@ def check_copied_array(original, copied, without_attrs=False, expect_props = expect_props.copy() # common properties in zarr and h5py - for p in 'dtype', 'shape', 'chunks': + for p in "dtype", "shape", "chunks": expect_props.setdefault(p, getattr(original, p)) # zarr-specific properties if zarr_to_zarr: - for p in 'compressor', 'filters', 'order', 'fill_value': + for p in "compressor", "filters", "order", "fill_value": expect_props.setdefault(p, getattr(original, p)) # h5py-specific properties if h5py_to_h5py: - for p in ('maxshape', 'compression', 'compression_opts', 'shuffle', - 'scaleoffset', 'fletcher32', 'fillvalue'): + for p in ( + "maxshape", + "compression", + "compression_opts", + "shuffle", + "scaleoffset", + "fletcher32", + "fillvalue", + ): expect_props.setdefault(p, 
getattr(original, p)) # common properties with some name differences if h5py_to_zarr: - expect_props.setdefault('fill_value', original.fillvalue) + expect_props.setdefault("fill_value", original.fillvalue) if zarr_to_h5py: - expect_props.setdefault('fillvalue', original.fill_value) + expect_props.setdefault("fillvalue", original.fill_value) # compare properties for k, v in expect_props.items(): @@ -596,18 +610,17 @@ def check_copied_array(original, copied, without_attrs=False, for k in original.attrs.keys(): assert k not in copied.attrs else: - if dest_h5py and 'filters' in original.attrs: + if dest_h5py and "filters" in original.attrs: # special case in v3 (storing filters metadata under attributes) # we explicitly do not copy this info over to HDF5 original_attrs = original.attrs.asdict().copy() - original_attrs.pop('filters') + original_attrs.pop("filters") else: original_attrs = original.attrs assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) -def check_copied_group(original, copied, without_attrs=False, expect_props=None, - shallow=False): +def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): # setup if expect_props is None: @@ -617,16 +630,20 @@ def check_copied_group(original, copied, without_attrs=False, expect_props=None, # compare children for k, v in original.items(): - if hasattr(v, 'shape'): + if hasattr(v, "shape"): assert k in copied - check_copied_array(v, copied[k], without_attrs=without_attrs, - expect_props=expect_props) + check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props) elif shallow: assert k not in copied else: assert k in copied - check_copied_group(v, copied[k], without_attrs=without_attrs, - shallow=shallow, expect_props=expect_props) + check_copied_group( + v, + copied[k], + without_attrs=without_attrs, + shallow=shallow, + expect_props=expect_props, + ) # compare attrs if without_attrs: @@ -657,7 +674,7 @@ def test_copy_all(): dry_run=False, ) - assert 'subgroup' in destination_group + assert "subgroup" in destination_group assert destination_group.attrs["info"] == "group attrs" assert destination_group.subgroup.attrs["info"] == "sub attrs" @@ -670,10 +687,10 @@ def test_copy_all_v3(): copy_all used to not copy attributes as `.keys()` """ - original_group = zarr.group(store=MemoryStoreV3(), path='group1', overwrite=True) + original_group = zarr.group(store=MemoryStoreV3(), path="group1", overwrite=True) original_group.create_group("subgroup") - destination_group = zarr.group(store=MemoryStoreV3(), path='group2', overwrite=True) + destination_group = zarr.group(store=MemoryStoreV3(), path="group2", overwrite=True) # copy from memory to directory store copy_all( @@ -681,200 +698,212 @@ def test_copy_all_v3(): destination_group, dry_run=False, ) - assert 'subgroup' in destination_group + assert "subgroup" in destination_group class TestCopy: - @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) def source(self, request, tmpdir): def prep_source(source): - foo = source.create_group('foo') - foo.attrs['experiment'] = 'weird science' - baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) - baz.attrs['units'] = 'metres' + foo = source.create_group("foo") + foo.attrs["experiment"] = "weird science" + baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) + baz.attrs["units"] = "metres" if request.param: - extra_kws = dict(compression='gzip', compression_opts=3, 
fillvalue=84, - shuffle=True, fletcher32=True) + extra_kws = dict( + compression="gzip", + compression_opts=3, + fillvalue=84, + shuffle=True, + fletcher32=True, + ) else: - extra_kws = dict(compressor=Zlib(3), order='F', fill_value=42, filters=[Adler32()]) - source.create_dataset('spam', data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), dtype='i2', **extra_kws) + extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) + source.create_dataset( + "spam", + data=np.arange(100, 200).reshape(20, 5), + chunks=(10, 2), + dtype="i2", + **extra_kws, + ) return source if request.param: - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('source.h5') - with h5py.File(str(fn), mode='w') as h5f: + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("source.h5") + with h5py.File(str(fn), mode="w") as h5f: yield prep_source(h5f) else: yield prep_source(group()) - @pytest.fixture(params=[False, True], ids=['zarr', 'hdf5']) + @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) def dest(self, request, tmpdir): if request.param: - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('dest.h5') - with h5py.File(str(fn), mode='w') as h5f: + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("dest.h5") + with h5py.File(str(fn), mode="w") as h5f: yield h5f else: yield group() def test_copy_array(self, source, dest): # copy array with default options - copy(source['foo/bar/baz'], dest) - check_copied_array(source['foo/bar/baz'], dest['baz']) - copy(source['spam'], dest) - check_copied_array(source['spam'], dest['spam']) + copy(source["foo/bar/baz"], dest) + check_copied_array(source["foo/bar/baz"], dest["baz"]) + copy(source["spam"], dest) + check_copied_array(source["spam"], dest["spam"]) def test_copy_bad_dest(self, source, dest): # try to copy to an array, dest must be a group - dest = dest.create_dataset('eggs', shape=(100,)) + dest = dest.create_dataset("eggs", shape=(100,)) with pytest.raises(ValueError): - copy(source['foo/bar/baz'], dest) + copy(source["foo/bar/baz"], dest) def test_copy_array_name(self, source, dest): # copy array with name - copy(source['foo/bar/baz'], dest, name='qux') - assert 'baz' not in dest - check_copied_array(source['foo/bar/baz'], dest['qux']) + copy(source["foo/bar/baz"], dest, name="qux") + assert "baz" not in dest + check_copied_array(source["foo/bar/baz"], dest["qux"]) def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith('h5py.') + dest_h5py = dest.__module__.startswith("h5py.") # copy array, provide creation options compressor = Zlib(9) create_kws = dict(chunks=(10,)) if dest_h5py: - create_kws.update(compression='gzip', compression_opts=9, - shuffle=True, fletcher32=True, fillvalue=42) + create_kws.update( + compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 + ) else: - create_kws.update(compressor=compressor, fill_value=42, order='F', - filters=[Adler32()]) - copy(source['foo/bar/baz'], dest, without_attrs=True, **create_kws) - check_copied_array(source['foo/bar/baz'], dest['baz'], - without_attrs=True, expect_props=create_kws) + create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()]) + copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) + check_copied_array( + source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws + ) def test_copy_array_exists_array(self, source, dest): # copy array, dest array in the way - dest.create_dataset('baz', shape=(10,)) + 
dest.create_dataset("baz", shape=(10,)) # raise with pytest.raises(CopyError): # should raise by default - copy(source['foo/bar/baz'], dest) - assert (10,) == dest['baz'].shape + copy(source["foo/bar/baz"], dest) + assert (10,) == dest["baz"].shape with pytest.raises(CopyError): - copy(source['foo/bar/baz'], dest, if_exists='raise') - assert (10,) == dest['baz'].shape + copy(source["foo/bar/baz"], dest, if_exists="raise") + assert (10,) == dest["baz"].shape # skip - copy(source['foo/bar/baz'], dest, if_exists='skip') - assert (10,) == dest['baz'].shape + copy(source["foo/bar/baz"], dest, if_exists="skip") + assert (10,) == dest["baz"].shape # replace - copy(source['foo/bar/baz'], dest, if_exists='replace') - check_copied_array(source['foo/bar/baz'], dest['baz']) + copy(source["foo/bar/baz"], dest, if_exists="replace") + check_copied_array(source["foo/bar/baz"], dest["baz"]) # invalid option with pytest.raises(ValueError): - copy(source['foo/bar/baz'], dest, if_exists='foobar') + copy(source["foo/bar/baz"], dest, if_exists="foobar") def test_copy_array_exists_group(self, source, dest): # copy array, dest group in the way - dest.create_group('baz') + dest.create_group("baz") # raise with pytest.raises(CopyError): - copy(source['foo/bar/baz'], dest) - assert not hasattr(dest['baz'], 'shape') + copy(source["foo/bar/baz"], dest) + assert not hasattr(dest["baz"], "shape") with pytest.raises(CopyError): - copy(source['foo/bar/baz'], dest, if_exists='raise') - assert not hasattr(dest['baz'], 'shape') + copy(source["foo/bar/baz"], dest, if_exists="raise") + assert not hasattr(dest["baz"], "shape") # skip - copy(source['foo/bar/baz'], dest, if_exists='skip') - assert not hasattr(dest['baz'], 'shape') + copy(source["foo/bar/baz"], dest, if_exists="skip") + assert not hasattr(dest["baz"], "shape") # replace - copy(source['foo/bar/baz'], dest, if_exists='replace') - check_copied_array(source['foo/bar/baz'], dest['baz']) + copy(source["foo/bar/baz"], dest, if_exists="replace") + check_copied_array(source["foo/bar/baz"], dest["baz"]) def test_copy_array_skip_initialized(self, source, dest): - dest_h5py = dest.__module__.startswith('h5py.') + dest_h5py = dest.__module__.startswith("h5py.") - dest.create_dataset('baz', shape=(100,), chunks=(10,), dtype='i8') - assert not np.all(source['foo/bar/baz'][:] == dest['baz'][:]) + dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8") + assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) if dest_h5py: with pytest.raises(ValueError): # not available with copy to h5py - copy(source['foo/bar/baz'], dest, if_exists='skip_initialized') + copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") else: # copy array, dest array exists but not yet initialized - copy(source['foo/bar/baz'], dest, if_exists='skip_initialized') - check_copied_array(source['foo/bar/baz'], dest['baz']) + copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") + check_copied_array(source["foo/bar/baz"], dest["baz"]) # copy array, dest array exists and initialized, will be skipped - dest['baz'][:] = np.arange(100, 200) - copy(source['foo/bar/baz'], dest, if_exists='skip_initialized') - assert_array_equal(np.arange(100, 200), dest['baz'][:]) - assert not np.all(source['foo/bar/baz'][:] == dest['baz'][:]) + dest["baz"][:] = np.arange(100, 200) + copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") + assert_array_equal(np.arange(100, 200), dest["baz"][:]) + assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) def test_copy_group(self, source, 
dest): # copy group, default options - copy(source['foo'], dest) - check_copied_group(source['foo'], dest['foo']) + copy(source["foo"], dest) + check_copied_group(source["foo"], dest["foo"]) def test_copy_group_no_name(self, source, dest): with pytest.raises(TypeError): # need a name if copy root copy(source, dest) - copy(source, dest, name='root') - check_copied_group(source, dest['root']) + copy(source, dest, name="root") + check_copied_group(source, dest["root"]) def test_copy_group_options(self, source, dest): # copy group, non-default options - copy(source['foo'], dest, name='qux', without_attrs=True) - assert 'foo' not in dest - check_copied_group(source['foo'], dest['qux'], without_attrs=True) + copy(source["foo"], dest, name="qux", without_attrs=True) + assert "foo" not in dest + check_copied_group(source["foo"], dest["qux"], without_attrs=True) def test_copy_group_shallow(self, source, dest): # copy group, shallow - copy(source, dest, name='eggs', shallow=True) - check_copied_group(source, dest['eggs'], shallow=True) + copy(source, dest, name="eggs", shallow=True) + check_copied_group(source, dest["eggs"], shallow=True) def test_copy_group_exists_group(self, source, dest): # copy group, dest groups exist - dest.create_group('foo/bar') - copy(source['foo'], dest) - check_copied_group(source['foo'], dest['foo']) + dest.create_group("foo/bar") + copy(source["foo"], dest) + check_copied_group(source["foo"], dest["foo"]) def test_copy_group_exists_array(self, source, dest): # copy group, dest array in the way - dest.create_dataset('foo/bar', shape=(10,)) + dest.create_dataset("foo/bar", shape=(10,)) # raise with pytest.raises(CopyError): - copy(source['foo'], dest) - assert dest['foo/bar'].shape == (10,) + copy(source["foo"], dest) + assert dest["foo/bar"].shape == (10,) with pytest.raises(CopyError): - copy(source['foo'], dest, if_exists='raise') - assert dest['foo/bar'].shape == (10,) + copy(source["foo"], dest, if_exists="raise") + assert dest["foo/bar"].shape == (10,) # skip - copy(source['foo'], dest, if_exists='skip') - assert dest['foo/bar'].shape == (10,) + copy(source["foo"], dest, if_exists="skip") + assert dest["foo/bar"].shape == (10,) # replace - copy(source['foo'], dest, if_exists='replace') - check_copied_group(source['foo'], dest['foo']) + copy(source["foo"], dest, if_exists="replace") + check_copied_group(source["foo"], dest["foo"]) def test_copy_group_dry_run(self, source, dest): # dry run, empty destination - n_copied, n_skipped, n_bytes_copied = \ - copy(source['foo'], dest, dry_run=True, return_stats=True) + n_copied, n_skipped, n_bytes_copied = copy( + source["foo"], dest, dry_run=True, return_stats=True + ) assert 0 == len(dest) assert 3 == n_copied assert 0 == n_skipped @@ -882,133 +911,144 @@ def test_copy_group_dry_run(self, source, dest): # dry run, array exists in destination baz = np.arange(100, 200) - dest.create_dataset('foo/bar/baz', data=baz) - assert not np.all(source['foo/bar/baz'][:] == dest['foo/bar/baz'][:]) + dest.create_dataset("foo/bar/baz", data=baz) + assert not np.all(source["foo/bar/baz"][:] == dest["foo/bar/baz"][:]) assert 1 == len(dest) # raise with pytest.raises(CopyError): - copy(source['foo'], dest, dry_run=True) + copy(source["foo"], dest, dry_run=True) assert 1 == len(dest) # skip - n_copied, n_skipped, n_bytes_copied = \ - copy(source['foo'], dest, dry_run=True, if_exists='skip', - return_stats=True) + n_copied, n_skipped, n_bytes_copied = copy( + source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True + ) assert 1 == 
len(dest) assert 2 == n_copied assert 1 == n_skipped assert 0 == n_bytes_copied - assert_array_equal(baz, dest['foo/bar/baz']) + assert_array_equal(baz, dest["foo/bar/baz"]) # replace - n_copied, n_skipped, n_bytes_copied = \ - copy(source['foo'], dest, dry_run=True, if_exists='replace', - return_stats=True) + n_copied, n_skipped, n_bytes_copied = copy( + source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True + ) assert 1 == len(dest) assert 3 == n_copied assert 0 == n_skipped assert 0 == n_bytes_copied - assert_array_equal(baz, dest['foo/bar/baz']) + assert_array_equal(baz, dest["foo/bar/baz"]) def test_logging(self, source, dest, tmpdir): # callable log - copy(source['foo'], dest, dry_run=True, log=print) + copy(source["foo"], dest, dry_run=True, log=print) # file name - fn = str(tmpdir.join('log_name')) - copy(source['foo'], dest, dry_run=True, log=fn) + fn = str(tmpdir.join("log_name")) + copy(source["foo"], dest, dry_run=True, log=fn) # file - with tmpdir.join('log_file').open(mode='w') as f: - copy(source['foo'], dest, dry_run=True, log=f) + with tmpdir.join("log_file").open(mode="w") as f: + copy(source["foo"], dest, dry_run=True, log=f) # bad option with pytest.raises(TypeError): - copy(source['foo'], dest, dry_run=True, log=True) + copy(source["foo"], dest, dry_run=True, log=True) @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestCopyV3(TestCopy): - - @pytest.fixture(params=['zarr', 'hdf5']) + @pytest.fixture(params=["zarr", "hdf5"]) def source(self, request, tmpdir): def prep_source(source): - foo = source.create_group('foo') - foo.attrs['experiment'] = 'weird science' - baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) - baz.attrs['units'] = 'metres' - if request.param == 'hdf5': - extra_kws = dict(compression='gzip', compression_opts=3, fillvalue=84, - shuffle=True, fletcher32=True) + foo = source.create_group("foo") + foo.attrs["experiment"] = "weird science" + baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) + baz.attrs["units"] = "metres" + if request.param == "hdf5": + extra_kws = dict( + compression="gzip", + compression_opts=3, + fillvalue=84, + shuffle=True, + fletcher32=True, + ) else: - extra_kws = dict(compressor=Zlib(3), order='F', fill_value=42, filters=[Adler32()]) - source.create_dataset('spam', data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), dtype='i2', **extra_kws) + extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) + source.create_dataset( + "spam", + data=np.arange(100, 200).reshape(20, 5), + chunks=(10, 2), + dtype="i2", + **extra_kws, + ) return source - if request.param == 'hdf5': - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('source.h5') - with h5py.File(str(fn), mode='w') as h5f: + if request.param == "hdf5": + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("source.h5") + with h5py.File(str(fn), mode="w") as h5f: yield prep_source(h5f) - elif request.param == 'zarr': - yield prep_source(group(path='group1', zarr_version=3)) + elif request.param == "zarr": + yield prep_source(group(path="group1", zarr_version=3)) # Test with various destination StoreV3 types as TestCopyV3 covers rmdir - destinations = ['hdf5', 'zarr', 'zarr_kvstore', 'zarr_directorystore', 'zarr_sqlitestore'] + destinations = ["hdf5", "zarr", "zarr_kvstore", "zarr_directorystore", "zarr_sqlitestore"] if have_fsspec: - destinations += ['zarr_fsstore'] + destinations += ["zarr_fsstore"] @pytest.fixture(params=destinations) def dest(self, 
request, tmpdir): - if request.param == 'hdf5': - h5py = pytest.importorskip('h5py') - fn = tmpdir.join('dest.h5') - with h5py.File(str(fn), mode='w') as h5f: + if request.param == "hdf5": + h5py = pytest.importorskip("h5py") + fn = tmpdir.join("dest.h5") + with h5py.File(str(fn), mode="w") as h5f: yield h5f - elif request.param == 'zarr': - yield group(path='group2', zarr_version=3) - elif request.param == 'zarr_kvstore': + elif request.param == "zarr": + yield group(path="group2", zarr_version=3) + elif request.param == "zarr_kvstore": store = KVStoreV3(dict()) - yield group(store, path='group2', zarr_version=3) - elif request.param == 'zarr_fsstore': - fn = tmpdir.join('dest.zr3') + yield group(store, path="group2", zarr_version=3) + elif request.param == "zarr_fsstore": + fn = tmpdir.join("dest.zr3") store = FSStoreV3(str(fn), auto_mkdir=True) - yield group(store, path='group2', zarr_version=3) - elif request.param == 'zarr_directorystore': - fn = tmpdir.join('dest.zr3') + yield group(store, path="group2", zarr_version=3) + elif request.param == "zarr_directorystore": + fn = tmpdir.join("dest.zr3") store = DirectoryStoreV3(str(fn)) - yield group(store, path='group2', zarr_version=3) - elif request.param == 'zarr_sqlitestore': - fn = tmpdir.join('dest.db') + yield group(store, path="group2", zarr_version=3) + elif request.param == "zarr_sqlitestore": + fn = tmpdir.join("dest.db") store = SQLiteStoreV3(str(fn)) - yield group(store, path='group2', zarr_version=3) + yield group(store, path="group2", zarr_version=3) def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith('h5py.') + dest_h5py = dest.__module__.startswith("h5py.") # copy array, provide creation options compressor = Zlib(9) create_kws = dict(chunks=(10,)) if dest_h5py: - create_kws.update(compression='gzip', compression_opts=9, - shuffle=True, fletcher32=True, fillvalue=42) + create_kws.update( + compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 + ) else: # v3 case has no filters argument in zarr create_kws - create_kws.update(compressor=compressor, fill_value=42, order='F') - copy(source['foo/bar/baz'], dest, without_attrs=True, **create_kws) - check_copied_array(source['foo/bar/baz'], dest['baz'], - without_attrs=True, expect_props=create_kws) + create_kws.update(compressor=compressor, fill_value=42, order="F") + copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) + check_copied_array( + source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws + ) def test_copy_group_no_name(self, source, dest): - if source.__module__.startswith('h5py'): + if source.__module__.startswith("h5py"): with pytest.raises(TypeError): copy(source, dest) else: # For v3, dest.name will be inferred from source.name copy(source, dest) - check_copied_group(source, dest[source.name.lstrip('/')]) + check_copied_group(source, dest[source.name.lstrip("/")]) - copy(source, dest, name='root') - check_copied_group(source, dest['root']) + copy(source, dest, name="root") + check_copied_group(source, dest["root"]) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index ab1a6e8aa7..d86c3bf39b 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -10,9 +10,22 @@ import numpy as np import packaging.version import pytest -from numcodecs import (BZ2, JSON, LZ4, Blosc, Categorize, Delta, - FixedScaleOffset, GZip, MsgPack, Pickle, VLenArray, - VLenBytes, VLenUTF8, Zlib) +from numcodecs import ( + BZ2, + JSON, + LZ4, + Blosc, + 
Categorize, + Delta, + FixedScaleOffset, + GZip, + MsgPack, + Pickle, + VLenArray, + VLenBytes, + VLenUTF8, + Zlib, +) from numcodecs.compat import ensure_bytes, ensure_ndarray from numcodecs.tests.common import greetings from numpy.testing import assert_array_almost_equal, assert_array_equal @@ -65,7 +78,7 @@ class TestArray(unittest.TestCase): version = 2 - root = '' + root = "" KVStoreClass = KVStore def test_array_init(self): @@ -77,7 +90,7 @@ def test_array_init(self): assert isinstance(a, Array) assert (100,) == a.shape assert (10,) == a.chunks - assert '' == a.path + assert "" == a.path assert a.name is None assert a.basename is None assert store is a.store @@ -89,14 +102,14 @@ def test_array_init(self): # initialize at path store = self.KVStoreClass(dict()) - init_array(store, shape=100, chunks=10, path='foo/bar', dtype='')) + a2 = self.create_array(shape=1000, chunks=100, dtype=dtype.newbyteorder(">")) a2[:] = 1 x2 = a2[:] assert_array_equal(x1, x2) @@ -1543,46 +1558,52 @@ def test_endian(self): a2.store.close() def test_attributes(self): - a = self.create_array(shape=10, chunks=10, dtype='i8') - a.attrs['foo'] = 'bar' + a = self.create_array(shape=10, chunks=10, dtype="i8") + a.attrs["foo"] = "bar" assert a.attrs.key in a.store attrs = json_loads(a.store[a.attrs.key]) if self.version > 2: # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs['attributes'] - assert 'foo' in attrs and attrs['foo'] == 'bar' + attrs = attrs["attributes"] + assert "foo" in attrs and attrs["foo"] == "bar" - a.attrs['bar'] = 'foo' + a.attrs["bar"] = "foo" assert a.attrs.key in a.store attrs = json_loads(a.store[a.attrs.key]) if self.version > 2: # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs['attributes'] - assert 'foo' in attrs and attrs['foo'] == 'bar' - assert 'bar' in attrs and attrs['bar'] == 'foo' + attrs = attrs["attributes"] + assert "foo" in attrs and attrs["foo"] == "bar" + assert "bar" in attrs and attrs["bar"] == "foo" a.store.close() def test_structured_with_object(self): - a = self.create_array(fill_value=(0.0, None), - shape=10, - chunks=10, - dtype=[('x', float), ('y', object)], - object_codec=Pickle()) + a = self.create_array( + fill_value=(0.0, None), + shape=10, + chunks=10, + dtype=[("x", float), ("y", object)], + object_codec=Pickle(), + ) assert tuple(a[0]) == (0.0, None) class TestArrayWithPath(TestArray): - @staticmethod def create_array(read_only=False, **kwargs): store = KVStore(dict()) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - init_array(store, path='foo/bar', **kwargs) - return Array(store, path='foo/bar', read_only=read_only, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + init_array(store, path="foo/bar", **kwargs) + return Array( + store, + path="foo/bar", + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def test_nchunks_initialized(self): pass @@ -1593,42 +1614,46 @@ def expected(self): "1437428e69754b1e1a38bd7fc9e43669577620db", "6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe", "4c0a76fb1222498e09dcd92f7f9221d6cea8b40e", - "05b0663ffe1785f38d3a459dec17e57a18f254af" + "05b0663ffe1785f38d3a459dec17e57a18f254af", ] 
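As a minimal sketch (assuming the public zarr v2 API, with a hypothetical "foo/bar" path) of the property that test_nbytes_stored below checks for a path-scoped array: every key the array writes lands under its "foo/bar/" prefix, and nbytes_stored reports the total size of those stored values, which is what the test recomputes by hand with buffer_size:

    # sketch: nbytes_stored equals the summed size of keys under the array's prefix
    import zarr
    from zarr.storage import KVStore

    store = KVStore(dict())
    z = zarr.open_array(store, mode="w", path="foo/bar",
                        shape=(1000,), chunks=(100,), dtype="i8")
    z[:] = 42

    manual_total = sum(len(v) for k, v in store.items() if k.startswith("foo/bar/"))
    assert z.nbytes_stored == manual_total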
def test_nbytes_stored(self): # MemoryStore as store z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) - for k, v in z.store.items() - if k.startswith('foo/bar/')) + expect_nbytes_stored = sum( + buffer_size(v) for k, v in z.store.items() if k.startswith("foo/bar/") + ) assert expect_nbytes_stored == z.nbytes_stored z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) - for k, v in z.store.items() - if k.startswith('foo/bar/')) + expect_nbytes_stored = sum( + buffer_size(v) for k, v in z.store.items() if k.startswith("foo/bar/") + ) assert expect_nbytes_stored == z.nbytes_stored # mess with store - z.store[z._key_prefix + 'foo'] = list(range(10)) + z.store[z._key_prefix + "foo"] = list(range(10)) assert -1 == z.nbytes_stored class TestArrayWithChunkStore(TestArray): - @staticmethod def create_array(read_only=False, **kwargs): store = KVStore(dict()) # separate chunk store chunk_store = KVStore(dict()) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) init_array(store, chunk_store=chunk_store, **kwargs) - return Array(store, read_only=read_only, chunk_store=chunk_store, - cache_metadata=cache_metadata, cache_attrs=cache_attrs, - write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + chunk_store=chunk_store, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def expected(self): return [ @@ -1636,41 +1661,43 @@ def expected(self): "1437428e69754b1e1a38bd7fc9e43669577620db", "6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe", "4c0a76fb1222498e09dcd92f7f9221d6cea8b40e", - "05b0663ffe1785f38d3a459dec17e57a18f254af" + "05b0663ffe1785f38d3a459dec17e57a18f254af", ] def test_nbytes_stored(self): z = self.create_array(shape=1000, chunks=100) expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - expect_nbytes_stored += sum(buffer_size(v) - for v in z.chunk_store.values()) + expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values()) assert expect_nbytes_stored == z.nbytes_stored z[:] = 42 expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values()) - expect_nbytes_stored += sum(buffer_size(v) - for v in z.chunk_store.values()) + expect_nbytes_stored += sum(buffer_size(v) for v in z.chunk_store.values()) assert expect_nbytes_stored == z.nbytes_stored # mess with store - z.chunk_store[z._key_prefix + 'foo'] = list(range(10)) + z.chunk_store[z._key_prefix + "foo"] = list(range(10)) assert -1 == z.nbytes_stored class TestArrayWithDirectoryStore(TestArray): - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() atexit.register(shutil.rmtree, path) store = DirectoryStore(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + kwargs.setdefault("compressor", Zlib(1)) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + 
return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def test_nbytes_stored(self): @@ -1695,7 +1722,6 @@ def test_array_init_from_dict(): @skip_test_env_var("ZARR_TEST_ABS") class TestArrayWithABSStore(TestArray): - @staticmethod def absstore(): client = abs_container() @@ -1705,13 +1731,18 @@ def absstore(): def create_array(self, read_only=False, **kwargs): store = self.absstore() - kwargs.setdefault('compressor', Zlib(1)) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) + kwargs.setdefault("compressor", Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) @pytest.mark.xfail def test_nbytes_stored(self): @@ -1724,19 +1755,23 @@ def test_pickle(self): class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore): - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() atexit.register(shutil.rmtree, path) store = NestedDirectoryStore(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + kwargs.setdefault("compressor", Zlib(1)) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def expected(self): return [ @@ -1749,19 +1784,23 @@ def expected(self): class TestArrayWithN5Store(TestArrayWithDirectoryStore): - @staticmethod def create_array(read_only=False, **kwargs): path = mkdtemp() atexit.register(shutil.rmtree, path) store = N5Store(path) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - write_empty_chunks = kwargs.pop('write_empty_chunks', True) - kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop("cache_metadata", True) + cache_attrs = kwargs.pop("cache_attrs", True) + write_empty_chunks = kwargs.pop("write_empty_chunks", True) + kwargs.setdefault("compressor", Zlib(1)) init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks) + return Array( + store, + read_only=read_only, + cache_metadata=cache_metadata, + cache_attrs=cache_attrs, + write_empty_chunks=write_empty_chunks, + ) def test_array_0d(self): # test behaviour for array with 0 dimensions @@ -1816,8 +1855,7 @@ def test_array_1d_fill_value(self): a = np.arange(nvalues, dtype=dtype) f = np.empty_like(a) f.fill(fill_value or 0) - z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, - 
fill_value=fill_value) + z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, fill_value=fill_value) z[190:310] = a[190:310] assert_array_equal(f[:190], z[:190]) @@ -1825,21 +1863,18 @@ def test_array_1d_fill_value(self): assert_array_equal(f[310:], z[310:]) with pytest.raises(ValueError): - z = self.create_array(shape=(nvalues,), chunks=100, dtype=dtype, - fill_value=1) + z = self.create_array(shape=(nvalues,), chunks=100, dtype=dtype, fill_value=1) def test_nchunks_initialized(self): fill_value = 0 - dtype = 'int' - z = self.create_array(shape=100, - chunks=10, - fill_value=fill_value, - dtype=dtype, - write_empty_chunks=True) + dtype = "int" + z = self.create_array( + shape=100, chunks=10, fill_value=fill_value, dtype=dtype, write_empty_chunks=True + ) assert 0 == z.nchunks_initialized # manually put something into the store to confuse matters - z.store['foo'] = b'bar' + z.store["foo"] = b"bar" assert 0 == z.nchunks_initialized z[:] = 42 assert 10 == z.nchunks_initialized @@ -1849,11 +1884,9 @@ def test_nchunks_initialized(self): # second round of similar tests with write_empty_chunks set to # False - z = self.create_array(shape=100, - chunks=10, - fill_value=fill_value, - dtype=dtype, - write_empty_chunks=False) + z = self.create_array( + shape=100, chunks=10, fill_value=fill_value, dtype=dtype, write_empty_chunks=False + ) z[:] = 42 assert 10 == z.nchunks_initialized # manually remove a chunk from the store @@ -1866,61 +1899,69 @@ def test_array_order(self): # N5 only supports 'C' at the moment with pytest.raises(ValueError): - self.create_array(shape=(10, 11), chunks=(10, 11), dtype='i8', - order='F') + self.create_array(shape=(10, 11), chunks=(10, 11), dtype="i8", order="F") # 1D a = np.arange(1050) - z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, - order='C') - assert z.order == 'C' + z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype, order="C") + assert z.order == "C" assert z[:].flags.c_contiguous z[:] = a assert_array_equal(a, z[:]) # 2D a = np.arange(10000).reshape((100, 100)) - z = self.create_array(shape=a.shape, chunks=(10, 10), - dtype=a.dtype, order='C') + z = self.create_array(shape=a.shape, chunks=(10, 10), dtype=a.dtype, order="C") - assert z.order == 'C' + assert z.order == "C" assert z[:].flags.c_contiguous z[:] = a actual = z[:] assert_array_equal(a, actual) def test_structured_array(self): - d = np.array([(b'aaa', 1, 4.2), - (b'bbb', 2, 8.4), - (b'ccc', 3, 12.6)], - dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - fill_values = None, b'', (b'zzz', 42, 16.8) + d = np.array( + [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)], + dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], + ) + fill_values = None, b"", (b"zzz", 42, 16.8) with pytest.raises(TypeError): self.check_structured_array(d, fill_values) def test_structured_array_subshapes(self): - d = np.array([(0, ((0, 1, 2), (1, 2, 3)), b'aaa'), - (1, ((1, 2, 3), (2, 3, 4)), b'bbb'), - (2, ((2, 3, 4), (3, 4, 5)), b'ccc')], - dtype=[('foo', 'i8'), ('bar', '(2, 3)f4'), ('baz', 'S3')]) - fill_values = None, b'', (0, ((0, 0, 0), (1, 1, 1)), b'zzz') + d = np.array( + [ + (0, ((0, 1, 2), (1, 2, 3)), b"aaa"), + (1, ((1, 2, 3), (2, 3, 4)), b"bbb"), + (2, ((2, 3, 4), (3, 4, 5)), b"ccc"), + ], + dtype=[("foo", "i8"), ("bar", "(2, 3)f4"), ("baz", "S3")], + ) + fill_values = None, b"", (0, ((0, 0, 0), (1, 1, 1)), b"zzz") with pytest.raises(TypeError): self.check_structured_array(d, fill_values) def test_structured_array_nested(self): - d = np.array([(0, (0, ((0, 1), (1, 2), 
(2, 3)), 0), b'aaa'), - (1, (1, ((1, 2), (2, 3), (3, 4)), 1), b'bbb'), - (2, (2, ((2, 3), (3, 4), (4, 5)), 2), b'ccc')], - dtype=[('foo', 'i8'), ('bar', [('foo', 'i4'), ('bar', '(3, 2)f4'), - ('baz', 'u1')]), ('baz', 'S3')]) - fill_values = None, b'', (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b'zzz') + d = np.array( + [ + (0, (0, ((0, 1), (1, 2), (2, 3)), 0), b"aaa"), + (1, (1, ((1, 2), (2, 3), (3, 4)), 1), b"bbb"), + (2, (2, ((2, 3), (3, 4), (4, 5)), 2), b"ccc"), + ], + dtype=[ + ("foo", "i8"), + ("bar", [("foo", "i4"), ("bar", "(3, 2)f4"), ("baz", "u1")]), + ("baz", "S3"), + ], + ) + fill_values = None, b"", (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b"zzz") with pytest.raises(TypeError): self.check_structured_array(d, fill_values) def test_dtypes(self): # integers - for dtype in 'u1', 'u2', 'u4', 'u8', 'i1', 'i2', 'i4', 'i8': + for dtype in "u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8": z = self.create_array(shape=10, chunks=3, dtype=dtype) assert z.dtype == np.dtype(dtype) a = np.arange(z.shape[0], dtype=dtype) @@ -1928,7 +1969,7 @@ def test_dtypes(self): assert_array_equal(a, z[:]) # floats - for dtype in 'f2', 'f4', 'f8': + for dtype in "f2", "f4", "f8": z = self.create_array(shape=10, chunks=3, dtype=dtype) assert z.dtype == np.dtype(dtype) a = np.linspace(0, 1, z.shape[0], dtype=dtype) @@ -1937,9 +1978,9 @@ def test_dtypes(self): # check that datetime generic units are not allowed with pytest.raises(ValueError): - self.create_array(shape=100, dtype='M8') + self.create_array(shape=100, dtype="M8") with pytest.raises(ValueError): - self.create_array(shape=100, dtype='m8') + self.create_array(shape=100, dtype="m8") def test_object_arrays(self): @@ -1970,7 +2011,7 @@ def test_object_arrays_vlen_text(self): def test_object_arrays_vlen_bytes(self): - greetings_bytes = [g.encode('utf8') for g in greetings] + greetings_bytes = [g.encode("utf8") for g in greetings] data = np.array(greetings_bytes * 1000, dtype=object) with pytest.raises(ValueError): @@ -1982,19 +2023,19 @@ def test_object_arrays_vlen_bytes(self): def test_object_arrays_vlen_array(self): - data = np.array([np.array([1, 3, 7]), - np.array([5]), - np.array([2, 8, 12])] * 1000, dtype=object) + data = np.array( + [np.array([1, 3, 7]), np.array([5]), np.array([2, 8, 12])] * 1000, dtype=object + ) - codecs = VLenArray(int), VLenArray(' 2 and g1.store.is_erasable(): - arr_path = g1.path + '/arr1' + arr_path = g1.path + "/arr1" sfx = _get_metadata_suffix(g1.store) - array_meta_file = meta_root + arr_path + '.array' + sfx + array_meta_file = meta_root + arr_path + ".array" + sfx assert array_meta_file in g1.store - group_meta_file = meta_root + g2.path + '.group' + sfx + group_meta_file = meta_root + g2.path + ".group" + sfx assert group_meta_file in g1.store # rmdir on the array path should also remove the metadata file @@ -280,21 +308,21 @@ def test_rmdir_group_and_array_metadata_files(self): assert group_meta_file not in g1.store def _dataset_path(self, group, path): - path = path.rstrip('/') - absolute = path.startswith('/') + path = path.rstrip("/") + absolute = path.startswith("/") if absolute: dataset_path = path else: - dataset_path = '/'.join([group.path, path]) - dataset_path = dataset_path.lstrip('/') - dataset_name = '/' + dataset_path + dataset_path = "/".join([group.path, path]) + dataset_path = dataset_path.lstrip("/") + dataset_name = "/" + dataset_path return dataset_path, dataset_name def test_create_dataset(self): g = self.create_group() # create as immediate child - dpath = 'foo' + dpath = "foo" d1 = 
g.create_dataset(dpath, shape=1000, chunks=100) path, name = self._dataset_path(g, dpath) assert isinstance(d1, Array) @@ -305,32 +333,39 @@ def test_create_dataset(self): assert g.store is d1.store # create as descendant - dpath = '/a/b/c/' - d2 = g.create_dataset(dpath, shape=2000, chunks=200, dtype='i1', - compression='zlib', compression_opts=9, - fill_value=42, order='F') + dpath = "/a/b/c/" + d2 = g.create_dataset( + dpath, + shape=2000, + chunks=200, + dtype="i1", + compression="zlib", + compression_opts=9, + fill_value=42, + order="F", + ) path, name = self._dataset_path(g, dpath) assert isinstance(d2, Array) assert (2000,) == d2.shape assert (200,) == d2.chunks - assert np.dtype('i1') == d2.dtype - assert 'zlib' == d2.compressor.codec_id + assert np.dtype("i1") == d2.dtype + assert "zlib" == d2.compressor.codec_id assert 9 == d2.compressor.level assert 42 == d2.fill_value - assert 'F' == d2.order + assert "F" == d2.order assert path == d2.path assert name == d2.name assert g.store is d2.store # create with data - data = np.arange(3000, dtype='u2') - dpath = 'bar' + data = np.arange(3000, dtype="u2") + dpath = "bar" d3 = g.create_dataset(dpath, data=data, chunks=300) path, name = self._dataset_path(g, dpath) assert isinstance(d3, Array) assert (3000,) == d3.shape assert (300,) == d3.chunks - assert np.dtype('u2') == d3.dtype + assert np.dtype("u2") == d3.dtype assert_array_equal(data, d3[:]) assert path == d3.path assert name == d3.name @@ -339,35 +374,39 @@ def test_create_dataset(self): # compression arguments handling follows... # compression_opts as dict - d = g.create_dataset('aaa', shape=1000, dtype='u1', - compression='blosc', - compression_opts=dict(cname='zstd', clevel=1, shuffle=2)) - assert d.compressor.codec_id == 'blosc' - assert 'zstd' == d.compressor.cname + d = g.create_dataset( + "aaa", + shape=1000, + dtype="u1", + compression="blosc", + compression_opts=dict(cname="zstd", clevel=1, shuffle=2), + ) + assert d.compressor.codec_id == "blosc" + assert "zstd" == d.compressor.cname assert 1 == d.compressor.clevel assert 2 == d.compressor.shuffle # compression_opts as sequence - d = g.create_dataset('bbb', shape=1000, dtype='u1', - compression='blosc', - compression_opts=('zstd', 1, 2)) - assert d.compressor.codec_id == 'blosc' - assert 'zstd' == d.compressor.cname + d = g.create_dataset( + "bbb", shape=1000, dtype="u1", compression="blosc", compression_opts=("zstd", 1, 2) + ) + assert d.compressor.codec_id == "blosc" + assert "zstd" == d.compressor.cname assert 1 == d.compressor.clevel assert 2 == d.compressor.shuffle # None compression_opts - d = g.create_dataset('ccc', shape=1000, dtype='u1', compression='zlib') - assert d.compressor.codec_id == 'zlib' + d = g.create_dataset("ccc", shape=1000, dtype="u1", compression="zlib") + assert d.compressor.codec_id == "zlib" assert 1 == d.compressor.level # None compression - d = g.create_dataset('ddd', shape=1000, dtype='u1', compression=None) + d = g.create_dataset("ddd", shape=1000, dtype="u1", compression=None) assert d.compressor is None # compressor as compression - d = g.create_dataset('eee', shape=1000, dtype='u1', compression=Zlib(1)) - assert d.compressor.codec_id == 'zlib' + d = g.create_dataset("eee", shape=1000, dtype="u1", compression=Zlib(1)) + assert d.compressor.codec_id == "zlib" assert 1 == d.compressor.level g.store.close() @@ -376,25 +415,25 @@ def test_require_dataset(self): g = self.create_group() # create - dpath = 'foo' - d1 = g.require_dataset(dpath, shape=1000, chunks=100, dtype='f4') + dpath = "foo" 
+ d1 = g.require_dataset(dpath, shape=1000, chunks=100, dtype="f4") d1[:] = np.arange(1000) path, name = self._dataset_path(g, dpath) assert isinstance(d1, Array) assert (1000,) == d1.shape assert (100,) == d1.chunks - assert np.dtype('f4') == d1.dtype + assert np.dtype("f4") == d1.dtype assert path == d1.path assert name == d1.name assert g.store is d1.store assert_array_equal(np.arange(1000), d1[:]) # require - d2 = g.require_dataset(dpath, shape=1000, chunks=100, dtype='f4') + d2 = g.require_dataset(dpath, shape=1000, chunks=100, dtype="f4") assert isinstance(d2, Array) assert (1000,) == d2.shape assert (100,) == d2.chunks - assert np.dtype('f4') == d2.dtype + assert np.dtype("f4") == d2.dtype assert path == d2.path assert name == d2.name assert g.store is d2.store @@ -403,20 +442,19 @@ def test_require_dataset(self): # bad shape - use TypeError for h5py compatibility with pytest.raises(TypeError): - g.require_dataset('foo', shape=2000, chunks=100, dtype='f4') + g.require_dataset("foo", shape=2000, chunks=100, dtype="f4") # dtype matching # can cast - d3 = g.require_dataset('foo', shape=1000, chunks=100, dtype='i2') - assert np.dtype('f4') == d3.dtype + d3 = g.require_dataset("foo", shape=1000, chunks=100, dtype="i2") + assert np.dtype("f4") == d3.dtype assert d1 == d3 with pytest.raises(TypeError): # cannot cast - g.require_dataset('foo', shape=1000, chunks=100, dtype='i4') + g.require_dataset("foo", shape=1000, chunks=100, dtype="i4") with pytest.raises(TypeError): # can cast but not exact match - g.require_dataset('foo', shape=1000, chunks=100, dtype='i2', - exact=True) + g.require_dataset("foo", shape=1000, chunks=100, dtype="i2", exact=True) g.store.close() @@ -424,80 +462,76 @@ def test_create_errors(self): g = self.create_group() # array obstructs group, array - g.create_dataset('foo', shape=100, chunks=10) + g.create_dataset("foo", shape=100, chunks=10) with pytest.raises(ValueError): - g.create_group('foo/bar') + g.create_group("foo/bar") with pytest.raises(ValueError): - g.require_group('foo/bar') + g.require_group("foo/bar") with pytest.raises(ValueError): - g.create_dataset('foo/bar', shape=100, chunks=10) + g.create_dataset("foo/bar", shape=100, chunks=10) with pytest.raises(ValueError): - g.require_dataset('foo/bar', shape=100, chunks=10) + g.require_dataset("foo/bar", shape=100, chunks=10) # array obstructs group, array - g.create_dataset('a/b', shape=100, chunks=10) + g.create_dataset("a/b", shape=100, chunks=10) with pytest.raises(ValueError): - g.create_group('a/b') + g.create_group("a/b") with pytest.raises(ValueError): - g.require_group('a/b') + g.require_group("a/b") with pytest.raises(ValueError): - g.create_dataset('a/b', shape=100, chunks=10) + g.create_dataset("a/b", shape=100, chunks=10) # group obstructs array - g.create_group('c/d') + g.create_group("c/d") with pytest.raises(ValueError): - g.create_dataset('c', shape=100, chunks=10) + g.create_dataset("c", shape=100, chunks=10) with pytest.raises(ValueError): - g.require_dataset('c', shape=100, chunks=10) + g.require_dataset("c", shape=100, chunks=10) with pytest.raises(ValueError): - g.create_dataset('c/d', shape=100, chunks=10) + g.create_dataset("c/d", shape=100, chunks=10) with pytest.raises(ValueError): - g.require_dataset('c/d', shape=100, chunks=10) + g.require_dataset("c/d", shape=100, chunks=10) # h5py compatibility, accept 'fillvalue' - d = g.create_dataset('x', shape=100, chunks=10, fillvalue=42) + d = g.create_dataset("x", shape=100, chunks=10, fillvalue=42) assert 42 == d.fill_value # h5py 
compatibility, ignore 'shuffle' with pytest.warns(UserWarning, match="ignoring keyword argument 'shuffle'"): - g.create_dataset('y', shape=100, chunks=10, shuffle=True) + g.create_dataset("y", shape=100, chunks=10, shuffle=True) # read-only g = self.create_group(read_only=True) with pytest.raises(PermissionError): - g.create_group('zzz') + g.create_group("zzz") with pytest.raises(PermissionError): - g.require_group('zzz') + g.require_group("zzz") with pytest.raises(PermissionError): - g.create_dataset('zzz', shape=100, chunks=10) + g.create_dataset("zzz", shape=100, chunks=10) with pytest.raises(PermissionError): - g.require_dataset('zzz', shape=100, chunks=10) + g.require_dataset("zzz", shape=100, chunks=10) g.store.close() def test_create_overwrite(self): try: - for method_name in 'create_dataset', 'create', 'empty', 'zeros', \ - 'ones': + for method_name in "create_dataset", "create", "empty", "zeros", "ones": g = self.create_group() - getattr(g, method_name)('foo', shape=100, chunks=10) + getattr(g, method_name)("foo", shape=100, chunks=10) # overwrite array with array - d = getattr(g, method_name)('foo', shape=200, chunks=20, - overwrite=True) + d = getattr(g, method_name)("foo", shape=200, chunks=20, overwrite=True) assert (200,) == d.shape # overwrite array with group - g2 = g.create_group('foo', overwrite=True) + g2 = g.create_group("foo", overwrite=True) assert 0 == len(g2) # overwrite group with array - d = getattr(g, method_name)('foo', shape=300, chunks=30, - overwrite=True) + d = getattr(g, method_name)("foo", shape=300, chunks=30, overwrite=True) assert (300,) == d.shape # overwrite array with group - d = getattr(g, method_name)('foo/bar', shape=400, chunks=40, - overwrite=True) + d = getattr(g, method_name)("foo/bar", shape=400, chunks=40, overwrite=True) assert (400,) == d.shape - assert isinstance(g['foo'], Group) + assert isinstance(g["foo"], Group) g.store.close() except NotImplementedError: @@ -506,84 +540,84 @@ def test_create_overwrite(self): def test_getitem_contains_iterators(self): # setup g1 = self.create_group() - g2 = g1.create_group('foo/bar') + g2 = g1.create_group("foo/bar") if g1._version == 2: - d1 = g2.create_dataset('/a/b/c', shape=1000, chunks=100) + d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100) else: # v3: cannot create a dataset at the root by starting with / # instead, need to create the dataset on g1 directly - d1 = g1.create_dataset('a/b/c', shape=1000, chunks=100) + d1 = g1.create_dataset("a/b/c", shape=1000, chunks=100) d1[:] = np.arange(1000) - d2 = g1.create_dataset('foo/baz', shape=3000, chunks=300) + d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300) d2[:] = np.arange(3000) # test __getitem__ - assert isinstance(g1['foo'], Group) - assert isinstance(g1['foo']['bar'], Group) - assert isinstance(g1['foo/bar'], Group) + assert isinstance(g1["foo"], Group) + assert isinstance(g1["foo"]["bar"], Group) + assert isinstance(g1["foo/bar"], Group) if g1._version == 2: - assert isinstance(g1['/foo/bar/'], Group) + assert isinstance(g1["/foo/bar/"], Group) else: # start or end with / raises KeyError # TODO: should we allow stripping of these on v3? 
with pytest.raises(KeyError): - assert isinstance(g1['/foo/bar/'], Group) - assert isinstance(g1['foo/baz'], Array) - assert g2 == g1['foo/bar'] - assert g1['foo']['bar'] == g1['foo/bar'] - assert d2 == g1['foo/baz'] - assert_array_equal(d2[:], g1['foo/baz']) - assert isinstance(g1['a'], Group) - assert isinstance(g1['a']['b'], Group) - assert isinstance(g1['a/b'], Group) - assert isinstance(g1['a']['b']['c'], Array) - assert isinstance(g1['a/b/c'], Array) - assert d1 == g1['a/b/c'] - assert g1['a']['b']['c'] == g1['a/b/c'] - assert_array_equal(d1[:], g1['a/b/c'][:]) + assert isinstance(g1["/foo/bar/"], Group) + assert isinstance(g1["foo/baz"], Array) + assert g2 == g1["foo/bar"] + assert g1["foo"]["bar"] == g1["foo/bar"] + assert d2 == g1["foo/baz"] + assert_array_equal(d2[:], g1["foo/baz"]) + assert isinstance(g1["a"], Group) + assert isinstance(g1["a"]["b"], Group) + assert isinstance(g1["a/b"], Group) + assert isinstance(g1["a"]["b"]["c"], Array) + assert isinstance(g1["a/b/c"], Array) + assert d1 == g1["a/b/c"] + assert g1["a"]["b"]["c"] == g1["a/b/c"] + assert_array_equal(d1[:], g1["a/b/c"][:]) # test __contains__ - assert 'foo' in g1 - assert 'foo/bar' in g1 - assert 'foo/baz' in g1 - assert 'bar' in g1['foo'] - assert 'a' in g1 - assert 'a/b' in g1 - assert 'a/b/c' in g1 - assert 'baz' not in g1 - assert 'a/b/c/d' not in g1 - assert 'a/z' not in g1 - assert 'quux' not in g1['foo'] + assert "foo" in g1 + assert "foo/bar" in g1 + assert "foo/baz" in g1 + assert "bar" in g1["foo"] + assert "a" in g1 + assert "a/b" in g1 + assert "a/b/c" in g1 + assert "baz" not in g1 + assert "a/b/c/d" not in g1 + assert "a/z" not in g1 + assert "quux" not in g1["foo"] # test key errors with pytest.raises(KeyError): - g1['baz'] + g1["baz"] with pytest.raises(KeyError): - g1['x/y/z'] + g1["x/y/z"] # test __len__ assert 2 == len(g1) - assert 2 == len(g1['foo']) - assert 0 == len(g1['foo/bar']) - assert 1 == len(g1['a']) - assert 1 == len(g1['a/b']) + assert 2 == len(g1["foo"]) + assert 0 == len(g1["foo/bar"]) + assert 1 == len(g1["a"]) + assert 1 == len(g1["a/b"]) # test __iter__, keys() if g1._version == 2: # currently assumes sorted by key - assert ['a', 'foo'] == list(g1) - assert ['a', 'foo'] == list(g1.keys()) - assert ['bar', 'baz'] == list(g1['foo']) - assert ['bar', 'baz'] == list(g1['foo'].keys()) + assert ["a", "foo"] == list(g1) + assert ["a", "foo"] == list(g1.keys()) + assert ["bar", "baz"] == list(g1["foo"]) + assert ["bar", "baz"] == list(g1["foo"].keys()) else: # v3 is not necessarily sorted by key - assert ['a', 'foo'] == sorted(list(g1)) - assert ['a', 'foo'] == sorted(list(g1.keys())) - assert ['bar', 'baz'] == sorted(list(g1['foo'])) - assert ['bar', 'baz'] == sorted(list(g1['foo'].keys())) - assert [] == sorted(g1['foo/bar']) - assert [] == sorted(g1['foo/bar'].keys()) + assert ["a", "foo"] == sorted(list(g1)) + assert ["a", "foo"] == sorted(list(g1.keys())) + assert ["bar", "baz"] == sorted(list(g1["foo"])) + assert ["bar", "baz"] == sorted(list(g1["foo"].keys())) + assert [] == sorted(g1["foo/bar"]) + assert [] == sorted(g1["foo/bar"].keys()) # test items(), values() # currently assumes sorted by key @@ -593,24 +627,24 @@ def test_getitem_contains_iterators(self): if g1._version == 3: # v3 are not automatically sorted by key items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) - assert 'a' == items[0][0] - assert g1['a'] == items[0][1] - assert g1['a'] == values[0] - assert 'foo' == items[1][0] - assert g1['foo'] == items[1][1] - assert g1['foo'] == values[1] - - 
items = list(g1['foo'].items()) - values = list(g1['foo'].values()) + assert "a" == items[0][0] + assert g1["a"] == items[0][1] + assert g1["a"] == values[0] + assert "foo" == items[1][0] + assert g1["foo"] == items[1][1] + assert g1["foo"] == values[1] + + items = list(g1["foo"].items()) + values = list(g1["foo"].values()) if g1._version == 3: # v3 are not automatically sorted by key items, values = zip(*sorted(zip(items, values), key=lambda x: x[0])) - assert 'bar' == items[0][0] - assert g1['foo']['bar'] == items[0][1] - assert g1['foo']['bar'] == values[0] - assert 'baz' == items[1][0] - assert g1['foo']['baz'] == items[1][1] - assert g1['foo']['baz'] == values[1] + assert "bar" == items[0][0] + assert g1["foo"]["bar"] == items[0][1] + assert g1["foo"]["bar"] == values[0] + assert "baz" == items[1][0] + assert g1["foo"]["baz"] == items[1][1] + assert g1["foo"]["baz"] == values[1] # test array_keys(), arrays(), group_keys(), groups() @@ -618,29 +652,29 @@ def test_getitem_contains_iterators(self): arrays = list(g1.arrays()) if g1._version == 2: # currently assumes sorted by key - assert ['a', 'foo'] == list(g1.group_keys()) + assert ["a", "foo"] == list(g1.group_keys()) else: - assert ['a', 'foo'] == sorted(list(g1.group_keys())) + assert ["a", "foo"] == sorted(list(g1.group_keys())) groups = sorted(groups) arrays = sorted(arrays) - assert 'a' == groups[0][0] - assert g1['a'] == groups[0][1] - assert 'foo' == groups[1][0] - assert g1['foo'] == groups[1][1] + assert "a" == groups[0][0] + assert g1["a"] == groups[0][1] + assert "foo" == groups[1][0] + assert g1["foo"] == groups[1][1] assert [] == list(g1.array_keys()) assert [] == arrays - assert ['bar'] == list(g1['foo'].group_keys()) - assert ['baz'] == list(g1['foo'].array_keys()) - groups = list(g1['foo'].groups()) - arrays = list(g1['foo'].arrays()) + assert ["bar"] == list(g1["foo"].group_keys()) + assert ["baz"] == list(g1["foo"].array_keys()) + groups = list(g1["foo"].groups()) + arrays = list(g1["foo"].arrays()) if g1._version == 3: groups = sorted(groups) arrays = sorted(arrays) - assert 'bar' == groups[0][0] - assert g1['foo']['bar'] == groups[0][1] - assert 'baz' == arrays[0][0] - assert g1['foo']['baz'] == arrays[0][1] + assert "bar" == groups[0][0] + assert g1["foo"]["bar"] == groups[0][1] + assert "baz" == arrays[0][0] + assert g1["foo"]["baz"] == arrays[0][1] # visitor collection tests items = [] @@ -666,7 +700,7 @@ def visitor4(name, obj): "foo/baz", ] if g1._version == 3: - expected_items = [g1.path + '/' + i for i in expected_items] + expected_items = [g1.path + "/" + i for i in expected_items] assert expected_items == items del items[:] @@ -676,7 +710,7 @@ def visitor4(name, obj): "foo/baz", ] if g1._version == 3: - expected_items = [g1.path + '/' + i for i in expected_items] + expected_items = [g1.path + "/" + i for i in expected_items] assert expected_items == items del items[:] @@ -753,7 +787,7 @@ def visitor0(val, *args): # noinspection PyUnusedLocal def visitor1(val, *args): name = getattr(val, "path", val) - if name.startswith('group/'): + if name.startswith("group/"): # strip the group path for v3 name = name[6:] if name == "a/b/c": @@ -779,8 +813,7 @@ def test_double_counting_group_v3(self): sub_group.create("bar", shape=10, dtype="i4") assert list(root_group.group_keys()) == sorted(group_names) assert list(root_group.groups()) == [ - (name, root_group[name]) - for name in sorted(group_names) + (name, root_group[name]) for name in sorted(group_names) ] def test_empty_getitem_contains_iterators(self): @@ -791,47 
+824,47 @@ def test_empty_getitem_contains_iterators(self): assert [] == list(g) assert [] == list(g.keys()) assert 0 == len(g) - assert 'foo' not in g + assert "foo" not in g g.store.close() def test_iterators_recurse(self): # setup g1 = self.create_group() - g2 = g1.create_group('foo/bar') - d1 = g2.create_dataset('/a/b/c', shape=1000, chunks=100) + g2 = g1.create_group("foo/bar") + d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100) d1[:] = np.arange(1000) - d2 = g1.create_dataset('foo/baz', shape=3000, chunks=300) + d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300) d2[:] = np.arange(3000) - d3 = g2.create_dataset('zab', shape=2000, chunks=200) + d3 = g2.create_dataset("zab", shape=2000, chunks=200) d3[:] = np.arange(2000) # test recursive array_keys - array_keys = list(g1['foo'].array_keys(recurse=False)) - array_keys_recurse = list(g1['foo'].array_keys(recurse=True)) + array_keys = list(g1["foo"].array_keys(recurse=False)) + array_keys_recurse = list(g1["foo"].array_keys(recurse=True)) assert len(array_keys_recurse) > len(array_keys) - assert sorted(array_keys_recurse) == ['baz', 'zab'] + assert sorted(array_keys_recurse) == ["baz", "zab"] # test recursive arrays - arrays = list(g1['foo'].arrays(recurse=False)) - arrays_recurse = list(g1['foo'].arrays(recurse=True)) + arrays = list(g1["foo"].arrays(recurse=False)) + arrays_recurse = list(g1["foo"].arrays(recurse=True)) assert len(arrays_recurse) > len(arrays) - assert 'zab' == arrays_recurse[0][0] - assert g1['foo']['bar']['zab'] == arrays_recurse[0][1] + assert "zab" == arrays_recurse[0][0] + assert g1["foo"]["bar"]["zab"] == arrays_recurse[0][1] g1.store.close() def test_getattr(self): # setup g1 = self.create_group() - g2 = g1.create_group('foo') - g2.create_dataset('bar', shape=100) + g2 = g1.create_group("foo") + g2.create_dataset("bar", shape=100) # test - assert g1['foo'] == g1.foo - assert g2['bar'] == g2.bar + assert g1["foo"] == g1.foo + assert g2["bar"] == g2.bar # test that hasattr returns False instead of an exception (issue #88) - assert not hasattr(g1, 'unexistingattribute') + assert not hasattr(g1, "unexistingattribute") g1.store.close() @@ -839,46 +872,46 @@ def test_setitem(self): g = self.create_group() try: data = np.arange(100) - g['foo'] = data - assert_array_equal(data, g['foo']) + g["foo"] = data + assert_array_equal(data, g["foo"]) data = np.arange(200) - g['foo'] = data - assert_array_equal(data, g['foo']) + g["foo"] = data + assert_array_equal(data, g["foo"]) # 0d array - g['foo'] = 42 - assert () == g['foo'].shape - assert 42 == g['foo'][()] + g["foo"] = 42 + assert () == g["foo"].shape + assert 42 == g["foo"][()] except NotImplementedError: pass g.store.close() def test_delitem(self): g = self.create_group() - g.create_group('foo') - g.create_dataset('bar/baz', shape=100, chunks=10) - assert 'foo' in g - assert 'bar' in g - assert 'bar/baz' in g + g.create_group("foo") + g.create_dataset("bar/baz", shape=100, chunks=10) + assert "foo" in g + assert "bar" in g + assert "bar/baz" in g try: - del g['bar'] + del g["bar"] with pytest.raises(KeyError): - del g['xxx'] + del g["xxx"] except NotImplementedError: pass else: - assert 'foo' in g - assert 'bar' not in g - assert 'bar/baz' not in g + assert "foo" in g + assert "bar" not in g + assert "bar/baz" not in g g.store.close() def test_move(self): g = self.create_group() data = np.arange(100) - g['boo'] = data + g["boo"] = data data = np.arange(100) - g['foo'] = data + g["foo"] = data g.move("foo", "bar") assert "foo" not in g @@ -911,11 +944,11 @@ 
def test_move(self): # meta/data/bar. This is outside the `g` group located at # /meta/root/group, so bar is no longer within `g`. assert "bar" not in g - assert 'meta/root/bar.array.json' in g._store + assert "meta/root/bar.array.json" in g._store if g._chunk_store: - assert 'data/root/bar/c0' in g._chunk_store + assert "data/root/bar/c0" in g._chunk_store else: - assert 'data/root/bar/c0' in g._store + assert "data/root/bar/c0" in g._store assert isinstance(g["foo2"], Group) if g2._version == 2: assert_array_equal(data, g["bar"]) @@ -938,35 +971,35 @@ def test_move(self): def test_array_creation(self): grp = self.create_group() - a = grp.create('a', shape=100, chunks=10) + a = grp.create("a", shape=100, chunks=10) assert isinstance(a, Array) - b = grp.empty('b', shape=100, chunks=10) + b = grp.empty("b", shape=100, chunks=10) assert isinstance(b, Array) assert b.fill_value is None - c = grp.zeros('c', shape=100, chunks=10) + c = grp.zeros("c", shape=100, chunks=10) assert isinstance(c, Array) assert 0 == c.fill_value - d = grp.ones('d', shape=100, chunks=10) + d = grp.ones("d", shape=100, chunks=10) assert isinstance(d, Array) assert 1 == d.fill_value - e = grp.full('e', shape=100, chunks=10, fill_value=42) + e = grp.full("e", shape=100, chunks=10, fill_value=42) assert isinstance(e, Array) assert 42 == e.fill_value - f = grp.empty_like('f', a) + f = grp.empty_like("f", a) assert isinstance(f, Array) assert f.fill_value is None - g = grp.zeros_like('g', a) + g = grp.zeros_like("g", a) assert isinstance(g, Array) assert 0 == g.fill_value - h = grp.ones_like('h', a) + h = grp.ones_like("h", a) assert isinstance(h, Array) assert 1 == h.fill_value - i = grp.full_like('i', e) + i = grp.full_like("i", e) assert isinstance(i, Array) assert 42 == i.fill_value - j = grp.array('j', data=np.arange(100), chunks=10) + j = grp.array("j", data=np.arange(100), chunks=10) assert isinstance(j, Array) assert_array_equal(np.arange(100), j[:]) @@ -974,81 +1007,80 @@ def test_array_creation(self): grp = self.create_group(read_only=True) with pytest.raises(PermissionError): - grp.create('aa', shape=100, chunks=10) + grp.create("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.empty('aa', shape=100, chunks=10) + grp.empty("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.zeros('aa', shape=100, chunks=10) + grp.zeros("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.ones('aa', shape=100, chunks=10) + grp.ones("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.full('aa', shape=100, chunks=10, fill_value=42) + grp.full("aa", shape=100, chunks=10, fill_value=42) with pytest.raises(PermissionError): - grp.array('aa', data=np.arange(100), chunks=10) + grp.array("aa", data=np.arange(100), chunks=10) with pytest.raises(PermissionError): - grp.create('aa', shape=100, chunks=10) + grp.create("aa", shape=100, chunks=10) with pytest.raises(PermissionError): - grp.empty_like('aa', a) + grp.empty_like("aa", a) with pytest.raises(PermissionError): - grp.zeros_like('aa', a) + grp.zeros_like("aa", a) with pytest.raises(PermissionError): - grp.ones_like('aa', a) + grp.ones_like("aa", a) with pytest.raises(PermissionError): - grp.full_like('aa', a) + grp.full_like("aa", a) grp.store.close() def test_paths(self): g1 = self.create_group() - g2 = g1.create_group('foo/bar') + g2 = g1.create_group("foo/bar") if g1._version == 2: - assert g1 == g1['/'] - assert g1 == g1['//'] - assert g1 == g1['///'] - assert g1 == g2['/'] - assert g1 == g2['//'] - 
assert g1 == g2['///'] - assert g2 == g1['foo/bar'] - assert g2 == g1['/foo/bar'] - assert g2 == g1['foo/bar/'] - assert g2 == g1['//foo/bar'] - assert g2 == g1['//foo//bar//'] - assert g2 == g1['///foo///bar///'] - assert g2 == g2['/foo/bar'] + assert g1 == g1["/"] + assert g1 == g1["//"] + assert g1 == g1["///"] + assert g1 == g2["/"] + assert g1 == g2["//"] + assert g1 == g2["///"] + assert g2 == g1["foo/bar"] + assert g2 == g1["/foo/bar"] + assert g2 == g1["foo/bar/"] + assert g2 == g1["//foo/bar"] + assert g2 == g1["//foo//bar//"] + assert g2 == g1["///foo///bar///"] + assert g2 == g2["/foo/bar"] else: # the expected key format gives a match - assert g2 == g1['foo/bar'] + assert g2 == g1["foo/bar"] # TODO: Should presence of a trailing slash raise KeyError? # The spec says "the final character is not a / character" # but we currently strip trailing '/' as done for v2. - assert g2 == g1['foo/bar/'] + assert g2 == g1["foo/bar/"] # double slash also currently works (spec doesn't mention this # case, but have kept it for v2 behavior compatibility) - assert g2 == g1['foo//bar'] + assert g2 == g1["foo//bar"] # TODO, root: fix these cases # v3: leading / implies we are at the root, not within a group, # so these all raise KeyError - for path in ['/foo/bar', '//foo/bar', '//foo//bar//', - '///fooo///bar///']: + for path in ["/foo/bar", "//foo/bar", "//foo//bar//", "///fooo///bar///"]: with pytest.raises(KeyError): g1[path] with pytest.raises(ValueError): - g1['.'] + g1["."] with pytest.raises(ValueError): - g1['..'] + g1[".."] with pytest.raises(ValueError): - g1['foo/.'] + g1["foo/."] with pytest.raises(ValueError): - g1['foo/..'] + g1["foo/.."] with pytest.raises(ValueError): - g1['foo/./bar'] + g1["foo/./bar"] with pytest.raises(ValueError): - g1['foo/../bar'] + g1["foo/../bar"] g1.store.close() @@ -1056,7 +1088,7 @@ def test_pickle(self): # setup group g = self.create_group() - d = g.create_dataset('foo/bar', shape=100, chunks=10) + d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) path = g.path name = g.name @@ -1075,19 +1107,19 @@ def test_pickle(self): assert name == g2.name assert n == len(g2) assert keys == list(g2) - assert isinstance(g2['foo'], Group) - assert isinstance(g2['foo/bar'], Array) + assert isinstance(g2["foo"], Group) + assert isinstance(g2["foo/bar"], Array) g2.store.close() def test_context_manager(self): with self.create_group() as g: - d = g.create_dataset('foo/bar', shape=100, chunks=10) + d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) -@pytest.mark.parametrize('chunk_dict', [False, True]) +@pytest.mark.parametrize("chunk_dict", [False, True]) def test_group_init_from_dict(chunk_dict): if chunk_dict: store, chunk_store = dict(), dict() @@ -1106,20 +1138,25 @@ def test_group_init_from_dict(chunk_dict): # noinspection PyStatementEffect @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3(TestGroup, unittest.TestCase): - @staticmethod def create_store(): # can be overridden in sub-classes return KVStoreV3(dict()), None - def create_group(self, store=None, path='group', read_only=False, - chunk_store=None, synchronizer=None): + def create_group( + self, store=None, path="group", read_only=False, chunk_store=None, synchronizer=None + ): # can be overridden in sub-classes if store is None: store, chunk_store = self.create_store() init_group(store, path=path, chunk_store=chunk_store) - g = Group(store, path=path, read_only=read_only, - chunk_store=chunk_store, synchronizer=synchronizer) 
+ g = Group( + store, + path=path, + read_only=read_only, + chunk_store=chunk_store, + synchronizer=synchronizer, + ) return g def test_group_init_1(self): @@ -1132,13 +1169,13 @@ def test_group_init_1(self): assert chunk_store is g.chunk_store assert not g.read_only # different path/name in v3 case - assert 'group' == g.path - assert '/group' == g.name - assert 'group' == g.basename + assert "group" == g.path + assert "/group" == g.name + assert "group" == g.basename assert isinstance(g.attrs, Attributes) - g.attrs['foo'] = 'bar' - assert g.attrs['foo'] == 'bar' + g.attrs["foo"] = "bar" + assert g.attrs["foo"] == "bar" assert isinstance(g.info, InfoReporter) assert isinstance(repr(g.info), str) @@ -1147,7 +1184,7 @@ def test_group_init_1(self): def test_group_init_errors_2(self): store, chunk_store = self.create_store() - path = 'tmp' + path = "tmp" init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store) # array blocks group with pytest.raises(ValueError): @@ -1156,7 +1193,6 @@ def test_group_init_errors_2(self): class TestGroupWithMemoryStore(TestGroup): - @staticmethod def create_store(): return MemoryStore(), None @@ -1165,14 +1201,12 @@ def create_store(): # noinspection PyStatementEffect @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithMemoryStore(TestGroupWithMemoryStore, TestGroupV3): - @staticmethod def create_store(): return MemoryStoreV3(), None class TestGroupWithDirectoryStore(TestGroup): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1183,7 +1217,6 @@ def create_store(): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithDirectoryStore(TestGroupWithDirectoryStore, TestGroupV3): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1194,7 +1227,6 @@ def create_store(): @skip_test_env_var("ZARR_TEST_ABS") class TestGroupWithABSStore(TestGroup): - @staticmethod def create_store(): container_client = abs_container() @@ -1211,7 +1243,6 @@ def test_pickle(self): @skip_test_env_var("ZARR_TEST_ABS") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithABSStore(TestGroupV3): - @staticmethod def create_store(): container_client = abs_container() @@ -1226,7 +1257,6 @@ def test_pickle(self): class TestGroupWithNestedDirectoryStore(TestGroup): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1237,7 +1267,6 @@ def create_store(): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithFSStore(TestGroup): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1247,21 +1276,19 @@ def create_store(): def test_round_trip_nd(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ = self.create_store() - f = open_group(store, mode='w') - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None) + f = open_group(store, mode="w") + f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) assert name in f - h = open_group(store, mode='r') + h = open_group(store, mode="r") np.testing.assert_array_equal(h[name][:], data) @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithFSStore(TestGroupWithFSStore, TestGroupV3): - @staticmethod def create_store(): path = tempfile.mkdtemp() @@ -1271,80 +1298,78 @@ def create_store(): def test_round_trip_nd(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ 
= self.create_store() - f = open_group(store, path='group', mode='w') - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None) - h = open_group(store, path='group', mode='r') + f = open_group(store, path="group", mode="w") + f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None) + h = open_group(store, path="group", mode="r") np.testing.assert_array_equal(h[name][:], data) - f = open_group(store, path='group2', mode='w') + f = open_group(store, path="group2", mode="w") data_size = data.nbytes - group_meta_size = buffer_size(store[meta_root + 'group.group.json']) - group2_meta_size = buffer_size(store[meta_root + 'group2.group.json']) - array_meta_size = buffer_size(store[meta_root + 'group/raw.array.json']) + group_meta_size = buffer_size(store[meta_root + "group.group.json"]) + group2_meta_size = buffer_size(store[meta_root + "group2.group.json"]) + array_meta_size = buffer_size(store[meta_root + "group/raw.array.json"]) assert store.getsize() == data_size + group_meta_size + group2_meta_size + array_meta_size # added case with path to complete coverage - assert store.getsize('group') == data_size + group_meta_size + array_meta_size - assert store.getsize('group2') == group2_meta_size - assert store.getsize('group/raw') == data_size + array_meta_size + assert store.getsize("group") == data_size + group_meta_size + array_meta_size + assert store.getsize("group2") == group2_meta_size + assert store.getsize("group/raw") == data_size + array_meta_size @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestGroupWithNestedFSStore(TestGroupWithFSStore): - @staticmethod def create_store(): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStore(path, key_separator='/', auto_mkdir=True) + store = FSStore(path, key_separator="/", auto_mkdir=True) return store, None def test_inconsistent_dimension_separator(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ = self.create_store() - f = open_group(store, mode='w') + f = open_group(store, mode="w") # cannot specify dimension_separator that conflicts with the store with pytest.raises(ValueError): - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None, dimension_separator='.') + f.create_dataset( + name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="." + ) @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithNestedFSStore(TestGroupV3WithFSStore): - @staticmethod def create_store(): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStoreV3(path, key_separator='/', auto_mkdir=True) + store = FSStoreV3(path, key_separator="/", auto_mkdir=True) return store, None def test_inconsistent_dimension_separator(self): data = np.arange(1000).reshape(10, 10, 10) - name = 'raw' + name = "raw" store, _ = self.create_store() - f = open_group(store, path='group', mode='w') + f = open_group(store, path="group", mode="w") # cannot specify dimension_separator that conflicts with the store with pytest.raises(ValueError): - f.create_dataset(name, data=data, chunks=(5, 5, 5), - compressor=None, dimension_separator='.') + f.create_dataset( + name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="." 
+ ) class TestGroupWithZipStore(TestGroup): - @staticmethod def create_store(): - path = mktemp(suffix='.zip') + path = mktemp(suffix=".zip") atexit.register(os.remove, path) store = ZipStore(path) return store, None @@ -1353,7 +1378,7 @@ def test_context_manager(self): with self.create_group() as g: store = g.store - d = g.create_dataset('foo/bar', shape=100, chunks=10) + d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) # Check that exiting the context manager closes the store, @@ -1369,65 +1394,59 @@ def test_move(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithZipStore(TestGroupWithZipStore, TestGroupV3): - @staticmethod def create_store(): - path = mktemp(suffix='.zip') + path = mktemp(suffix=".zip") atexit.register(os.remove, path) store = ZipStoreV3(path) return store, None class TestGroupWithDBMStore(TestGroup): - @staticmethod def create_store(): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') - store = DBMStore(path, flag='n') + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") + store = DBMStore(path, flag="n") return store, None @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithDBMStore(TestGroupWithDBMStore, TestGroupV3): - @staticmethod def create_store(): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') - store = DBMStoreV3(path, flag='n') + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") + store = DBMStoreV3(path, flag="n") return store, None class TestGroupWithDBMStoreBerkeleyDB(TestGroup): - @staticmethod def create_store(): bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStore(path, flag='n', open=bsddb3.btopen) + store = DBMStore(path, flag="n", open=bsddb3.btopen) return store, None @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithDBMStoreBerkeleyDB(TestGroupWithDBMStoreBerkeleyDB, TestGroupV3): - @staticmethod def create_store(): bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStoreV3(path, flag='n', open=bsddb3.btopen) + store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) return store, None class TestGroupWithLMDBStore(TestGroup): - @staticmethod def create_store(): pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStore(path) return store, None @@ -1435,21 +1454,19 @@ def create_store(): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithLMDBStore(TestGroupWithLMDBStore, TestGroupV3): - @staticmethod def create_store(): pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) store = LMDBStoreV3(path) return store, None class TestGroupWithSQLiteStore(TestGroup): - def create_store(self): pytest.importorskip("sqlite3") - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path) return store, None @@ -1457,17 +1474,15 @@ def create_store(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithSQLiteStore(TestGroupWithSQLiteStore, TestGroupV3): - def create_store(self): pytest.importorskip("sqlite3") - path = 
mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStoreV3(path) return store, None class TestGroupWithChunkStore(TestGroup): - @staticmethod def create_store(): return KVStore(dict()), KVStore(dict()) @@ -1482,24 +1497,23 @@ def test_chunk_store(self): assert chunk_store is g.chunk_store # create array - a = g.zeros('foo', shape=100, chunks=10) + a = g.zeros("foo", shape=100, chunks=10) assert store is a.store assert chunk_store is a.chunk_store a[:] = np.arange(100) assert_array_equal(np.arange(100), a[:]) # check store keys - expect = sorted([group_meta_key, 'foo/' + array_meta_key]) + expect = sorted([group_meta_key, "foo/" + array_meta_key]) actual = sorted(store.keys()) assert expect == actual - expect = ['foo/' + str(i) for i in range(10)] + expect = ["foo/" + str(i) for i in range(10)] actual = sorted(chunk_store.keys()) assert expect == actual @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithChunkStore(TestGroupWithChunkStore, TestGroupV3): - @staticmethod def create_store(): return KVStoreV3(dict()), KVStoreV3(dict()) @@ -1507,7 +1521,7 @@ def create_store(): def test_chunk_store(self): # setup store, chunk_store = self.create_store() - path = 'group1' + path = "group1" g = self.create_group(store, path=path, chunk_store=chunk_store) # check attributes @@ -1515,26 +1529,25 @@ def test_chunk_store(self): assert chunk_store is g.chunk_store # create array - a = g.zeros('foo', shape=100, chunks=10) + a = g.zeros("foo", shape=100, chunks=10) assert store is a.store assert chunk_store is a.chunk_store a[:] = np.arange(100) assert_array_equal(np.arange(100), a[:]) # check store keys - group_key = meta_root + path + '.group.json' - array_key = meta_root + path + '/foo' + '.array.json' - expect = sorted([group_key, array_key, 'zarr.json']) + group_key = meta_root + path + ".group.json" + array_key = meta_root + path + "/foo" + ".array.json" + expect = sorted([group_key, array_key, "zarr.json"]) actual = sorted(store.keys()) assert expect == actual - expect = [data_root + path + '/foo/c' + str(i) for i in range(10)] - expect += ['zarr.json'] + expect = [data_root + path + "/foo/c" + str(i) for i in range(10)] + expect += ["zarr.json"] actual = sorted(chunk_store.keys()) assert expect == actual class TestGroupWithStoreCache(TestGroup): - @staticmethod def create_store(): store = LRUStoreCache(dict(), max_size=None) @@ -1543,26 +1556,25 @@ def create_store(): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestGroupV3WithStoreCache(TestGroupWithStoreCache, TestGroupV3): - @staticmethod def create_store(): store = LRUStoreCacheV3(dict(), max_size=None) return store, None -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group(zarr_version): # test the group() convenience function # basic usage if zarr_version == 2: g = group() - assert '' == g.path - assert '/' == g.name + assert "" == g.path + assert "/" == g.name else: - g = group(path='group1', zarr_version=zarr_version) - assert 'group1' == g.path - assert '/group1' == g.name + g = group(path="group1", zarr_version=zarr_version) + assert "group1" == g.path + assert "/group1" == g.name assert isinstance(g, Group) # usage with custom store @@ -1571,7 +1583,7 @@ def test_group(zarr_version): path = None else: store = KVStoreV3(dict()) - path = 'foo' + path = "foo" g = group(store=store, path=path) assert isinstance(g, Group) assert store is g.store @@ -1582,7 
+1594,7 @@ def test_group(zarr_version): path = None else: store = KVStoreV3(dict()) - path = 'foo' + path = "foo" init_array(store, path=path, shape=100, chunks=10) with pytest.raises(ValueError): group(store, path=path) @@ -1591,8 +1603,8 @@ def test_group(zarr_version): assert store is g.store -@pytest.mark.skipif(have_fsspec is False, reason='needs fsspec') -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group_writeable_mode(zarr_version, tmp_path): # Regression test for https://github.com/zarr-developers/zarr-python/issues/1353 import fsspec @@ -1602,179 +1614,179 @@ def test_group_writeable_mode(zarr_version, tmp_path): assert zg.store.map == store -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group(zarr_version): # test the open_group() convenience function - store = 'data/group.zarr' + store = "data/group.zarr" expected_store_type = DirectoryStore if zarr_version == 2 else DirectoryStoreV3 # mode == 'w' - path = None if zarr_version == 2 else 'group1' - g = open_group(store, path=path, mode='w', zarr_version=zarr_version) + path = None if zarr_version == 2 else "group1" + g = open_group(store, path=path, mode="w", zarr_version=zarr_version) assert isinstance(g, Group) assert isinstance(g.store, expected_store_type) assert 0 == len(g) - g.create_groups('foo', 'bar') + g.create_groups("foo", "bar") assert 2 == len(g) # mode in 'r', 'r+' - open_array('data/array.zarr', shape=100, chunks=10, mode='w') - for mode in 'r', 'r+': + open_array("data/array.zarr", shape=100, chunks=10, mode="w") + for mode in "r", "r+": with pytest.raises(ValueError): - open_group('doesnotexist', mode=mode) + open_group("doesnotexist", mode=mode) with pytest.raises(ValueError): - open_group('data/array.zarr', mode=mode) - g = open_group(store, mode='r') + open_group("data/array.zarr", mode=mode) + g = open_group(store, mode="r") assert isinstance(g, Group) assert 2 == len(g) with pytest.raises(PermissionError): - g.create_group('baz') - g = open_group(store, mode='r+') + g.create_group("baz") + g = open_group(store, mode="r+") assert isinstance(g, Group) assert 2 == len(g) - g.create_groups('baz', 'quux') + g.create_groups("baz", "quux") assert 4 == len(g) # mode == 'a' shutil.rmtree(store) - g = open_group(store, path=path, mode='a', zarr_version=zarr_version) + g = open_group(store, path=path, mode="a", zarr_version=zarr_version) assert isinstance(g, Group) assert isinstance(g.store, expected_store_type) assert 0 == len(g) - g.create_groups('foo', 'bar') + g.create_groups("foo", "bar") assert 2 == len(g) if zarr_version == 2: with pytest.raises(ValueError): - open_group('data/array.zarr', mode='a', zarr_version=zarr_version) + open_group("data/array.zarr", mode="a", zarr_version=zarr_version) else: # TODO, root: should this raise an error? 
- open_group('data/array.zarr', mode='a', zarr_version=zarr_version) + open_group("data/array.zarr", mode="a", zarr_version=zarr_version) # mode in 'w-', 'x' - for mode in 'w-', 'x': + for mode in "w-", "x": shutil.rmtree(store) g = open_group(store, path=path, mode=mode, zarr_version=zarr_version) assert isinstance(g, Group) assert isinstance(g.store, expected_store_type) assert 0 == len(g) - g.create_groups('foo', 'bar') + g.create_groups("foo", "bar") assert 2 == len(g) with pytest.raises(ValueError): open_group(store, path=path, mode=mode, zarr_version=zarr_version) if zarr_version == 2: with pytest.raises(ValueError): - open_group('data/array.zarr', mode=mode) + open_group("data/array.zarr", mode=mode) # open with path - g = open_group(store, path='foo/bar', zarr_version=zarr_version) + g = open_group(store, path="foo/bar", zarr_version=zarr_version) assert isinstance(g, Group) - assert 'foo/bar' == g.path + assert "foo/bar" == g.path -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group_completions(zarr_version): - path = None if zarr_version == 2 else 'group1' + path = None if zarr_version == 2 else "group1" g = group(path=path, zarr_version=zarr_version) d = dir(g) - assert 'foo' not in d - assert 'bar' not in d - assert 'baz' not in d - assert 'qux' not in d - assert 'xxx' not in d - assert 'yyy' not in d - assert 'zzz' not in d - assert '123' not in d - assert '456' not in d - g.create_groups('foo', 'bar', 'baz/qux', '123') - g.zeros('xxx', shape=100) - g.zeros('yyy', shape=100) - g.zeros('zzz', shape=100) - g.zeros('456', shape=100) + assert "foo" not in d + assert "bar" not in d + assert "baz" not in d + assert "qux" not in d + assert "xxx" not in d + assert "yyy" not in d + assert "zzz" not in d + assert "123" not in d + assert "456" not in d + g.create_groups("foo", "bar", "baz/qux", "123") + g.zeros("xxx", shape=100) + g.zeros("yyy", shape=100) + g.zeros("zzz", shape=100) + g.zeros("456", shape=100) d = dir(g) - assert 'foo' in d - assert 'bar' in d - assert 'baz' in d - assert 'qux' not in d - assert 'xxx' in d - assert 'yyy' in d - assert 'zzz' in d - assert '123' not in d # not valid identifier - assert '456' not in d # not valid identifier - - -@pytest.mark.parametrize('zarr_version', _VERSIONS) + assert "foo" in d + assert "bar" in d + assert "baz" in d + assert "qux" not in d + assert "xxx" in d + assert "yyy" in d + assert "zzz" in d + assert "123" not in d # not valid identifier + assert "456" not in d # not valid identifier + + +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_group_key_completions(zarr_version): - path = None if zarr_version == 2 else 'group1' + path = None if zarr_version == 2 else "group1" g = group(path=path, zarr_version=zarr_version) d = dir(g) # noinspection PyProtectedMember k = g._ipython_key_completions_() # none of these names should be an attribute - assert 'foo' not in d - assert 'bar' not in d - assert 'baz' not in d - assert 'qux' not in d - assert 'xxx' not in d - assert 'yyy' not in d - assert 'zzz' not in d - assert '123' not in d - assert '456' not in d - assert 'asdf;' not in d + assert "foo" not in d + assert "bar" not in d + assert "baz" not in d + assert "qux" not in d + assert "xxx" not in d + assert "yyy" not in d + assert "zzz" not in d + assert "123" not in d + assert "456" not in d + assert "asdf;" not in d # none of these names should be an item - assert 'foo' not in k - assert 'bar' not in k - assert 'baz' not in k - assert 'qux' not in k - 
assert 'xxx' not in k - assert 'yyy' not in k - assert 'zzz' not in k - assert '123' not in k - assert '456' not in k - assert 'asdf;' not in k - - g.create_groups('foo', 'bar', 'baz/qux', '123') - g.zeros('xxx', shape=100) - g.zeros('yyy', shape=100) - g.zeros('zzz', shape=100) - g.zeros('456', shape=100) + assert "foo" not in k + assert "bar" not in k + assert "baz" not in k + assert "qux" not in k + assert "xxx" not in k + assert "yyy" not in k + assert "zzz" not in k + assert "123" not in k + assert "456" not in k + assert "asdf;" not in k + + g.create_groups("foo", "bar", "baz/qux", "123") + g.zeros("xxx", shape=100) + g.zeros("yyy", shape=100) + g.zeros("zzz", shape=100) + g.zeros("456", shape=100) if zarr_version == 2: - g.zeros('asdf;', shape=100) + g.zeros("asdf;", shape=100) else: # cannot have ; in key name for v3 with pytest.raises(ValueError): - g.zeros('asdf;', shape=100) + g.zeros("asdf;", shape=100) d = dir(g) # noinspection PyProtectedMember k = g._ipython_key_completions_() - assert 'foo' in d - assert 'bar' in d - assert 'baz' in d - assert 'qux' not in d - assert 'xxx' in d - assert 'yyy' in d - assert 'zzz' in d - assert '123' not in d # not valid identifier - assert '456' not in d # not valid identifier + assert "foo" in d + assert "bar" in d + assert "baz" in d + assert "qux" not in d + assert "xxx" in d + assert "yyy" in d + assert "zzz" in d + assert "123" not in d # not valid identifier + assert "456" not in d # not valid identifier if zarr_version == 2: - assert 'asdf;' not in d # not valid identifier - - assert 'foo' in k - assert 'bar' in k - assert 'baz' in k - assert 'qux' not in k - assert 'xxx' in k - assert 'yyy' in k - assert 'zzz' in k - assert '123' in k - assert '456' in k + assert "asdf;" not in d # not valid identifier + + assert "foo" in k + assert "bar" in k + assert "baz" in k + assert "qux" not in k + assert "xxx" in k + assert "yyy" in k + assert "zzz" in k + assert "123" in k + assert "456" in k if zarr_version == 2: - assert 'asdf;' in k + assert "asdf;" in k def _check_tree(g, expect_bytes, expect_text): @@ -1788,72 +1800,88 @@ def _check_tree(g, expect_bytes, expect_text): isinstance(widget, ipytree.Tree) -@pytest.mark.parametrize('zarr_version', _VERSIONS) -@pytest.mark.parametrize('at_root', [False, True]) +@pytest.mark.parametrize("zarr_version", _VERSIONS) +@pytest.mark.parametrize("at_root", [False, True]) def test_tree(zarr_version, at_root): # setup - path = None if at_root else 'group1' + path = None if at_root else "group1" g1 = group(path=path, zarr_version=zarr_version) - g2 = g1.create_group('foo') - g3 = g1.create_group('bar') - g3.create_group('baz') - g5 = g3.create_group('quux') - g5.create_dataset('baz', shape=100, chunks=10) + g2 = g1.create_group("foo") + g3 = g1.create_group("bar") + g3.create_group("baz") + g5 = g3.create_group("quux") + g5.create_dataset("baz", shape=100, chunks=10) - tree_path = '/' if at_root else path + tree_path = "/" if at_root else path # test root group if zarr_version == 2: - expect_bytes = textwrap.dedent(f"""\ + expect_bytes = textwrap.dedent( + f"""\ {tree_path} +-- bar | +-- baz | +-- quux | +-- baz (100,) float64 - +-- foo""").encode() - expect_text = textwrap.dedent(f"""\ + +-- foo""" + ).encode() + expect_text = textwrap.dedent( + f"""\ {tree_path} ├── bar │ ├── baz │ └── quux │ └── baz (100,) float64 - └── foo""") + └── foo""" + ) else: # Almost the same as for v2, but has a path name and the # subgroups are not necessarily sorted alphabetically. 
- expect_bytes = textwrap.dedent(f"""\ + expect_bytes = textwrap.dedent( + f"""\ {tree_path} +-- foo +-- bar +-- baz +-- quux - +-- baz (100,) float64""").encode() - expect_text = textwrap.dedent(f"""\ + +-- baz (100,) float64""" + ).encode() + expect_text = textwrap.dedent( + f"""\ {tree_path} ├── foo └── bar ├── baz └── quux - └── baz (100,) float64""") + └── baz (100,) float64""" + ) _check_tree(g1, expect_bytes, expect_text) # test different group - expect_bytes = textwrap.dedent("""\ - foo""").encode() - expect_text = textwrap.dedent("""\ - foo""") + expect_bytes = textwrap.dedent( + """\ + foo""" + ).encode() + expect_text = textwrap.dedent( + """\ + foo""" + ) _check_tree(g2, expect_bytes, expect_text) # test different group - expect_bytes = textwrap.dedent("""\ + expect_bytes = textwrap.dedent( + """\ bar +-- baz +-- quux - +-- baz (100,) float64""").encode() - expect_text = textwrap.dedent("""\ + +-- baz (100,) float64""" + ).encode() + expect_text = textwrap.dedent( + """\ bar ├── baz └── quux - └── baz (100,) float64""") + └── baz (100,) float64""" + ) _check_tree(g3, expect_bytes, expect_text) @@ -1866,38 +1894,38 @@ def test_group_mismatched_store_versions(): chunk_store_v2 = KVStore(dict()) chunk_store_v3 = KVStoreV3(dict()) - init_group(store_v2, path='group1', chunk_store=chunk_store_v2) - init_group(store_v3, path='group1', chunk_store=chunk_store_v3) + init_group(store_v2, path="group1", chunk_store=chunk_store_v2) + init_group(store_v3, path="group1", chunk_store=chunk_store_v3) - g1_v3 = Group(store_v3, path='group1', read_only=True, chunk_store=chunk_store_v3) + g1_v3 = Group(store_v3, path="group1", read_only=True, chunk_store=chunk_store_v3) assert isinstance(g1_v3._store, KVStoreV3) - g1_v2 = Group(store_v2, path='group1', read_only=True, chunk_store=chunk_store_v2) + g1_v2 = Group(store_v2, path="group1", read_only=True, chunk_store=chunk_store_v2) assert isinstance(g1_v2._store, KVStore) # store and chunk_store must have the same zarr protocol version with pytest.raises(ValueError): - Group(store_v3, path='group1', read_only=False, chunk_store=chunk_store_v2) + Group(store_v3, path="group1", read_only=False, chunk_store=chunk_store_v2) with pytest.raises(ValueError): - Group(store_v2, path='group1', read_only=False, chunk_store=chunk_store_v3) + Group(store_v2, path="group1", read_only=False, chunk_store=chunk_store_v3) with pytest.raises(ValueError): - open_group(store_v2, path='group1', chunk_store=chunk_store_v3) + open_group(store_v2, path="group1", chunk_store=chunk_store_v3) with pytest.raises(ValueError): - open_group(store_v3, path='group1', chunk_store=chunk_store_v2) + open_group(store_v3, path="group1", chunk_store=chunk_store_v2) # raises Value if read_only and path is not a pre-existing group with pytest.raises(ValueError): - Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) + Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3) with pytest.raises(ValueError): - Group(store_v3, path='group2', read_only=True, chunk_store=chunk_store_v3) + Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3) -@pytest.mark.parametrize('zarr_version', _VERSIONS) +@pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group_from_paths(zarr_version): """Verify zarr_version is applied to both the store and chunk_store.""" store = tempfile.mkdtemp() chunk_store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) atexit.register(atexit_rmtree, chunk_store) - path = 'g1' + path = "g1" g = 
open_group(store, path=path, chunk_store=chunk_store, zarr_version=zarr_version) assert g._store._store_version == g._chunk_store._store_version == zarr_version diff --git a/zarr/tests/test_indexing.py b/zarr/tests/test_indexing.py index 61e76c63da..8a34c1e715 100644 --- a/zarr/tests/test_indexing.py +++ b/zarr/tests/test_indexing.py @@ -51,22 +51,20 @@ def test_replace_ellipsis(): assert (slice(None), 0) == replace_ellipsis((slice(None), 0), (100, 100)) # 2D slice - assert ((slice(None), slice(None)) == - replace_ellipsis(Ellipsis, (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis(slice(None), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((Ellipsis, slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), Ellipsis), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), Ellipsis, slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((Ellipsis, slice(None), slice(None)), (100, 100))) - assert ((slice(None), slice(None)) == - replace_ellipsis((slice(None), slice(None), Ellipsis), (100, 100))) + assert (slice(None), slice(None)) == replace_ellipsis(Ellipsis, (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis(slice(None), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((slice(None), slice(None)), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((Ellipsis, slice(None)), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis((slice(None), Ellipsis), (100, 100)) + assert (slice(None), slice(None)) == replace_ellipsis( + (slice(None), Ellipsis, slice(None)), (100, 100) + ) + assert (slice(None), slice(None)) == replace_ellipsis( + (Ellipsis, slice(None), slice(None)), (100, 100) + ) + assert (slice(None), slice(None)) == replace_ellipsis( + (slice(None), slice(None), Ellipsis), (100, 100) + ) def test_get_basic_selection_0d(): @@ -87,25 +85,25 @@ def test_get_basic_selection_0d(): assert_array_equal(a, b) # test structured array - value = (b'aaa', 1, 4.2) - a = np.array(value, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + value = (b"aaa", 1, 4.2) + a = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) z[()] = value assert_array_equal(a, z.get_basic_selection(Ellipsis)) assert_array_equal(a, z[...]) assert a[()] == z.get_basic_selection(()) assert a[()] == z[()] - assert b'aaa' == z.get_basic_selection((), fields='foo') - assert b'aaa' == z['foo'] - assert a[['foo', 'bar']] == z.get_basic_selection((), fields=['foo', 'bar']) - assert a[['foo', 'bar']] == z['foo', 'bar'] + assert b"aaa" == z.get_basic_selection((), fields="foo") + assert b"aaa" == z["foo"] + assert a[["foo", "bar"]] == z.get_basic_selection((), fields=["foo", "bar"]) + assert a[["foo", "bar"]] == z["foo", "bar"] # test out param b = np.zeros_like(a) z.get_basic_selection(Ellipsis, out=b) assert_array_equal(a, b) - c = np.zeros_like(a[['foo', 'bar']]) - z.get_basic_selection(Ellipsis, out=c, fields=['foo', 'bar']) - assert_array_equal(a[['foo', 'bar']], c) + c = np.zeros_like(a[["foo", "bar"]]) + z.get_basic_selection(Ellipsis, out=c, fields=["foo", "bar"]) + assert_array_equal(a[["foo", "bar"]], c) basic_selections_1d = [ @@ -175,8 +173,8 @@ def test_get_basic_selection_0d(): slice(-1, 0, -1), # bad stuff 2.3, - 'foo', - 
b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), @@ -252,8 +250,8 @@ def test_get_basic_selection_1d(): basic_selections_2d_bad = [ # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (2.3, slice(None)), # only positive step supported @@ -300,71 +298,34 @@ def test_fancy_indexing_fallback_on_get_setitem(): [0, 0, 0, 1], ], ) - np.testing.assert_array_equal( - z[[1, 2, 3], [1, 2, 3]], 1 - ) + np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) # test broadcasting - np.testing.assert_array_equal( - z[1, [1, 2, 3]], [1, 0, 0] - ) + np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) # test 1D fancy indexing z2 = zarr.zeros(5) z2[[1, 2, 3]] = 1 - np.testing.assert_array_equal( - z2, [0, 1, 1, 1, 0] - ) + np.testing.assert_array_equal(z2, [0, 1, 1, 1, 0]) -@pytest.mark.parametrize("index,expected_result", - [ - # Single iterable of integers - ( - [0, 1], - [[0, 1, 2], - [3, 4, 5]] - ), - # List first, then slice - ( - ([0, 1], slice(None)), - [[0, 1, 2], - [3, 4, 5]] - ), - # List first, then slice - ( - ([0, 1], slice(1, None)), - [[1, 2], - [4, 5]] - ), - # Slice first, then list - ( - (slice(0, 2), [0, 2]), - [[0, 2], - [3, 5]] - ), - # Slices only - ( - (slice(0, 2), slice(0, 2)), - [[0, 1], - [3, 4]] - ), - # List with repeated index - ( - ([1, 0, 1], slice(1, None)), - [[4, 5], - [1, 2], - [4, 5]] - ), - # 1D indexing - ( - ([1, 0, 1]), - [ - [3, 4, 5], - [0, 1, 2], - [3, 4, 5] - ] - ) - - ]) +@pytest.mark.parametrize( + "index,expected_result", + [ + # Single iterable of integers + ([0, 1], [[0, 1, 2], [3, 4, 5]]), + # List first, then slice + (([0, 1], slice(None)), [[0, 1, 2], [3, 4, 5]]), + # List first, then slice + (([0, 1], slice(1, None)), [[1, 2], [4, 5]]), + # Slice first, then list + ((slice(0, 2), [0, 2]), [[0, 2], [3, 5]]), + # Slices only + ((slice(0, 2), slice(0, 2)), [[0, 1], [3, 4]]), + # List with repeated index + (([1, 0, 1], slice(1, None)), [[4, 5], [1, 2], [4, 5]]), + # 1D indexing + (([1, 0, 1]), [[3, 4, 5], [0, 1, 2], [3, 4, 5]]), + ], +) def test_orthogonal_indexing_fallback_on_getitem_2d(index, expected_result): """ Tests the orthogonal indexing fallback on __getitem__ for a 2D matrix. @@ -382,34 +343,19 @@ def test_orthogonal_indexing_fallback_on_getitem_2d(index, expected_result): np.testing.assert_array_equal(z[index], expected_result) -@pytest.mark.parametrize("index,expected_result", - [ - # Single iterable of integers - ( - [0, 1], - [[[0, 1, 2], - [3, 4, 5], - [6, 7, 8]], - [[9, 10, 11], - [12, 13, 14], - [15, 16, 17]]] - ), - # One slice, two integers - ( - (slice(0, 2), 1, 1), - [4, 13] - ), - # One integer, two slices - ( - (slice(0, 2), 1, slice(0, 2)), - [[3, 4], [12, 13]] - ), - # Two slices and a list - ( - (slice(0, 2), [1, 2], slice(0, 2)), - [[[3, 4], [6, 7]], [[12, 13], [15, 16]]] - ), - ]) +@pytest.mark.parametrize( + "index,expected_result", + [ + # Single iterable of integers + ([0, 1], [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]), + # One slice, two integers + ((slice(0, 2), 1, 1), [4, 13]), + # One integer, two slices + ((slice(0, 2), 1, slice(0, 2)), [[3, 4], [12, 13]]), + # Two slices and a list + ((slice(0, 2), [1, 2], slice(0, 2)), [[[3, 4], [6, 7]], [[12, 13], [15, 16]]]), + ], +) def test_orthogonal_indexing_fallback_on_getitem_3d(index, expected_result): """ Tests the orthogonal indexing fallback on __getitem__ for a 3D matrix. 
@@ -439,36 +385,14 @@ def test_orthogonal_indexing_fallback_on_getitem_3d(index, expected_result): "index,expected_result", [ # Single iterable of integers - ( - [0, 1], - [ - [1, 1, 1], - [1, 1, 1], - [0, 0, 0] - ] - ), + ([0, 1], [[1, 1, 1], [1, 1, 1], [0, 0, 0]]), # List and slice combined - ( - ([0, 1], slice(1, 3)), - [[0, 1, 1], - [0, 1, 1], - [0, 0, 0]] - ), + (([0, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), # Index repetition is ignored on setitem - ( - ([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), - [[0, 1, 1], - [0, 1, 1], - [0, 0, 0]] - ), + (([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), # Slice with step - ( - ([0, 2], slice(None, None, 2)), - [[1, 0, 1], - [0, 0, 0], - [1, 0, 1]] - ) - ] + (([0, 2], slice(None, None, 2)), [[1, 0, 1], [0, 0, 0], [1, 0, 1]]), + ], ) def test_orthogonal_indexing_fallback_on_setitem_2d(index, expected_result): """ @@ -482,12 +406,8 @@ def test_orthogonal_indexing_fallback_on_setitem_2d(index, expected_result): z = zarr.array(a) z[index] = 1 a[index] = 1 - np.testing.assert_array_equal( - z, expected_result - ) - np.testing.assert_array_equal( - z, a, err_msg="Indexing disagrees with numpy" - ) + np.testing.assert_array_equal(z, expected_result) + np.testing.assert_array_equal(z, a, err_msg="Indexing disagrees with numpy") def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): @@ -495,15 +415,11 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): with pytest.raises(IndexError): z2[[1, 2, 3], [1, 2, 3]] = 2 with pytest.raises(IndexError): - np.testing.assert_array_equal( - z2[[1, 2, 3], [1, 2, 3]], 0 - ) + np.testing.assert_array_equal(z2[[1, 2, 3], [1, 2, 3]], 0) with pytest.raises(IndexError): z2[..., [1, 2, 3]] = 2 with pytest.raises(IndexError): - np.testing.assert_array_equal( - z2[..., [1, 2, 3]], 0 - ) + np.testing.assert_array_equal(z2[..., [1, 2, 3]], 0) def test_set_basic_selection_0d(): @@ -523,8 +439,8 @@ def test_set_basic_selection_0d(): assert_array_equal(v, z) # test structured array - value = (b'aaa', 1, 4.2) - v = np.array(value, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + value = (b"aaa", 1, 4.2) + v = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.zeros_like(v) z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) @@ -538,19 +454,19 @@ def test_set_basic_selection_0d(): z[...] 
= a assert_array_equal(a, z) # with fields - z.set_basic_selection(Ellipsis, v['foo'], fields='foo') - assert v['foo'] == z['foo'] - assert a['bar'] == z['bar'] - assert a['baz'] == z['baz'] - z['bar'] = v['bar'] - assert v['foo'] == z['foo'] - assert v['bar'] == z['bar'] - assert a['baz'] == z['baz'] + z.set_basic_selection(Ellipsis, v["foo"], fields="foo") + assert v["foo"] == z["foo"] + assert a["bar"] == z["bar"] + assert a["baz"] == z["baz"] + z["bar"] = v["bar"] + assert v["foo"] == z["foo"] + assert v["bar"] == z["bar"] + assert a["baz"] == z["baz"] # multiple field assignment not supported with pytest.raises(IndexError): - z.set_basic_selection(Ellipsis, v[['foo', 'bar']], fields=['foo', 'bar']) + z.set_basic_selection(Ellipsis, v[["foo", "bar"]], fields=["foo", "bar"]) with pytest.raises(IndexError): - z[..., 'foo', 'bar'] = v[['foo', 'bar']] + z[..., "foo", "bar"] = v[["foo", "bar"]] def _test_get_orthogonal_selection(a, z, selection): @@ -610,7 +526,6 @@ def test_get_orthogonal_selection_1d_int(): [0, 3, 10, -23, -12, -1], # explicit test not sorted [3, 105, 23, 127], - ] for selection in selections: _test_get_orthogonal_selection(a, z, selection) @@ -671,7 +586,7 @@ def test_get_orthogonal_selection_2d(): # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) _test_get_orthogonal_selection_2d(a, z, ix0, ix1) ix0.sort() ix1.sort() @@ -738,14 +653,14 @@ def test_get_orthogonal_selection_3d(): # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) - ix2 = np.random.binomial(1, .5, size=a.shape[2]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) - ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) ix0.sort() ix1.sort() @@ -846,12 +761,12 @@ def test_set_orthogonal_selection_2d(): # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) ix0.sort() ix1.sort() @@ -904,14 +819,14 @@ def test_set_orthogonal_selection_3d(): # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) - ix2 = np.random.binomial(1, .5, size=a.shape[2]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) + ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) 
_test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) # integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) - ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * .5), replace=True) - ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * .5), replace=True) + ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) + ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) # sorted increasing @@ -939,19 +854,13 @@ def test_orthogonal_indexing_fallback_on_get_setitem(): [0, 0, 0, 1], ], ) - np.testing.assert_array_equal( - z[[1, 2, 3], [1, 2, 3]], 1 - ) + np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) # test broadcasting - np.testing.assert_array_equal( - z[1, [1, 2, 3]], [1, 0, 0] - ) + np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) # test 1D fancy indexing z2 = zarr.zeros(5) z2[[1, 2, 3]] = 1 - np.testing.assert_array_equal( - z2, [0, 1, 1, 1, 0] - ) + np.testing.assert_array_equal(z2, [0, 1, 1, 1, 0]) def _test_get_coordinate_selection(a, z, selection): @@ -969,8 +878,8 @@ def _test_get_coordinate_selection(a, z, selection): Ellipsis, # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), @@ -1060,10 +969,8 @@ def test_get_coordinate_selection_2d(): _test_get_coordinate_selection(a, z, (ix0, ix1)) # multi-dimensional selection - ix0 = np.array([[1, 1, 2], - [2, 2, 5]]) - ix1 = np.array([[1, 3, 2], - [1, 0, 0]]) + ix0 = np.array([[1, 1, 2], [2, 2, 5]]) + ix1 = np.array([[1, 3, 2], [1, 0, 0]]) _test_get_coordinate_selection(a, z, (ix0, ix1)) with pytest.raises(IndexError): @@ -1146,10 +1053,8 @@ def test_set_coordinate_selection_2d(): _test_set_coordinate_selection(v, a, z, selection) # multi-dimensional selection - ix0 = np.array([[1, 2, 3], - [4, 5, 6]]) - ix1 = np.array([[1, 3, 2], - [2, 0, 5]]) + ix0 = np.array([[1, 2, 3], [4, 5, 6]]) + ix1 = np.array([[1, 3, 2], [2, 0, 5]]) _test_set_coordinate_selection(v, a, z, (ix0, ix1)) @@ -1196,12 +1101,12 @@ def _test_get_block_selection(a, z, selection, expected_idx): slice(3, 8, 2), # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), - [0, 5, 3] + [0, 5, 3], ] @@ -1211,8 +1116,7 @@ def test_get_block_selection_1d(): z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) z[:] = a - for selection, expected_idx in \ - zip(block_selections_1d, block_selections_1d_array_projection): + for selection, expected_idx in zip(block_selections_1d, block_selections_1d_array_projection): _test_get_block_selection(a, z, selection, expected_idx) bad_selections = block_selections_1d_bad + [ @@ -1264,8 +1168,7 @@ def test_get_block_selection_2d(): z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) z[:] = a - for selection, expected_idx in \ - zip(block_selections_2d, block_selections_2d_array_projection): + for selection, expected_idx in zip(block_selections_2d, block_selections_2d_array_projection): _test_get_block_selection(a, z, selection, expected_idx) with pytest.raises(IndexError): @@ -1300,8 +1203,7 @@ def test_set_block_selection_1d(): a = np.empty(v.shape, dtype=v.dtype) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) - for selection, expected_idx in \ - zip(block_selections_1d, block_selections_1d_array_projection): + for selection, expected_idx in zip(block_selections_1d, block_selections_1d_array_projection): _test_set_block_selection(v, a, z, selection, expected_idx) for 
selection in block_selections_1d_bad: @@ -1317,8 +1219,7 @@ def test_set_block_selection_2d(): a = np.empty(v.shape, dtype=v.dtype) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) - for selection, expected_idx in \ - zip(block_selections_2d, block_selections_2d_array_projection): + for selection, expected_idx in zip(block_selections_2d, block_selections_2d_array_projection): _test_set_block_selection(v, a, z, selection, expected_idx) with pytest.raises(IndexError): @@ -1347,8 +1248,8 @@ def _test_get_mask_selection(a, z, selection): Ellipsis, # bad stuff 2.3, - 'foo', - b'xxx', + "foo", + b"xxx", None, (0, 0), (slice(None), slice(None)), @@ -1478,7 +1379,7 @@ def test_get_selection_out(): # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) - ix1 = np.random.binomial(1, .5, size=a.shape[1]).astype(bool) + ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) selections = [ # index both axes with array (ix0, ix1), @@ -1526,22 +1427,20 @@ def test_get_selection_out(): def test_get_selections_with_fields(): - a = [('aaa', 1, 4.2), - ('bbb', 2, 8.4), - ('ccc', 3, 12.6)] - a = np.array(a, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + a = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] + a = np.array(a, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, chunks=2, dtype=a.dtype, fill_value=None) z[:] = a fields_fixture = [ - 'foo', - ['foo'], - ['foo', 'bar'], - ['foo', 'baz'], - ['bar', 'baz'], - ['foo', 'bar', 'baz'], - ['bar', 'foo'], - ['baz', 'bar', 'foo'], + "foo", + ["foo"], + ["foo", "bar"], + ["foo", "baz"], + ["bar", "baz"], + ["foo", "bar", "baz"], + ["bar", "foo"], + ["baz", "bar", "foo"], ] for fields in fields_fixture: @@ -1629,30 +1528,28 @@ def test_get_selections_with_fields(): # missing/bad fields with pytest.raises(IndexError): - z.get_basic_selection(Ellipsis, fields=['notafield']) + z.get_basic_selection(Ellipsis, fields=["notafield"]) with pytest.raises(IndexError): z.get_basic_selection(Ellipsis, fields=slice(None)) def test_set_selections_with_fields(): - v = [('aaa', 1, 4.2), - ('bbb', 2, 8.4), - ('ccc', 3, 12.6)] - v = np.array(v, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + v = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] + v = np.array(v, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.empty_like(v) z = zarr.empty_like(v, chunks=2) fields_fixture = [ - 'foo', + "foo", [], - ['foo'], - ['foo', 'bar'], - ['foo', 'baz'], - ['bar', 'baz'], - ['foo', 'bar', 'baz'], - ['bar', 'foo'], - ['baz', 'bar', 'foo'], + ["foo"], + ["foo", "bar"], + ["foo", "baz"], + ["bar", "baz"], + ["foo", "bar", "baz"], + ["bar", "foo"], + ["baz", "bar", "foo"], ] for fields in fields_fixture: @@ -1682,8 +1579,8 @@ def test_set_selections_with_fields(): key = fields # setup expectation - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) assert_array_equal(a, z[:]) a[key] = v[key] # total selection @@ -1691,31 +1588,31 @@ def test_set_selections_with_fields(): assert_array_equal(a, z[:]) # basic selection with slice - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) a[key][0:2] = v[key][0:2] z.set_basic_selection(slice(0, 2), v[key][0:2], fields=fields) assert_array_equal(a, z[:]) # orthogonal selection - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) ix = [0, 2] a[key][ix] = v[key][ix] z.set_orthogonal_selection(ix, v[key][ix], 
fields=fields) assert_array_equal(a, z[:]) # coordinate selection - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) ix = [0, 2] a[key][ix] = v[key][ix] z.set_coordinate_selection(ix, v[key][ix], fields=fields) assert_array_equal(a, z[:]) # mask selection - a[:] = ('', 0, 0) - z[:] = ('', 0, 0) + a[:] = ("", 0, 0) + z[:] = ("", 0, 0) ix = [True, False, True] a[key][ix] = v[key][ix] z.set_mask_selection(ix, v[key][ix], fields=fields) @@ -1823,17 +1720,24 @@ def test_numpy_int_indexing(): # 1D test cases ((1070,), (50,), [("__getitem__", (slice(200, 400),))]), ((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]), - ((1070,), (50,), [ - ("__getitem__", (slice(200, 400),)), - ("__setitem__", (slice(200, 400, 100),)), - ]), - + ( + (1070,), + (50,), + [ + ("__getitem__", (slice(200, 400),)), + ("__setitem__", (slice(200, 400, 100),)), + ], + ), # 2D test cases - ((40, 50), (5, 8), [ - ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), - ("__setitem__", (slice(None), (slice(None)))), - ]), - ] + ( + (40, 50), + (5, 8), + [ + ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), + ("__setitem__", (slice(None), (slice(None)))), + ], + ), + ], ) def test_accessed_chunks(shape, chunks, ops): # Test that only the required chunks are accessed during basic selection operations @@ -1881,9 +1785,8 @@ def test_accessed_chunks(shape, chunks, ops): # don't determine if the chunk was actually partial here, just that the # counts are consistent that this might have happened if optype == "__setitem__": - assert ( - ("__getitem__", ci) not in delta_counts or - delta_counts.pop(("__getitem__", ci)) == 1 - ) + assert ("__getitem__", ci) not in delta_counts or delta_counts.pop( + ("__getitem__", ci) + ) == 1 # Check that no other chunks were accessed assert len(delta_counts) == 0 diff --git a/zarr/tests/test_info.py b/zarr/tests/test_info.py index 434d19d1f7..7fb6feb11b 100644 --- a/zarr/tests/test_info.py +++ b/zarr/tests/test_info.py @@ -5,22 +5,32 @@ from zarr.util import InfoReporter -@pytest.mark.parametrize('array_size', [10, 15000]) +@pytest.mark.parametrize("array_size", [10, 15000]) def test_info(array_size): # setup - g = zarr.group(store=dict(), chunk_store=dict(), - synchronizer=zarr.ThreadSynchronizer()) - g.create_group('foo') - z = g.zeros('bar', shape=array_size, filters=[numcodecs.Adler32()]) + g = zarr.group(store=dict(), chunk_store=dict(), synchronizer=zarr.ThreadSynchronizer()) + g.create_group("foo") + z = g.zeros("bar", shape=array_size, filters=[numcodecs.Adler32()]) # test group info items = g.info_items() keys = sorted([k for k, _ in items]) - expected_keys = sorted([ - 'Type', 'Read-only', 'Synchronizer type', 'Store type', 'Chunk store type', - 'No. members', 'No. arrays', 'No. groups', 'Arrays', 'Groups', 'Name' - ]) + expected_keys = sorted( + [ + "Type", + "Read-only", + "Synchronizer type", + "Store type", + "Chunk store type", + "No. members", + "No. arrays", + "No. groups", + "Arrays", + "Groups", + "Name", + ] + ) assert expected_keys == keys # can also get a string representation of info via the info attribute @@ -30,11 +40,26 @@ def test_info(array_size): # test array info items = z.info_items() keys = sorted([k for k, _ in items]) - expected_keys = sorted([ - 'Type', 'Data type', 'Shape', 'Chunk shape', 'Order', 'Read-only', 'Filter [0]', - 'Compressor', 'Synchronizer type', 'Store type', 'Chunk store type', 'No. bytes', - 'No. 
bytes stored', 'Storage ratio', 'Chunks initialized', 'Name' - ]) + expected_keys = sorted( + [ + "Type", + "Data type", + "Shape", + "Chunk shape", + "Order", + "Read-only", + "Filter [0]", + "Compressor", + "Synchronizer type", + "Store type", + "Chunk store type", + "No. bytes", + "No. bytes stored", + "Storage ratio", + "Chunks initialized", + "Name", + ] + ) assert expected_keys == keys # can also get a string representation of info via the info attribute diff --git a/zarr/tests/test_meta.py b/zarr/tests/test_meta.py index a78375986e..db50560c8e 100644 --- a/zarr/tests/test_meta.py +++ b/zarr/tests/test_meta.py @@ -7,18 +7,27 @@ from zarr.codecs import Blosc, Delta, Pickle, Zlib from zarr.errors import MetadataError -from zarr.meta import (ZARR_FORMAT, decode_array_metadata, decode_dtype, - decode_group_metadata, encode_array_metadata, - encode_dtype, encode_fill_value, decode_fill_value, - get_extended_dtype_info, _v3_complex_types, - _v3_datetime_types, _default_entry_point_metadata_v3, - Metadata3) +from zarr.meta import ( + ZARR_FORMAT, + decode_array_metadata, + decode_dtype, + decode_group_metadata, + encode_array_metadata, + encode_dtype, + encode_fill_value, + decode_fill_value, + get_extended_dtype_info, + _v3_complex_types, + _v3_datetime_types, + _default_entry_point_metadata_v3, + Metadata3, +) from zarr.util import normalize_dtype, normalize_fill_value def assert_json_equal(expect, actual): if isinstance(actual, bytes): - actual = str(actual, 'ascii') + actual = str(actual, "ascii") ej = json.loads(expect) aj = json.loads(actual) assert ej == aj @@ -29,14 +38,15 @@ def test_encode_decode_array_1(): meta = dict( shape=(100,), chunks=(10,), - dtype=np.dtype('U4', 'U4", " 0: @@ -1399,8 +1416,7 @@ def s3(request): pass timeout -= 0.1 # pragma: no cover time.sleep(0.1) # pragma: no cover - s3so = dict(client_kwargs={'endpoint_url': endpoint_uri}, - use_listings_cache=False) + s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) s3 = s3fs.S3FileSystem(anon=False, **s3so) s3.mkdir("test") request.cls.s3so = s3so @@ -1410,7 +1426,6 @@ def s3(request): class TestNestedDirectoryStore(TestDirectoryStore): - def create_store(self, normalize_keys=False, **kwargs): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) @@ -1425,23 +1440,23 @@ def test_init_array(self): # check metadata assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] - assert meta['dimension_separator'] == "/" + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] + assert meta["dimension_separator"] == "/" def test_chunk_nesting(self): store = self.create_store() # any path where last segment looks like a chunk key gets special handling - store[self.root + '0.0'] = b'xxx' - assert b'xxx' == store[self.root + '0.0'] + store[self.root + "0.0"] = b"xxx" + assert b"xxx" == store[self.root + "0.0"] # assert b'xxx' == store['0/0'] - store[self.root + 'foo/10.20.30'] = b'yyy' - assert b'yyy' == store[self.root + 'foo/10.20.30'] + store[self.root + "foo/10.20.30"] = b"yyy" + assert b"yyy" == store[self.root + "foo/10.20.30"] # assert b'yyy' == store['foo/10/20/30'] - store[self.root + '42'] = b'zzz' - assert b'zzz' == store[self.root + '42'] + store[self.root + "42"] = b"zzz" + assert 
b"zzz" == store[self.root + "42"] def test_listdir(self): store = self.create_store() @@ -1452,29 +1467,22 @@ def test_listdir(self): class TestNestedDirectoryStoreNone: - def test_value_error(self): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = NestedDirectoryStore( - path, normalize_keys=True, - dimension_separator=None) + store = NestedDirectoryStore(path, normalize_keys=True, dimension_separator=None) assert store._dimension_separator == "/" class TestNestedDirectoryStoreWithWrongValue: - def test_value_error(self): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) with pytest.raises(ValueError): - NestedDirectoryStore( - path, normalize_keys=True, - dimension_separator=".") + NestedDirectoryStore(path, normalize_keys=True, dimension_separator=".") class TestN5Store(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False): path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) @@ -1486,29 +1494,29 @@ def test_equal(self): store_b = N5Store(store_a.path) assert store_a == store_b - @pytest.mark.parametrize('zarr_meta_key', ['.zarray', '.zattrs', '.zgroup']) + @pytest.mark.parametrize("zarr_meta_key", [".zarray", ".zattrs", ".zgroup"]) def test_del_zarr_meta_key(self, zarr_meta_key): store = self.create_store() - store[n5_attrs_key] = json_dumps({'foo': 'bar'}) + store[n5_attrs_key] = json_dumps({"foo": "bar"}) del store[zarr_meta_key] assert n5_attrs_key not in store def test_chunk_nesting(self): store = self.create_store() - store['0.0'] = b'xxx' - assert '0.0' in store - assert b'xxx' == store['0.0'] + store["0.0"] = b"xxx" + assert "0.0" in store + assert b"xxx" == store["0.0"] # assert b'xxx' == store['0/0'] - store['foo/10.20.30'] = b'yyy' - assert 'foo/10.20.30' in store - assert b'yyy' == store['foo/10.20.30'] + store["foo/10.20.30"] = b"yyy" + assert "foo/10.20.30" in store + assert b"yyy" == store["foo/10.20.30"] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] - del store['foo/10.20.30'] - assert 'foo/30/20/10' not in store - store['42'] = b'zzz' - assert '42' in store - assert b'zzz' == store['42'] + assert b"yyy" == store["foo/30/20/10"] + del store["foo/10.20.30"] + assert "foo/30/20/10" not in store + store["42"] = b"zzz" + assert "42" in store + assert b"zzz" == store["42"] def test_init_array(self): store = self.create_store() @@ -1517,83 +1525,85 @@ def test_init_array(self): # check metadata assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 - assert meta['dimension_separator'] == '.' + assert meta["fill_value"] == 0 + assert meta["dimension_separator"] == "." 
# Top-level groups AND arrays should have # the n5 keyword in metadata raw_n5_meta = json.loads(store[n5_attrs_key]) - assert raw_n5_meta.get('n5', None) == N5_FORMAT + assert raw_n5_meta.get("n5", None) == N5_FORMAT def test_init_array_path(self): - path = 'foo/bar' + path = "foo/bar" store = self.create_store() init_array(store, shape=1000, chunks=100, path=path) # check metadata - key = path + '/' + array_meta_key + key = path + "/" + array_meta_key assert key in store meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 + assert meta["fill_value"] == 0 def test_init_array_compat(self): store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor='none') + init_array(store, shape=1000, chunks=100, compressor="none") meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert compressor_config is None def test_init_array_overwrite(self): - self._test_init_array_overwrite('C') + self._test_init_array_overwrite("C") def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path('C') + self._test_init_array_overwrite_path("C") def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store('C') + self._test_init_array_overwrite_chunk_store("C") def test_init_group_overwrite(self): - self._test_init_group_overwrite('C') + self._test_init_group_overwrite("C") def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path('C') + self._test_init_group_overwrite_path("C") def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store('C') + self._test_init_group_overwrite_chunk_store("C") def test_init_group(self): store = self.create_store() init_group(store) - store['.zattrs'] = json_dumps({'foo': 'bar'}) + store[".zattrs"] = json_dumps({"foo": "bar"}) # check metadata assert group_meta_key in store assert group_meta_key in store.listdir() - assert group_meta_key in store.listdir('') + assert group_meta_key in store.listdir("") meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert ZARR_FORMAT == meta["zarr_format"] def test_filters(self): - all_filters, all_errors = zip(*[ - (None, does_not_raise()), - ([], does_not_raise()), - ([AsType('f4', 'f8')], pytest.raises(ValueError)), - ]) + all_filters, all_errors = zip( + *[ + (None, does_not_raise()), + ([], does_not_raise()), + ([AsType("f4", "f8")], pytest.raises(ValueError)), + ] + ) for filters, error in zip(all_filters, all_errors): store = self.create_store() with error: @@ -1620,29 +1630,29 @@ def test_equal(self): # be run by making TestN5FSStore inherit from both TestFSStore and # TestN5Store, but a direct copy is arguably more explicit. 
- @pytest.mark.parametrize('zarr_meta_key', ['.zarray', '.zattrs', '.zgroup']) + @pytest.mark.parametrize("zarr_meta_key", [".zarray", ".zattrs", ".zgroup"]) def test_del_zarr_meta_key(self, zarr_meta_key): store = self.create_store() - store[n5_attrs_key] = json_dumps({'foo': 'bar'}) + store[n5_attrs_key] = json_dumps({"foo": "bar"}) del store[zarr_meta_key] assert n5_attrs_key not in store def test_chunk_nesting(self): store = self.create_store() - store['0.0'] = b'xxx' - assert '0.0' in store - assert b'xxx' == store['0.0'] + store["0.0"] = b"xxx" + assert "0.0" in store + assert b"xxx" == store["0.0"] # assert b'xxx' == store['0/0'] - store['foo/10.20.30'] = b'yyy' - assert 'foo/10.20.30' in store - assert b'yyy' == store['foo/10.20.30'] + store["foo/10.20.30"] = b"yyy" + assert "foo/10.20.30" in store + assert b"yyy" == store["foo/10.20.30"] # N5 reverses axis order - assert b'yyy' == store['foo/30/20/10'] - del store['foo/10.20.30'] - assert 'foo/30/20/10' not in store - store['42'] = b'zzz' - assert '42' in store - assert b'zzz' == store['42'] + assert b"yyy" == store["foo/30/20/10"] + del store["foo/10.20.30"] + assert "foo/30/20/10" not in store + store["42"] = b"zzz" + assert "42" in store + assert b"zzz" == store["42"] def test_init_array(self): store = self.create_store() @@ -1651,88 +1661,90 @@ def test_init_array(self): # check metadata assert array_meta_key in store meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 - assert meta['dimension_separator'] == '.' + assert meta["fill_value"] == 0 + assert meta["dimension_separator"] == "." 
# Top-level groups AND arrays should have # the n5 keyword in metadata raw_n5_meta = json.loads(store[n5_attrs_key]) - assert raw_n5_meta.get('n5', None) == N5_FORMAT + assert raw_n5_meta.get("n5", None) == N5_FORMAT def test_init_array_path(self): - path = 'foo/bar' + path = "foo/bar" store = self.create_store() init_array(store, shape=1000, chunks=100, path=path) # check metadata - key = path + '/' + array_meta_key + key = path + "/" + array_meta_key assert key in store meta = store._metadata_class.decode_array_metadata(store[key]) - assert ZARR_FORMAT == meta['zarr_format'] - assert (1000,) == meta['shape'] - assert (100,) == meta['chunks'] - assert np.dtype(None) == meta['dtype'] + assert ZARR_FORMAT == meta["zarr_format"] + assert (1000,) == meta["shape"] + assert (100,) == meta["chunks"] + assert np.dtype(None) == meta["dtype"] # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert default_compressor.get_config() == compressor_config # N5Store always has a fill value of 0 - assert meta['fill_value'] == 0 + assert meta["fill_value"] == 0 def test_init_array_compat(self): store = self.create_store() - init_array(store, shape=1000, chunks=100, compressor='none') + init_array(store, shape=1000, chunks=100, compressor="none") meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) # N5Store wraps the actual compressor - compressor_config = meta['compressor']['compressor_config'] + compressor_config = meta["compressor"]["compressor_config"] assert compressor_config is None def test_init_array_overwrite(self): - self._test_init_array_overwrite('C') + self._test_init_array_overwrite("C") def test_init_array_overwrite_path(self): - self._test_init_array_overwrite_path('C') + self._test_init_array_overwrite_path("C") def test_init_array_overwrite_chunk_store(self): - self._test_init_array_overwrite_chunk_store('C') + self._test_init_array_overwrite_chunk_store("C") def test_init_group_overwrite(self): - self._test_init_group_overwrite('C') + self._test_init_group_overwrite("C") def test_init_group_overwrite_path(self): - self._test_init_group_overwrite_path('C') + self._test_init_group_overwrite_path("C") def test_init_group_overwrite_chunk_store(self): - self._test_init_group_overwrite_chunk_store('C') + self._test_init_group_overwrite_chunk_store("C") def test_dimension_separator(self): - with pytest.warns(UserWarning, match='dimension_separator'): - self.create_store(dimension_separator='/') + with pytest.warns(UserWarning, match="dimension_separator"): + self.create_store(dimension_separator="/") def test_init_group(self): store = self.create_store() init_group(store) - store['.zattrs'] = json_dumps({'foo': 'bar'}) + store[".zattrs"] = json_dumps({"foo": "bar"}) # check metadata assert group_meta_key in store assert group_meta_key in store.listdir() - assert group_meta_key in store.listdir('') + assert group_meta_key in store.listdir("") meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) - assert ZARR_FORMAT == meta['zarr_format'] + assert ZARR_FORMAT == meta["zarr_format"] def test_filters(self): - all_filters, all_errors = zip(*[ - (None, does_not_raise()), - ([], does_not_raise()), - ([AsType('f4', 'f8')], pytest.raises(ValueError)), - ]) + all_filters, all_errors = zip( + *[ + (None, does_not_raise()), + ([], does_not_raise()), + ([AsType("f4", "f8")], pytest.raises(ValueError)), + ] + ) for filters, error in zip(all_filters, 
all_errors): store = self.create_store() with error: @@ -1741,13 +1753,13 @@ def test_filters(self): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") class TestNestedFSStore(TestNestedDirectoryStore): - def create_store(self, normalize_keys=False, path=None, **kwargs): if path is None: path = tempfile.mkdtemp() atexit.register(atexit_rmtree, path) - store = FSStore(path, normalize_keys=normalize_keys, - dimension_separator='/', auto_mkdir=True, **kwargs) + store = FSStore( + path, normalize_keys=normalize_keys, dimension_separator="/", auto_mkdir=True, **kwargs + ) return store def test_numbered_groups(self): @@ -1756,7 +1768,7 @@ def test_numbered_groups(self): # Create an array store = self.create_store() group = zarr.group(store=store) - arr = group.create_dataset('0', shape=(10, 10)) + arr = group.create_dataset("0", shape=(10, 10)) arr[1] = 1 # Read it back @@ -1765,7 +1777,6 @@ def test_numbered_groups(self): class TestTempStore(StoreTests): - def create_store(self, **kwargs): skip_if_nested_chunks(**kwargs) return TempStore(**kwargs) @@ -1780,113 +1791,111 @@ class TestZipStore(StoreTests): ZipStoreClass = ZipStore def create_store(self, **kwargs): - path = mktemp(suffix='.zip') + path = mktemp(suffix=".zip") atexit.register(os.remove, path) - store = ZipStore(path, mode='w', **kwargs) + store = ZipStore(path, mode="w", **kwargs) return store def test_mode(self): - with self.ZipStoreClass('data/store.zip', mode='w') as store: - store[self.root + 'foo'] = b'bar' - store = self.ZipStoreClass('data/store.zip', mode='r') + with self.ZipStoreClass("data/store.zip", mode="w") as store: + store[self.root + "foo"] = b"bar" + store = self.ZipStoreClass("data/store.zip", mode="r") with pytest.raises(PermissionError): - store[self.root + 'foo'] = b'bar' + store[self.root + "foo"] = b"bar" with pytest.raises(PermissionError): store.clear() def test_flush(self): - store = self.ZipStoreClass('data/store.zip', mode='w') - store[self.root + 'foo'] = b'bar' + store = self.ZipStoreClass("data/store.zip", mode="w") + store[self.root + "foo"] = b"bar" store.flush() - assert store[self.root + 'foo'] == b'bar' + assert store[self.root + "foo"] == b"bar" store.close() - store = self.ZipStoreClass('data/store.zip', mode='r') + store = self.ZipStoreClass("data/store.zip", mode="r") store.flush() # no-op def test_context_manager(self): with self.create_store() as store: - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'qux' + store[self.root + "foo"] = b"bar" + store[self.root + "baz"] = b"qux" assert 2 == len(store) def test_pop(self): # override because not implemented store = self.create_store() - store[self.root + 'foo'] = b'bar' + store[self.root + "foo"] = b"bar" with pytest.raises(NotImplementedError): - store.pop(self.root + 'foo') + store.pop(self.root + "foo") def test_popitem(self): # override because not implemented store = self.create_store() - store[self.root + 'foo'] = b'bar' + store[self.root + "foo"] = b"bar" with pytest.raises(NotImplementedError): store.popitem() def test_permissions(self): - store = self.ZipStoreClass('data/store.zip', mode='w') - foo_key = 'foo' if self.version == 2 else self.root + 'foo' + store = self.ZipStoreClass("data/store.zip", mode="w") + foo_key = "foo" if self.version == 2 else self.root + "foo" # TODO: cannot provide key ending in / for v3 # how to create an empty folder in that case? 
- baz_key = 'baz/' if self.version == 2 else self.root + 'baz' - store[foo_key] = b'bar' - store[baz_key] = b'' + baz_key = "baz/" if self.version == 2 else self.root + "baz" + store[foo_key] = b"bar" + store[baz_key] = b"" store.flush() store.close() - z = ZipFile('data/store.zip', 'r') + z = ZipFile("data/store.zip", "r") info = z.getinfo(foo_key) perm = oct(info.external_attr >> 16) - assert perm == '0o644' + assert perm == "0o644" info = z.getinfo(baz_key) perm = oct(info.external_attr >> 16) # only for posix platforms - if os.name == 'posix': + if os.name == "posix": if self.version == 2: - assert perm == '0o40775' + assert perm == "0o40775" else: # baz/ on v2, but baz on v3, so not a directory - assert perm == '0o644' + assert perm == "0o644" z.close() def test_store_and_retrieve_ndarray(self): - store = ZipStore('data/store.zip') + store = ZipStore("data/store.zip") x = np.array([[1, 2], [3, 4]]) - store['foo'] = x - y = np.frombuffer(store['foo'], dtype=x.dtype).reshape(x.shape) + store["foo"] = x + y = np.frombuffer(store["foo"], dtype=x.dtype).reshape(x.shape) assert np.array_equiv(y, x) class TestDBMStore(StoreTests): - def create_store(self, dimension_separator=None): - path = mktemp(suffix='.anydbm') - atexit.register(atexit_rmglob, path + '*') + path = mktemp(suffix=".anydbm") + atexit.register(atexit_rmglob, path + "*") # create store using default dbm implementation - store = DBMStore(path, flag='n', dimension_separator=dimension_separator) + store = DBMStore(path, flag="n", dimension_separator=dimension_separator) return store def test_context_manager(self): with self.create_store() as store: - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'qux' + store[self.root + "foo"] = b"bar" + store[self.root + "baz"] = b"qux" assert 2 == len(store) class TestDBMStoreDumb(TestDBMStore): - def create_store(self, **kwargs): - path = mktemp(suffix='.dumbdbm') - atexit.register(atexit_rmglob, path + '*') + path = mktemp(suffix=".dumbdbm") + atexit.register(atexit_rmglob, path + "*") import dbm.dumb as dumbdbm - store = DBMStore(path, flag='n', open=dumbdbm.open, **kwargs) + + store = DBMStore(path, flag="n", open=dumbdbm.open, **kwargs) return store class TestDBMStoreGnu(TestDBMStore): - def create_store(self, **kwargs): gdbm = pytest.importorskip("dbm.gnu") path = mktemp(suffix=".gdbm") # pragma: no cover @@ -1898,7 +1907,6 @@ def create_store(self, **kwargs): class TestDBMStoreNDBM(TestDBMStore): - def create_store(self, **kwargs): ndbm = pytest.importorskip("dbm.ndbm") path = mktemp(suffix=".ndbm") # pragma: no cover @@ -1908,20 +1916,18 @@ def create_store(self, **kwargs): class TestDBMStoreBerkeleyDB(TestDBMStore): - def create_store(self, **kwargs): bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix='.dbm') + path = mktemp(suffix=".dbm") atexit.register(os.remove, path) - store = DBMStore(path, flag='n', open=bsddb3.btopen, write_lock=False, **kwargs) + store = DBMStore(path, flag="n", open=bsddb3.btopen, write_lock=False, **kwargs) return store class TestLMDBStore(StoreTests): - def create_store(self, **kwargs): pytest.importorskip("lmdb") - path = mktemp(suffix='.lmdb') + path = mktemp(suffix=".lmdb") atexit.register(atexit_rmtree, path) buffers = True store = LMDBStore(path, buffers=buffers, **kwargs) @@ -1929,43 +1935,41 @@ def create_store(self, **kwargs): def test_context_manager(self): with self.create_store() as store: - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'qux' + store[self.root + "foo"] = b"bar" + store[self.root + 
"baz"] = b"qux" assert 2 == len(store) class TestSQLiteStore(StoreTests): - def create_store(self, **kwargs): pytest.importorskip("sqlite3") - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path, **kwargs) return store def test_underscore_in_name(self): - path = mktemp(suffix='.db') + path = mktemp(suffix=".db") atexit.register(atexit_rmtree, path) store = SQLiteStore(path) - store['a'] = b'aaa' - store['a_b'] = b'aa_bb' - store.rmdir('a') - assert 'a_b' in store + store["a"] = b"aaa" + store["a_b"] = b"aa_bb" + store.rmdir("a") + assert "a_b" in store class TestSQLiteStoreInMemory(TestSQLiteStore): - def create_store(self, **kwargs): pytest.importorskip("sqlite3") - store = SQLiteStore(':memory:', **kwargs) + store = SQLiteStore(":memory:", **kwargs) return store def test_pickle(self): # setup store store = self.create_store() - store[self.root + 'foo'] = b'bar' - store[self.root + 'baz'] = b'quux' + store[self.root + "foo"] = b"bar" + store[self.root + "baz"] = b"quux" # round-trip through pickle with pytest.raises(PicklingError): @@ -1974,11 +1978,11 @@ def test_pickle(self): @skip_test_env_var("ZARR_TEST_MONGO") class TestMongoDBStore(StoreTests): - def create_store(self, **kwargs): pytest.importorskip("pymongo") - store = MongoDBStore(host='127.0.0.1', database='zarr_tests', - collection='zarr_tests', **kwargs) + store = MongoDBStore( + host="127.0.0.1", database="zarr_tests", collection="zarr_tests", **kwargs + ) # start with an empty store store.clear() return store @@ -1986,12 +1990,11 @@ def create_store(self, **kwargs): @skip_test_env_var("ZARR_TEST_REDIS") class TestRedisStore(StoreTests): - def create_store(self, **kwargs): # TODO: this is the default host for Redis on Travis, # we probably want to generalize this though pytest.importorskip("redis") - store = RedisStore(host='localhost', port=6379, **kwargs) + store = RedisStore(host="localhost", port=6379, **kwargs) # start with an empty store store.clear() return store @@ -2011,14 +2014,14 @@ def test_cache_values_no_max_size(self): # setup store store = self.CountingClass() - foo_key = self.root + 'foo' - bar_key = self.root + 'bar' - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__getitem__', foo_key] - assert 1 == store.counter['__setitem__', foo_key] - assert 0 == store.counter['__getitem__', bar_key] - assert 1 == store.counter['__setitem__', bar_key] + foo_key = self.root + "foo" + bar_key = self.root + "bar" + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__getitem__", foo_key] + assert 1 == store.counter["__setitem__", foo_key] + assert 0 == store.counter["__getitem__", bar_key] + assert 1 == store.counter["__setitem__", bar_key] # setup cache cache = self.LRUStoreClass(store, max_size=None) @@ -2026,39 +2029,39 @@ def test_cache_values_no_max_size(self): assert 0 == cache.misses # test first __getitem__, cache miss - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] - assert 1 == store.counter['__setitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] + assert 1 == store.counter["__setitem__", foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second __getitem__, cache hit - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] - assert 1 == store.counter['__setitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", 
foo_key] + assert 1 == store.counter["__setitem__", foo_key] assert 1 == cache.hits assert 1 == cache.misses # test __setitem__, __getitem__ - cache[foo_key] = b'zzz' - assert 1 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + cache[foo_key] = b"zzz" + assert 1 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] # should be a cache hit - assert b'zzz' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + assert b"zzz" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] assert 2 == cache.hits assert 1 == cache.misses # manually invalidate all cached values cache.invalidate_values() - assert b'zzz' == cache[foo_key] - assert 2 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + assert b"zzz" == cache[foo_key] + assert 2 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] cache.invalidate() - assert b'zzz' == cache[foo_key] - assert 3 == store.counter['__getitem__', foo_key] - assert 2 == store.counter['__setitem__', foo_key] + assert b"zzz" == cache[foo_key] + assert 3 == store.counter["__getitem__", foo_key] + assert 2 == store.counter["__setitem__", foo_key] # test __delitem__ del cache[foo_key] @@ -2070,104 +2073,104 @@ def test_cache_values_no_max_size(self): store[foo_key] # verify other keys untouched - assert 0 == store.counter['__getitem__', bar_key] - assert 1 == store.counter['__setitem__', bar_key] + assert 0 == store.counter["__getitem__", bar_key] + assert 1 == store.counter["__setitem__", bar_key] def test_cache_values_with_max_size(self): # setup store store = self.CountingClass() - foo_key = self.root + 'foo' - bar_key = self.root + 'bar' - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__getitem__', foo_key] - assert 0 == store.counter['__getitem__', bar_key] + foo_key = self.root + "foo" + bar_key = self.root + "bar" + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__getitem__", foo_key] + assert 0 == store.counter["__getitem__", bar_key] # setup cache - can only hold one item cache = self.LRUStoreClass(store, max_size=5) assert 0 == cache.hits assert 0 == cache.misses # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 1 == cache.hits assert 1 == cache.misses # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 1 == cache.hits assert 2 == cache.misses # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 2 == cache.hits assert 2 == cache.misses # test 'foo' __getitem__, should have been evicted, cache miss - assert b'xxx' == cache[foo_key] - assert 2 
== store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 2 == store.counter["__getitem__", foo_key] assert 2 == cache.hits assert 3 == cache.misses # test 'bar' __getitem__, should have been evicted, cache miss - assert b'yyy' == cache[bar_key] - assert 2 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 2 == store.counter["__getitem__", bar_key] assert 2 == cache.hits assert 4 == cache.misses # setup store store = self.CountingClass() - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__getitem__', foo_key] - assert 0 == store.counter['__getitem__', bar_key] + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__getitem__", foo_key] + assert 0 == store.counter["__getitem__", bar_key] # setup cache - can hold two items cache = self.LRUStoreClass(store, max_size=6) assert 0 == cache.hits assert 0 == cache.misses # test first 'foo' __getitem__, cache miss - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 0 == cache.hits assert 1 == cache.misses # test second 'foo' __getitem__, cache hit - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 1 == cache.hits assert 1 == cache.misses # test first 'bar' __getitem__, cache miss - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 1 == cache.hits assert 2 == cache.misses # test second 'bar' __getitem__, cache hit - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 2 == cache.hits assert 2 == cache.misses # test 'foo' __getitem__, should still be cached - assert b'xxx' == cache[foo_key] - assert 1 == store.counter['__getitem__', foo_key] + assert b"xxx" == cache[foo_key] + assert 1 == store.counter["__getitem__", foo_key] assert 3 == cache.hits assert 2 == cache.misses # test 'bar' __getitem__, should still be cached - assert b'yyy' == cache[bar_key] - assert 1 == store.counter['__getitem__', bar_key] + assert b"yyy" == cache[bar_key] + assert 1 == store.counter["__getitem__", bar_key] assert 4 == cache.hits assert 2 == cache.misses @@ -2175,78 +2178,78 @@ def test_cache_keys(self): # setup store = self.CountingClass() - foo_key = self.root + 'foo' - bar_key = self.root + 'bar' - baz_key = self.root + 'baz' - store[foo_key] = b'xxx' - store[bar_key] = b'yyy' - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] - assert 0 == store.counter['keys'] + foo_key = self.root + "foo" + bar_key = self.root + "bar" + baz_key = self.root + "baz" + store[foo_key] = b"xxx" + store[bar_key] = b"yyy" + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] + assert 0 == store.counter["keys"] cache = self.LRUStoreClass(store, max_size=None) # keys should be cached on first call keys = sorted(cache.keys()) assert keys == [bar_key, foo_key] - assert 1 == store.counter['keys'] + assert 1 == store.counter["keys"] # keys should now be cached assert keys == sorted(cache.keys()) - assert 1 == store.counter['keys'] + assert 1 == store.counter["keys"] assert foo_key in cache - assert 0 
== store.counter['__contains__', foo_key] + assert 0 == store.counter["__contains__", foo_key] assert keys == sorted(cache) - assert 0 == store.counter['__iter__'] - assert 1 == store.counter['keys'] + assert 0 == store.counter["__iter__"] + assert 1 == store.counter["keys"] # cache should be cleared if store is modified - crude but simple for now - cache[baz_key] = b'zzz' + cache[baz_key] = b"zzz" keys = sorted(cache.keys()) assert keys == [bar_key, baz_key, foo_key] - assert 2 == store.counter['keys'] + assert 2 == store.counter["keys"] # keys should now be cached assert keys == sorted(cache.keys()) - assert 2 == store.counter['keys'] + assert 2 == store.counter["keys"] # manually invalidate keys cache.invalidate_keys() keys = sorted(cache.keys()) assert keys == [bar_key, baz_key, foo_key] - assert 3 == store.counter['keys'] - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] + assert 3 == store.counter["keys"] + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] cache.invalidate_keys() keys = sorted(cache) assert keys == [bar_key, baz_key, foo_key] - assert 4 == store.counter['keys'] - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] + assert 4 == store.counter["keys"] + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] cache.invalidate_keys() assert foo_key in cache - assert 5 == store.counter['keys'] - assert 0 == store.counter['__contains__', foo_key] - assert 0 == store.counter['__iter__'] + assert 5 == store.counter["keys"] + assert 0 == store.counter["__contains__", foo_key] + assert 0 == store.counter["__iter__"] # check these would get counted if called directly assert foo_key in store - assert 1 == store.counter['__contains__', foo_key] + assert 1 == store.counter["__contains__", foo_key] assert keys == sorted(store) - assert 1 == store.counter['__iter__'] + assert 1 == store.counter["__iter__"] def test_getsize(): store = KVStore(dict()) - store['foo'] = b'aaa' - store['bar'] = b'bbbb' - store['baz/quux'] = b'ccccc' + store["foo"] = b"aaa" + store["bar"] = b"bbbb" + store["baz/quux"] = b"ccccc" assert 7 == getsize(store) - assert 5 == getsize(store, 'baz') + assert 5 == getsize(store, "baz") store = KVStore(dict()) - store['boo'] = None + store["boo"] = None assert -1 == getsize(store) -@pytest.mark.parametrize('dict_store', [False, True]) +@pytest.mark.parametrize("dict_store", [False, True]) def test_migrate_1to2(dict_store): from zarr import meta_v1 @@ -2258,64 +2261,63 @@ def test_migrate_1to2(dict_store): meta = dict( shape=(100,), chunks=(10,), - dtype=np.dtype('f4'), - compression='zlib', + dtype=np.dtype("f4"), + compression="zlib", compression_opts=1, fill_value=None, - order='C' + order="C", ) meta_json = meta_v1.encode_metadata(meta) - store['meta'] = meta_json - store['attrs'] = json.dumps(dict()).encode('ascii') + store["meta"] = meta_json + store["attrs"] = json.dumps(dict()).encode("ascii") # run migration migrate_1to2(store) # check results - assert 'meta' not in store + assert "meta" not in store assert array_meta_key in store - assert 'attrs' not in store + assert "attrs" not in store assert attrs_key in store meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 2 == meta_migrated['zarr_format'] + assert 2 == meta_migrated["zarr_format"] # preserved fields - for f in 'shape', 'chunks', 'dtype', 'fill_value', 'order': + for f in "shape", "chunks", "dtype", "fill_value", "order": 
assert meta[f] == meta_migrated[f] # migrate should have added empty filters field - assert meta_migrated['filters'] is None + assert meta_migrated["filters"] is None # check compression and compression_opts migrated to compressor - assert 'compression' not in meta_migrated - assert 'compression_opts' not in meta_migrated - assert meta_migrated['compressor'] == Zlib(1).get_config() + assert "compression" not in meta_migrated + assert "compression_opts" not in meta_migrated + assert meta_migrated["compressor"] == Zlib(1).get_config() # check dict compression_opts store = dict() if dict_store else KVStore(dict()) - meta['compression'] = 'blosc' - meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1) + meta["compression"] = "blosc" + meta["compression_opts"] = dict(cname="lz4", clevel=5, shuffle=1) meta_json = meta_v1.encode_metadata(meta) - store['meta'] = meta_json - store['attrs'] = json.dumps(dict()).encode('ascii') + store["meta"] = meta_json + store["attrs"] = json.dumps(dict()).encode("ascii") migrate_1to2(store) meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 'compression' not in meta_migrated - assert 'compression_opts' not in meta_migrated - assert (meta_migrated['compressor'] == - Blosc(cname='lz4', clevel=5, shuffle=1).get_config()) + assert "compression" not in meta_migrated + assert "compression_opts" not in meta_migrated + assert meta_migrated["compressor"] == Blosc(cname="lz4", clevel=5, shuffle=1).get_config() # check 'none' compression is migrated to None (null in JSON) store = dict() if dict_store else KVStore(dict()) - meta['compression'] = 'none' + meta["compression"] = "none" meta_json = meta_v1.encode_metadata(meta) - store['meta'] = meta_json - store['attrs'] = json.dumps(dict()).encode('ascii') + store["meta"] = meta_json + store["attrs"] = json.dumps(dict()).encode("ascii") migrate_1to2(store) meta_migrated = decode_array_metadata(store[array_meta_key]) - assert 'compression' not in meta_migrated - assert 'compression_opts' not in meta_migrated - assert meta_migrated['compressor'] is None + assert "compression" not in meta_migrated + assert "compression_opts" not in meta_migrated + assert meta_migrated["compressor"] is None def test_format_compatibility(): @@ -2324,71 +2326,75 @@ def test_format_compatibility(): # read data stored with a previous minor version (which should be format-compatible). 
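    # (editor's note on the migrate_1to2 assertions above) The migrated "compressor"
    # field holds a numcodecs codec config dict. Assuming numcodecs' usual
    # get_config() output, the two cases checked above look roughly like:
    #
    #     Zlib(1).get_config()
    #     # -> {'id': 'zlib', 'level': 1}
    #     Blosc(cname="lz4", clevel=5, shuffle=1).get_config()
    #     # -> {'id': 'blosc', 'cname': 'lz4', 'clevel': 5, 'shuffle': 1, 'blocksize': 0}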
# fixture data - fixture = group(store=DirectoryStore('fixture')) + fixture = group(store=DirectoryStore("fixture")) # set seed to get consistent random data np.random.seed(42) arrays_chunks = [ - (np.arange(1111, dtype=' 2 else '' + prefix = meta_root if self.version > 2 else "" # setup some values - store[prefix + 'a'] = b'aaa' - store[prefix + 'b'] = b'bbb' - store[prefix + 'c/d'] = b'ddd' - store[prefix + 'c/e/f'] = b'fff' + store[prefix + "a"] = b"aaa" + store[prefix + "b"] = b"bbb" + store[prefix + "c/d"] = b"ddd" + store[prefix + "c/e/f"] = b"fff" # test iterators on store with data assert 4 == len(store) - keys = [prefix + 'a', prefix + 'b', prefix + 'c/d', prefix + 'c/e/f'] - values = [b'aaa', b'bbb', b'ddd', b'fff'] + keys = [prefix + "a", prefix + "b", prefix + "c/d", prefix + "c/e/f"] + values = [b"aaa", b"bbb", b"ddd", b"fff"] items = list(zip(keys, values)) assert set(keys) == set(store) assert set(keys) == set(store.keys()) @@ -2483,7 +2489,7 @@ class TestConsolidatedMetadataStore: @property def metadata_key(self): - return '.zmetadata' + return ".zmetadata" def test_bad_format(self): @@ -2491,7 +2497,7 @@ def test_bad_format(self): store = dict() consolidated = { # bad format version - 'zarr_consolidated_format': 0, + "zarr_consolidated_format": 0, } store[self.metadata_key] = json.dumps(consolidated).encode() @@ -2508,11 +2514,11 @@ def test_read_write(self): # setup store with consolidated metadata store = dict() consolidated = { - 'zarr_consolidated_format': 1, - 'metadata': { - 'foo': 'bar', - 'baz': 42, - } + "zarr_consolidated_format": 1, + "metadata": { + "foo": "bar", + "baz": 42, + }, } store[self.metadata_key] = json.dumps(consolidated).encode() @@ -2520,15 +2526,15 @@ def test_read_write(self): cs = self.ConsolidatedMetadataClass(store) # test __contains__, __getitem__ - for key, value in consolidated['metadata'].items(): + for key, value in consolidated["metadata"].items(): assert key in cs assert value == cs[key] # test __delitem__, __setitem__ with pytest.raises(PermissionError): - del cs['foo'] + del cs["foo"] with pytest.raises(PermissionError): - cs['bar'] = 0 + cs["bar"] = 0 with pytest.raises(PermissionError): cs["spam"] = "eggs" @@ -2558,16 +2564,16 @@ def test_normalize_store_arg(tmpdir): with pytest.raises(ValueError): normalize_store_arg(dict(), zarr_version=4) - for ext, Class in [('.zip', ZipStore), ('.n5', N5Store)]: - fn = tmpdir.join('store' + ext) - store = normalize_store_arg(str(fn), zarr_version=2, mode='w') + for ext, Class in [(".zip", ZipStore), (".n5", N5Store)]: + fn = tmpdir.join("store" + ext) + store = normalize_store_arg(str(fn), zarr_version=2, mode="w") assert isinstance(store, Class) if have_fsspec: import fsspec path = tempfile.mkdtemp() - store = normalize_store_arg("file://" + path, zarr_version=2, mode='w') + store = normalize_store_arg("file://" + path, zarr_version=2, mode="w") assert isinstance(store, FSStore) store = normalize_store_arg(fsspec.get_mapper("file://" + path)) @@ -2578,7 +2584,7 @@ def test_meta_prefix_6853(): fixture = pathlib.Path(zarr.__file__).resolve().parent.parent / "fixture" meta = fixture / "meta" - if not meta.exists(): # pragma: no cover + if not meta.exists(): # pragma: no cover s = DirectoryStore(str(meta), dimension_separator=".") a = zarr.open(store=s, mode="w", shape=(2, 2), dtype="' == actual[-8:] + assert "" == actual[-8:] def test_tree_get_icon(): @@ -184,15 +198,13 @@ def test_tree_widget_missing_ipytree(): "to get the required ipytree dependency for displaying the tree " "widget. 
If using jupyterlab<3, you also need to run " "`jupyter labextension install ipytree`" - ) + ) with pytest.raises(ImportError, match=re.escape(pattern)): tree_widget(None, None, None) def test_retry_call(): - class Fixture: - def __init__(self, pass_on=1): self.c = 0 self.pass_on = pass_on @@ -217,9 +229,27 @@ def fail(x): def test_flatten(): - assert list(flatten(['0', ['1', ['2', ['3', [4, ]]]]])) == ['0', '1', '2', '3', 4] - assert list(flatten('foo')) == ['f', 'o', 'o'] - assert list(flatten(['foo'])) == ['foo'] + assert list( + flatten( + [ + "0", + [ + "1", + [ + "2", + [ + "3", + [ + 4, + ], + ], + ], + ], + ] + ) + ) == ["0", "1", "2", "3", 4] + assert list(flatten("foo")) == ["f", "o", "o"] + assert list(flatten(["foo"])) == ["foo"] def test_all_equal(): @@ -232,11 +262,11 @@ def test_all_equal(): assert all_equal(np.nan, np.array([np.nan, np.nan])) assert not all_equal(np.nan, np.array([np.nan, 1.0])) - assert all_equal({'a': -1}, np.array([{'a': -1}, {'a': -1}], dtype='object')) - assert not all_equal({'a': -1}, np.array([{'a': -1}, {'a': 2}], dtype='object')) + assert all_equal({"a": -1}, np.array([{"a": -1}, {"a": -1}], dtype="object")) + assert not all_equal({"a": -1}, np.array([{"a": -1}, {"a": 2}], dtype="object")) - assert all_equal(np.timedelta64(999, 'D'), np.array([999, 999], dtype='timedelta64[D]')) - assert not all_equal(np.timedelta64(999, 'D'), np.array([999, 998], dtype='timedelta64[D]')) + assert all_equal(np.timedelta64(999, "D"), np.array([999, 999], dtype="timedelta64[D]")) + assert not all_equal(np.timedelta64(999, "D"), np.array([999, 998], dtype="timedelta64[D]")) # all_equal(None, *) always returns False assert not all_equal(None, np.array([None, None])) diff --git a/zarr/tests/util.py b/zarr/tests/util.py index 19ac8c0bfa..b4f00f703d 100644 --- a/zarr/tests/util.py +++ b/zarr/tests/util.py @@ -11,44 +11,43 @@ class CountingDict(Store): - def __init__(self): self.wrapped = dict() self.counter = collections.Counter() def __len__(self): - self.counter['__len__'] += 1 + self.counter["__len__"] += 1 return len(self.wrapped) def keys(self): - self.counter['keys'] += 1 + self.counter["keys"] += 1 return self.wrapped.keys() def __iter__(self): - self.counter['__iter__'] += 1 + self.counter["__iter__"] += 1 return iter(self.wrapped) def __contains__(self, item): - self.counter['__contains__', item] += 1 + self.counter["__contains__", item] += 1 return item in self.wrapped def __getitem__(self, item): - self.counter['__getitem__', item] += 1 + self.counter["__getitem__", item] += 1 return self.wrapped[item] def __setitem__(self, key, value): - self.counter['__setitem__', key] += 1 + self.counter["__setitem__", key] += 1 self.wrapped[key] = value def __delitem__(self, key): - self.counter['__delitem__', key] += 1 + self.counter["__delitem__", key] += 1 del self.wrapped[key] def getitems( self, keys: Sequence[str], *, contexts: Mapping[str, Context] ) -> Mapping[str, Any]: for key in keys: - self.counter['__getitem__', key] += 1 + self.counter["__getitem__", key] += 1 return {k: self.wrapped[k] for k in keys if k in self.wrapped} @@ -57,10 +56,9 @@ class CountingDictV3(CountingDict, StoreV3): def skip_test_env_var(name): - """ Checks for environment variables indicating whether tests requiring services should be run - """ - value = os.environ.get(name, '0') - return pytest.mark.skipif(value == '0', reason='Tests not enabled via environment variable') + """Checks for environment variables indicating whether tests requiring services should be run""" + value = 
os.environ.get(name, "0") + return pytest.mark.skipif(value == "0", reason="Tests not enabled via environment variable") try: diff --git a/zarr/util.py b/zarr/util.py index 6ba20b96c2..0ceafa8ef7 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -5,18 +5,7 @@ from textwrap import TextWrapper import mmap import time -from typing import ( - Any, - Callable, - Dict, - Iterator, - Mapping, - Optional, - Tuple, - TypeVar, - Union, - Iterable -) +from typing import Any, Callable, Dict, Iterator, Mapping, Optional, Tuple, TypeVar, Union, Iterable import numpy as np from asciitree import BoxStyle, LeftAligned @@ -25,14 +14,14 @@ ensure_text, ensure_ndarray_like, ensure_bytes, - ensure_contiguous_ndarray_like + ensure_contiguous_ndarray_like, ) from numcodecs.ndarray_like import NDArrayLike from numcodecs.registry import codec_registry from numcodecs.blosc import cbuffer_sizes, cbuffer_metainfo -KeyType = TypeVar('KeyType') -ValueType = TypeVar('ValueType') +KeyType = TypeVar("KeyType") +ValueType = TypeVar("ValueType") def flatten(arg: Iterable) -> Iterable: @@ -45,14 +34,13 @@ def flatten(arg: Iterable) -> Iterable: # codecs to use for object dtype convenience API object_codecs = { - str.__name__: 'vlen-utf8', - bytes.__name__: 'vlen-bytes', - 'array': 'vlen-array', + str.__name__: "vlen-utf8", + bytes.__name__: "vlen-bytes", + "array": "vlen-array", } class NumberEncoder(json.JSONEncoder): - def default(self, o): # See json.JSONEncoder.default docstring for explanation # This is necessary to encode numpy dtype @@ -65,20 +53,21 @@ def default(self, o): def json_dumps(o: Any) -> bytes: """Write JSON in a consistent, human-readable way.""" - return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True, - separators=(',', ': '), cls=NumberEncoder).encode('ascii') + return json.dumps( + o, indent=4, sort_keys=True, ensure_ascii=True, separators=(",", ": "), cls=NumberEncoder + ).encode("ascii") def json_loads(s: Union[bytes, str]) -> Dict[str, Any]: """Read JSON in a consistent way.""" - return json.loads(ensure_text(s, 'utf-8')) + return json.loads(ensure_text(s, "utf-8")) def normalize_shape(shape) -> Tuple[int]: """Convenience function to normalize the `shape` argument.""" if shape is None: - raise TypeError('shape is None') + raise TypeError("shape is None") # handle 1D convenience form if isinstance(shape, numbers.Integral): @@ -91,9 +80,9 @@ def normalize_shape(shape) -> Tuple[int]: # code to guess chunk shape, adapted from h5py -CHUNK_BASE = 256*1024 # Multiplier by which chunks are adjusted -CHUNK_MIN = 128*1024 # Soft lower limit (128k) -CHUNK_MAX = 64*1024*1024 # Hard upper limit +CHUNK_BASE = 256 * 1024 # Multiplier by which chunks are adjusted +CHUNK_MIN = 128 * 1024 # Soft lower limit (128k) +CHUNK_MAX = 64 * 1024 * 1024 # Hard upper limit def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: @@ -107,12 +96,12 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: ndims = len(shape) # require chunks to have non-zero length for all dimensions - chunks = np.maximum(np.array(shape, dtype='=f8'), 1) + chunks = np.maximum(np.array(shape, dtype="=f8"), 1) # Determine the optimal chunk size in bytes using a PyTables expression. # This is kept as a float. 
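    # (editor's note) A worked example of the expression below, for a hypothetical
    # 10000 x 10000 array of 4-byte values:
    #     dset_size   = 10000 * 10000 * 4                   = 4e8 bytes
    #     target_size = CHUNK_BASE * 2**log10(4e8 / 2**20)  ~= 1.5 MiB
    # which lies inside the [CHUNK_MIN, CHUNK_MAX] window, so it is used unchanged
    # as the target for the halving loop that follows.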
- dset_size = np.prod(chunks)*typesize - target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024))) + dset_size = np.prod(chunks) * typesize + target_size = CHUNK_BASE * (2 ** np.log10(dset_size / (1024.0 * 1024))) if target_size > CHUNK_MAX: target_size = CHUNK_MAX @@ -126,11 +115,11 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: # 1b. We're within 50% of the target chunk size, AND # 2. The chunk is smaller than the maximum chunk size - chunk_bytes = np.prod(chunks)*typesize + chunk_bytes = np.prod(chunks) * typesize - if (chunk_bytes < target_size or - abs(chunk_bytes-target_size)/target_size < 0.5) and \ - chunk_bytes < CHUNK_MAX: + if ( + chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5 + ) and chunk_bytes < CHUNK_MAX: break if np.prod(chunks) == 1: @@ -142,9 +131,7 @@ def guess_chunks(shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: return tuple(int(x) for x in chunks) -def normalize_chunks( - chunks: Any, shape: Tuple[int, ...], typesize: int -) -> Tuple[int, ...]: +def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tuple[int, ...]: """Convenience function to normalize the `chunks` argument for an array with the given `shape`.""" @@ -164,17 +151,16 @@ def normalize_chunks( # handle bad dimensionality if len(chunks) > len(shape): - raise ValueError('too many dimensions in chunks') + raise ValueError("too many dimensions in chunks") # handle underspecified chunks if len(chunks) < len(shape): # assume chunks across remaining dimensions - chunks += shape[len(chunks):] + chunks += shape[len(chunks) :] # handle None or -1 in chunks if -1 in chunks or None in chunks: - chunks = tuple(s if c == -1 or c is None else int(c) - for s, c in zip(shape, chunks)) + chunks = tuple(s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks)) return tuple(chunks) @@ -186,30 +172,34 @@ def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype dtype = dtype.__name__ # type: ignore if isinstance(dtype, str): # allow ':' to delimit class from codec arguments - tokens = dtype.split(':') + tokens = dtype.split(":") key = tokens[0] if key in object_codecs: dtype = np.dtype(object) if object_codec is None: codec_id = object_codecs[key] if len(tokens) > 1: - args = tokens[1].split(',') + args = tokens[1].split(",") else: args = [] try: object_codec = codec_registry[codec_id](*args) except KeyError: # pragma: no cover - raise ValueError('codec %r for object type %r is not ' - 'available; please provide an ' - 'object_codec manually' % (codec_id, key)) + raise ValueError( + "codec %r for object type %r is not " + "available; please provide an " + "object_codec manually" % (codec_id, key) + ) return dtype, object_codec dtype = np.dtype(dtype) # don't allow generic datetime64 or timedelta64, require units to be specified - if dtype == np.dtype('M8') or dtype == np.dtype('m8'): - raise ValueError('datetime64 and timedelta64 dtypes with generic units ' - 'are not supported, please specify units (e.g., "M8[ns]")') + if dtype == np.dtype("M8") or dtype == np.dtype("m8"): + raise ValueError( + "datetime64 and timedelta64 dtypes with generic units " + 'are not supported, please specify units (e.g., "M8[ns]")' + ) return dtype, object_codec @@ -227,16 +217,17 @@ def is_total_slice(item, shape: Tuple[int]) -> bool: if item == slice(None): return True if isinstance(item, slice): - item = item, + item = (item,) if isinstance(item, tuple): return all( - (isinstance(s, slice) and - ((s == 
slice(None)) or - ((s.stop - s.start == l) and (s.step in [1, None])))) + ( + isinstance(s, slice) + and ((s == slice(None)) or ((s.stop - s.start == l) and (s.step in [1, None]))) + ) for s, l in zip(item, shape) ) else: - raise TypeError('expected slice or tuple of slices, found %r' % item) + raise TypeError("expected slice or tuple of slices, found %r" % item) def normalize_resize_args(old_shape, *args): @@ -251,33 +242,32 @@ def normalize_resize_args(old_shape, *args): else: new_shape = tuple(new_shape) if len(new_shape) != len(old_shape): - raise ValueError('new shape must have same number of dimensions') + raise ValueError("new shape must have same number of dimensions") # handle None in new_shape - new_shape = tuple(s if n is None else int(n) - for s, n in zip(old_shape, new_shape)) + new_shape = tuple(s if n is None else int(n) for s, n in zip(old_shape, new_shape)) return new_shape def human_readable_size(size) -> str: if size < 2**10: - return '%s' % size + return "%s" % size elif size < 2**20: - return '%.1fK' % (size / float(2**10)) + return "%.1fK" % (size / float(2**10)) elif size < 2**30: - return '%.1fM' % (size / float(2**20)) + return "%.1fM" % (size / float(2**20)) elif size < 2**40: - return '%.1fG' % (size / float(2**30)) + return "%.1fG" % (size / float(2**30)) elif size < 2**50: - return '%.1fT' % (size / float(2**40)) + return "%.1fT" % (size / float(2**40)) else: - return '%.1fP' % (size / float(2**50)) + return "%.1fP" % (size / float(2**50)) def normalize_order(order: str) -> str: order = str(order).upper() - if order not in ['C', 'F']: + if order not in ["C", "F"]: raise ValueError("order must be either 'C' or 'F', found: %r" % order) return order @@ -286,8 +276,7 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: if sep in (".", "/", None): return sep else: - raise ValueError( - "dimension_separator must be either '.' or '/', found: %r" % sep) + raise ValueError("dimension_separator must be either '.' 
or '/', found: %r" % sep) def normalize_fill_value(fill_value, dtype: np.dtype): @@ -300,17 +289,19 @@ def normalize_fill_value(fill_value, dtype: np.dtype): # structured arrays fill_value = np.zeros((), dtype=dtype)[()] - elif dtype.kind == 'U': + elif dtype.kind == "U": # special case unicode because of encoding issues on Windows if passed through numpy # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713 if not isinstance(fill_value, str): - raise ValueError('fill_value {!r} is not valid for dtype {}; must be a ' - 'unicode string'.format(fill_value, dtype)) + raise ValueError( + "fill_value {!r} is not valid for dtype {}; must be a " + "unicode string".format(fill_value, dtype) + ) else: try: - if isinstance(fill_value, bytes) and dtype.kind == 'V': + if isinstance(fill_value, bytes) and dtype.kind == "V": # special case for numpy 1.14 compatibility fill_value = np.array(fill_value, dtype=dtype.str).view(dtype)[()] else: @@ -318,8 +309,10 @@ def normalize_fill_value(fill_value, dtype: np.dtype): except Exception as e: # re-raise with our own error message to be helpful - raise ValueError('fill_value {!r} is not valid for dtype {}; nested ' - 'exception: {}'.format(fill_value, dtype, e)) + raise ValueError( + "fill_value {!r} is not valid for dtype {}; nested " + "exception: {}".format(fill_value, dtype, e) + ) return fill_value @@ -328,7 +321,7 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: # handle bytes if isinstance(path, bytes): - path = str(path, 'ascii') + path = str(path, "ascii") # ensure str if path is not None and not isinstance(path, str): @@ -337,21 +330,21 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: if path: # convert backslash to forward slash - path = path.replace('\\', '/') + path = path.replace("\\", "/") # ensure no leading slash - while len(path) > 0 and path[0] == '/': + while len(path) > 0 and path[0] == "/": path = path[1:] # ensure no trailing slash - while len(path) > 0 and path[-1] == '/': + while len(path) > 0 and path[-1] == "/": path = path[:-1] # collapse any repeated slashes previous_char = None - collapsed = '' + collapsed = "" for char in path: - if char == '/' and previous_char == '/': + if char == "/" and previous_char == "/": pass else: collapsed += char @@ -359,12 +352,12 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: path = collapsed # don't allow path segments with just '.' or '..' - segments = path.split('/') - if any(s in {'.', '..'} for s in segments): + segments = path.split("/") + if any(s in {".", ".."} for s in segments): raise ValueError("path containing '.' or '..' segment not allowed") else: - path = '' + path = "" return path @@ -376,32 +369,34 @@ def buffer_size(v) -> int: def info_text_report(items: Dict[Any, Any]) -> str: keys = [k for k, v in items] max_key_len = max(len(k) for k in keys) - report = '' + report = "" for k, v in items: - wrapper = TextWrapper(width=80, - initial_indent=k.ljust(max_key_len) + ' : ', - subsequent_indent=' '*max_key_len + ' : ') + wrapper = TextWrapper( + width=80, + initial_indent=k.ljust(max_key_len) + " : ", + subsequent_indent=" " * max_key_len + " : ", + ) text = wrapper.fill(str(v)) - report += text + '\n' + report += text + "\n" return report def info_html_report(items) -> str: report = '' - report += '' + report += "" for k, v in items: - report += '' \ - '' \ - '' \ - '' \ - % (k, v) - report += '' - report += '
%s%s
' + report += ( + "" + '%s' + '%s' + "" % (k, v) + ) + report += "" + report += "" return report class InfoReporter: - def __init__(self, obj): self.obj = obj @@ -415,24 +410,22 @@ def _repr_html_(self): class TreeNode: - def __init__(self, obj, depth=0, level=None): self.obj = obj self.depth = depth self.level = level def get_children(self): - if hasattr(self.obj, 'values'): + if hasattr(self.obj, "values"): if self.level is None or self.depth < self.level: depth = self.depth + 1 - return [TreeNode(o, depth=depth, level=self.level) - for o in self.obj.values()] + return [TreeNode(o, depth=depth, level=self.level) for o in self.obj.values()] return [] def get_text(self): name = self.obj.name.split("/")[-1] or "/" - if hasattr(self.obj, 'shape'): - name += ' {} {}'.format(self.obj.shape, self.obj.dtype) + if hasattr(self.obj, "shape"): + name += " {} {}".format(self.obj.shape, self.obj.dtype) return name def get_type(self): @@ -440,7 +433,6 @@ def get_type(self): class TreeTraversal(Traversal): - def get_children(self, node): return node.get_children() @@ -451,8 +443,8 @@ def get_text(self, node): return node.get_text() -tree_group_icon = 'folder' -tree_array_icon = 'table' +tree_group_icon = "folder" +tree_array_icon = "table" def tree_get_icon(stype: str) -> str: @@ -499,37 +491,28 @@ def tree_widget(group, expand, level): class TreeViewer: - def __init__(self, group, expand=False, level=None): self.group = group self.expand = expand self.level = level - self.text_kwargs = dict( - horiz_len=2, - label_space=1, - indent=1 - ) + self.text_kwargs = dict(horiz_len=2, label_space=1, indent=1) self.bytes_kwargs = dict( - UP_AND_RIGHT="+", - HORIZONTAL="-", - VERTICAL="|", - VERTICAL_AND_RIGHT="+" + UP_AND_RIGHT="+", HORIZONTAL="-", VERTICAL="|", VERTICAL_AND_RIGHT="+" ) self.unicode_kwargs = dict( UP_AND_RIGHT="\u2514", HORIZONTAL="\u2500", VERTICAL="\u2502", - VERTICAL_AND_RIGHT="\u251C" + VERTICAL_AND_RIGHT="\u251C", ) def __bytes__(self): drawer = LeftAligned( - traverse=TreeTraversal(), - draw=BoxStyle(gfx=self.bytes_kwargs, **self.text_kwargs) + traverse=TreeTraversal(), draw=BoxStyle(gfx=self.bytes_kwargs, **self.text_kwargs) ) root = TreeNode(self.group, level=self.level) result = drawer(root) @@ -542,8 +525,7 @@ def __bytes__(self): def __unicode__(self): drawer = LeftAligned( - traverse=TreeTraversal(), - draw=BoxStyle(gfx=self.unicode_kwargs, **self.text_kwargs) + traverse=TreeTraversal(), draw=BoxStyle(gfx=self.unicode_kwargs, **self.text_kwargs) ) root = TreeNode(self.group, level=self.level) return drawer(root) @@ -557,16 +539,21 @@ def _repr_mimebundle_(self, **kwargs): def check_array_shape(param, array, shape): - if not hasattr(array, 'shape'): - raise TypeError('parameter {!r}: expected an array-like object, got {!r}' - .format(param, type(array))) + if not hasattr(array, "shape"): + raise TypeError( + "parameter {!r}: expected an array-like object, got {!r}".format(param, type(array)) + ) if array.shape != shape: - raise ValueError('parameter {!r}: expected array with shape {!r}, got {!r}' - .format(param, shape, array.shape)) + raise ValueError( + "parameter {!r}: expected array with shape {!r}, got {!r}".format( + param, shape, array.shape + ) + ) def is_valid_python_name(name): from keyword import iskeyword + return name.isidentifier() and not iskeyword(name) @@ -599,9 +586,9 @@ def __init__(self, store_key, chunk_store): self.read_blocks = set() _key_path = self.map._key_to_str(store_key) - _key_path = _key_path.split('/') + _key_path = _key_path.split("/") _chunk_path = 
[self.chunk_store._normalize_key(_key_path[-1])] - _key_path = '/'.join(_key_path[:-1] + _chunk_path) + _key_path = "/".join(_key_path[:-1] + _chunk_path) self.key_path = _key_path def prepare_chunk(self): @@ -613,21 +600,15 @@ def prepare_chunk(self): self.buff[0:16] = header self.nblocks = nbytes / blocksize self.nblocks = ( - int(self.nblocks) - if self.nblocks == int(self.nblocks) - else int(self.nblocks + 1) + int(self.nblocks) if self.nblocks == int(self.nblocks) else int(self.nblocks + 1) ) if self.nblocks == 1: self.buff = self.read_full() return - start_points_buffer = self.fs.read_block( - self.key_path, 16, int(self.nblocks * 4) - ) - self.start_points = np.frombuffer( - start_points_buffer, count=self.nblocks, dtype=np.int32 - ) + start_points_buffer = self.fs.read_block(self.key_path, 16, int(self.nblocks * 4)) + self.start_points = np.frombuffer(start_points_buffer, count=self.nblocks, dtype=np.int32) self.start_points_max = self.start_points.max() - self.buff[16: (16 + (self.nblocks * 4))] = start_points_buffer + self.buff[16 : (16 + (self.nblocks * 4))] = start_points_buffer self.n_per_block = blocksize / typesize def read_part(self, start, nitems): @@ -676,12 +657,14 @@ def read_full(self): return self.chunk_store[self.store_key] -def retry_call(callabl: Callable, - args=None, - kwargs=None, - exceptions: Tuple[Any, ...] = (), - retries: int = 10, - wait: float = 0.1) -> Any: +def retry_call( + callabl: Callable, + args=None, + kwargs=None, + exceptions: Tuple[Any, ...] = (), + retries: int = 10, + wait: float = 0.1, +) -> Any: """ Make several attempts to invoke the callable. If one of the given exceptions is raised, wait the given period of time and retry up to the given number of @@ -693,7 +676,7 @@ def retry_call(callabl: Callable, if kwargs is None: kwargs = {} - for attempt in range(1, retries+1): + for attempt in range(1, retries + 1): try: return callabl(*args, **kwargs) except exceptions:
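            # (editor's note) A usage sketch for retry_call, based only on the signature
            # and docstring above; the flaky_read helper and store key are made up for
            # illustration:
            #
            #     def flaky_read():
            #         return store["data/0.0"]
            #
            #     value = retry_call(flaky_read, exceptions=(KeyError,), retries=3, wait=0.05)
            #
            # Each failed attempt waits `wait` seconds before the next try; once the
            # retry budget is exhausted the exception propagates to the caller.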