Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,4 @@ tests/.hypothesis
.hypothesis/

zarr/version.py
zarr.egg-info/
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ repos:
# Package dependencies
- packaging
- donfig
- numcodecs[crc32c]
- numcodecs
- google-crc32c>=1.5
- numpy==2.1 # until https://github.com/numpy/numpy/issues/28034 is resolved
- typing_extensions
- universal-pathlib
Expand Down
1 change: 1 addition & 0 deletions changes/3515.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Replace the `crc32c` dependency with `google-crc32c` to resolve licensing concerns. The `crc32c` library is licensed under the LGPL, while `google-crc32c` uses the more permissive Apache 2.0 license. This change maintains full backward compatibility with existing CRC32C-encoded data.
3 changes: 2 additions & 1 deletion docs/user-guide/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ Required dependencies include:
- [Python](https://docs.python.org/3/) (3.11 or later)
- [packaging](https://packaging.pypa.io) (22.0 or later)
- [numpy](https://numpy.org) (1.26 or later)
- [numcodecs[crc32c]](https://numcodecs.readthedocs.io) (0.14 or later)
- [numcodecs](https://numcodecs.readthedocs.io) (0.14 or later)
- [google-crc32c](https://github.com/googleapis/python-crc32c) (1.5 or later)
- [typing_extensions](https://typing-extensions.readthedocs.io) (4.9 or later)
- [donfig](https://donfig.readthedocs.io) (0.8 or later)

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ requires-python = ">=3.11"
dependencies = [
'packaging>=22.0',
'numpy>=1.26',
'numcodecs[crc32c]>=0.14',
'numcodecs>=0.14',
'google-crc32c>=1.5',
'typing_extensions>=4.9',
'donfig>=0.8',
]
Expand Down
8 changes: 5 additions & 3 deletions src/zarr/codecs/crc32c_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, cast

import google_crc32c
import numpy as np
import typing_extensions
from crc32c import crc32c

from zarr.abc.codec import BytesBytesCodec
from zarr.core.common import JSON, parse_named_configuration
Expand Down Expand Up @@ -42,7 +42,7 @@ async def _decode_single(

# Need to do a manual cast until https://github.com/numpy/numpy/issues/26783 is resolved
computed_checksum = np.uint32(
crc32c(cast("typing_extensions.Buffer", inner_bytes))
google_crc32c.value(cast("typing_extensions.Buffer", inner_bytes))
).tobytes()
stored_checksum = bytes(crc32_bytes)
if computed_checksum != stored_checksum:
Expand All @@ -58,7 +58,9 @@ async def _encode_single(
) -> Buffer | None:
data = chunk_bytes.as_numpy_array()
# Calculate the checksum and "cast" it to a numpy array
checksum = np.array([crc32c(cast("typing_extensions.Buffer", data))], dtype=np.uint32)
checksum = np.array(
[google_crc32c.value(cast("typing_extensions.Buffer", data))], dtype=np.uint32
)
# Append the checksum (as bytes) to the data
return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B")))

Expand Down
21 changes: 19 additions & 2 deletions tests/test_codecs/test_numcodecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
import pytest
from numcodecs import GZip

try:
from numcodecs.errors import UnknownCodecError
except ImportError:
# Older versions of numcodecs don't have a separate errors module
UnknownCodecError = ValueError

from zarr import config, create_array, open_array
from zarr.abc.numcodec import _is_numcodec, _is_numcodec_cls
from zarr.codecs import numcodecs as _numcodecs
Expand Down Expand Up @@ -243,6 +249,13 @@ def test_generic_filter_packbits() -> None:
],
)
def test_generic_checksum(codec_class: type[_numcodecs._NumcodecsBytesBytesCodec]) -> None:
# Check if the codec is available in numcodecs
try:
with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR):
codec_class()._codec # noqa: B018
except UnknownCodecError as e: # pragma: no cover
pytest.skip(f"{codec_class.codec_name} is not available in numcodecs: {e}")

data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16))

with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR):
Expand Down Expand Up @@ -352,8 +365,12 @@ def test_to_dict() -> None:
],
)
def test_codecs_pickleable(codec_cls: type[_numcodecs._NumcodecsCodec]) -> None:
with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR):
codec = codec_cls()
# Check if the codec is available in numcodecs
try:
with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR):
codec = codec_cls()
except UnknownCodecError as e: # pragma: no cover
pytest.skip(f"{codec_cls.codec_name} is not available in numcodecs: {e}")

expected = codec

Expand Down