diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29852b5..759a84d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ jobs: shell: bash steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up JDK uses: actions/setup-java@v4 @@ -29,14 +29,18 @@ jobs: cache: maven - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' - - name: Install zarrita + - name: Install uv + uses: astral-sh/setup-uv@v6 + + - name: Set up zarr-python run: | - python -m venv venv_zarrita - if [ "${{ runner.os }}" = "Windows" ]; then venv_zarrita/Scripts/pip install zarrita; else venv_zarrita/bin/pip install zarrita; fi + uv venv && uv init + uv add zarr + uv add zarrita - name: Download testdata run: | diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index fa61257..47fbbca 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -32,14 +32,18 @@ jobs: # Begin copy from ci.yml. Refactor? 
- name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.11' - - name: Install zarrita + - name: Install uv + uses: astral-sh/setup-uv@v6 + + - name: Set up zarr-python run: | - python -m venv venv_zarrita - if [ "${{ runner.os }}" = "Windows" ]; then venv_zarrita/Scripts/pip install zarrita; else venv_zarrita/bin/pip install zarrita; fi + uv venv && uv init + uv add zarr + uv add zarrita - name: Download testdata run: | diff --git a/.gitignore b/.gitignore index 4991720..2d55ee7 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,7 @@ build/ ### Custom ### /testdata/l4_sample /testoutput -/venv_zarrita +/.python-version +/main.py +/pyproject.toml +/uv.lock diff --git a/src/main/java/dev/zarr/zarrjava/core/Array.java b/src/main/java/dev/zarr/zarrjava/core/Array.java new file mode 100644 index 0000000..0401b6b --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/core/Array.java @@ -0,0 +1,338 @@ +package dev.zarr.zarrjava.core; + +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.store.StoreHandle; +import dev.zarr.zarrjava.utils.IndexingUtils; +import dev.zarr.zarrjava.utils.MultiArrayUtils; +import dev.zarr.zarrjava.utils.Utils; +import dev.zarr.zarrjava.core.codec.CodecPipeline; +import ucar.ma2.InvalidRangeException; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.stream.Stream; + +public interface Array { + + ArrayMetadata metadata(); + + StoreHandle storeHandle(); + + CodecPipeline codecPipeline(); + + /** + * Writes a ucar.ma2.Array into the Zarr array at a specified offset. The shape of the Zarr array + * needs be large enough for the write. 
+     *
+     * @param offset   the offset where to write the data
+     * @param array    the data to write
+     * @param parallel utilizes parallelism if true
+     * @throws IllegalArgumentException if the rank of 'offset' or 'array' differs from the rank of the Zarr array
+     * @throws RuntimeException wrapping any ZarrException or InvalidRangeException raised while a chunk is processed
+     */
+    default void write(long[] offset, ucar.ma2.Array array, boolean parallel) {
+        ArrayMetadata metadata = metadata();
+        if (offset.length != metadata.ndim()) {
+            throw new IllegalArgumentException("'offset' needs to have rank '" + metadata.ndim() + "'.");
+        }
+        if (array.getRank() != metadata.ndim()) {
+            throw new IllegalArgumentException("'array' needs to have rank '" + metadata.ndim() + "'.");
+        }
+
+        int[] shape = array.getShape();
+
+        final int[] chunkShape = metadata.chunkShape();
+        // The element type must be spelled out: with a raw Stream the lambda parameter below would be
+        // an Object and could not be passed on as chunk coordinates (long[]).
+        Stream<long[]> chunkStream = Arrays.stream(IndexingUtils.computeChunkCoords(metadata.shape(), chunkShape, offset, shape));
+        if (parallel) {
+            chunkStream = chunkStream.parallel();
+        }
+        chunkStream.forEach(
+            chunkCoords -> {
+                try {
+                    final IndexingUtils.ChunkProjection chunkProjection =
+                        IndexingUtils.computeProjection(chunkCoords, metadata.shape(), chunkShape, offset,
+                            shape
+                        );
+
+                    ucar.ma2.Array chunkArray;
+                    if (IndexingUtils.isFullChunk(chunkProjection.chunkOffset, chunkProjection.shape,
+                        chunkShape
+                    )) {
+                        // The whole chunk is overwritten: take the section of the input directly.
+                        chunkArray = array.sectionNoReduce(chunkProjection.outOffset,
+                            chunkProjection.shape,
+                            null
+                        );
+                    } else {
+                        // Partial overwrite: read the current chunk, patch the affected region, write it back.
+                        chunkArray = readChunk(chunkCoords);
+                        MultiArrayUtils.copyRegion(array, chunkProjection.outOffset, chunkArray,
+                            chunkProjection.chunkOffset, chunkProjection.shape
+                        );
+                    }
+                    writeChunk(chunkCoords, chunkArray);
+                } catch (ZarrException | InvalidRangeException e) {
+                    // Checked exceptions cannot leave the lambda; the cause is preserved.
+                    throw new RuntimeException(e);
+                }
+            });
+
+    }
+
+    /**
+     * Writes one chunk into the Zarr array as specified by the chunk coordinates. The shape of the
+     * Zarr array needs to be large enough to write.
+     *
+     * @param chunkCoords The coordinates of the chunk as computed by the offset of the chunk divided
+     *                    by the chunk shape.
+     * @param chunkArray  The data to write into the chunk
+     * @throws ZarrException if the write fails
+     */
+    default void writeChunk(long[] chunkCoords, ucar.ma2.Array chunkArray) throws ZarrException {
+        final ArrayMetadata arrayMetadata = metadata();
+        final String[] keys = arrayMetadata.chunkKeyEncoding().encodeChunkKey(chunkCoords);
+        final StoreHandle target = storeHandle().resolve(keys);
+        final Object fill = arrayMetadata.parsedFillValue();
+
+        // A chunk consisting only of the fill value is not stored; its key is deleted instead.
+        final boolean onlyFillValues =
+            fill != null && MultiArrayUtils.allValuesEqual(chunkArray, fill);
+        if (onlyFillValues) {
+            target.delete();
+            return;
+        }
+        target.set(codecPipeline().encode(chunkArray));
+    }
+
+    /**
+     * Reads the chunk addressed by the given chunk coordinates into an ucar.ma2.Array. When the
+     * store holds no data for the chunk, a chunk filled with the fill value is returned.
+     *
+     * @param chunkCoords The coordinates of the chunk as computed by the offset of the chunk divided
+     *                    by the chunk shape.
+     * @return the decoded chunk, or a fill-value chunk if nothing is stored for it
+     * @throws ZarrException if the requested chunk is outside the array's domain or if the read fails
+     */
+    @Nonnull
+    default ucar.ma2.Array readChunk(long[] chunkCoords)
+        throws ZarrException {
+        final ArrayMetadata arrayMetadata = metadata();
+        final boolean insideDomain = chunkIsInArray(chunkCoords);
+        if (!insideDomain) {
+            throw new ZarrException("Attempting to read data outside of the array's domain.");
+        }
+
+        final String[] keys = arrayMetadata.chunkKeyEncoding().encodeChunkKey(chunkCoords);
+        final StoreHandle source = storeHandle().resolve(keys);
+
+        final ByteBuffer encoded = source.read();
+        return encoded == null
+            ? arrayMetadata.allocateFillValueChunk()
+            : codecPipeline().decode(encoded);
+    }
+
+
+    /**
+     * Writes a ucar.ma2.Array into the Zarr array at the beginning of the Zarr array. The shape of
+     * the Zarr array needs to be large enough for the write.
+     * Utilizes no parallelism.
+     *
+     * @param array the data to write
+     */
+    default void write(ucar.ma2.Array array) {
+        // A freshly allocated long[] is all zeros, i.e. the write starts at the origin.
+        write(new long[metadata().ndim()], array);
+    }
+
+    /**
+     * Writes a ucar.ma2.Array into the Zarr array at a specified offset. The shape of the Zarr array
+     * needs to be large enough for the write.
+     * Utilizes no parallelism.
+     *
+     * @param offset the offset where to write the data
+     * @param array  the data to write
+     */
+    default void write(long[] offset, ucar.ma2.Array array) {
+        write(offset, array, false);
+    }
+
+    /**
+     * Writes a ucar.ma2.Array into the Zarr array at the beginning of the Zarr array. The shape of
+     * the Zarr array needs to be large enough for the write.
+     *
+     * @param array    the data to write
+     * @param parallel utilizes parallelism if true
+     */
+    default void write(ucar.ma2.Array array, boolean parallel) {
+        // A freshly allocated long[] is all zeros, i.e. the write starts at the origin.
+        write(new long[metadata().ndim()], array, parallel);
+    }
+
+    /**
+     * Reads the entire Zarr array into an ucar.ma2.Array.
+     * Utilizes no parallelism.
+     *
+     * @return the contents of the whole array
+     * @throws ZarrException if the read fails
+     */
+    @Nonnull
+    default ucar.ma2.Array read() throws ZarrException {
+        // Offset zero in every dimension, shape equal to the array's own shape.
+        return read(new long[metadata().ndim()], Utils.toIntArray(metadata().shape()));
+    }
+
+    /**
+     * Reads a part of the Zarr array based on a requested offset and shape into an ucar.ma2.Array.
+     * Utilizes no parallelism.
+     *
+     * @param offset the offset where to start reading
+     * @param shape  the shape of the data to read
+     * @return the requested region
+     * @throws ZarrException if the requested data is outside the array's domain or if the read fails
+     */
+    @Nonnull
+    default ucar.ma2.Array read(final long[] offset, final int[] shape) throws ZarrException {
+        return read(offset, shape, false);
+    }
+
+    /**
+     * Reads the entire Zarr array into an ucar.ma2.Array.
+     *
+     * @param parallel utilizes parallelism if true
+     * @return the contents of the whole array
+     * @throws ZarrException if the requested data is outside the array's domain or if the read fails
+     */
+    @Nonnull
+    default ucar.ma2.Array read(final boolean parallel) throws ZarrException {
+        return read(new long[metadata().ndim()], Utils.toIntArray(metadata().shape()), parallel);
+    }
+
+    /**
+     * Tells whether a chunk lies inside the array: every coordinate must be non-negative and the
+     * chunk's first element (coordinate times chunk extent) must be below the array's extent.
+     *
+     * @param chunkCoords the chunk coordinates to test, one entry per dimension
+     * @return true if the chunk starts inside the array in every dimension
+     */
+    default boolean chunkIsInArray(long[] chunkCoords) {
+        final int[] chunkShape = metadata().chunkShape();
+        for (int dimIdx = 0; dimIdx < metadata().ndim(); dimIdx++) {
+            if (chunkCoords[dimIdx] < 0
+                || chunkCoords[dimIdx] * chunkShape[dimIdx] >= metadata().shape()[dimIdx]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Reads a part of the Zarr array based on a requested offset and shape into an ucar.ma2.Array.
+     *
+     * @param offset   the offset where to start reading
+     * @param shape    the shape of the data to read
+     * @param parallel utilizes parallelism if true
+     * @return the requested region
+     * @throws IllegalArgumentException if the rank of 'offset' or 'shape' differs from the rank of the Zarr array
+     * @throws ZarrException if the requested data is outside the array's domain or if the read fails
+     * @throws RuntimeException wrapping any ZarrException raised while a chunk is processed
+     */
+    @Nonnull
+    default ucar.ma2.Array read(final long[] offset, final int[] shape, final boolean parallel) throws ZarrException {
+        ArrayMetadata metadata = metadata();
+        CodecPipeline codecPipeline = codecPipeline();
+        if (offset.length != metadata.ndim()) {
+            throw new IllegalArgumentException("'offset' needs to have rank '" + metadata.ndim() + "'.");
+        }
+        if (shape.length != metadata.ndim()) {
+            throw new IllegalArgumentException("'shape' needs to have rank '" + metadata.ndim() + "'.");
+        }
+        for (int dimIdx = 0; dimIdx < metadata.ndim(); dimIdx++) {
+            if (offset[dimIdx] < 0 || offset[dimIdx] + shape[dimIdx] > metadata.shape()[dimIdx]) {
+                throw new ZarrException("Requested data is outside of the array's domain.");
+            }
+        }
+
+        final int[] chunkShape = metadata.chunkShape();
+        // Fast path: the request is exactly one full chunk, so no output array has to be assembled.
+        if (IndexingUtils.isSingleFullChunk(offset, shape, chunkShape)) {
+            return readChunk(IndexingUtils.computeSingleChunkCoords(offset, chunkShape));
+        }
+
+        final ucar.ma2.Array outputArray = ucar.ma2.Array.factory(metadata.dataType().getMA2DataType(),
+            shape);
+        // The element type must be spelled out: with a raw Stream the lambda parameter below would be
+        // an Object and could not be passed on as chunk coordinates (long[]).
+        Stream<long[]> chunkStream = Arrays.stream(IndexingUtils.computeChunkCoords(metadata.shape(), chunkShape, offset, shape));
+        if (parallel) {
+            chunkStream = chunkStream.parallel();
+        }
+        chunkStream.forEach(
+            chunkCoords -> {
+                try {
+                    final IndexingUtils.ChunkProjection chunkProjection =
+                        IndexingUtils.computeProjection(chunkCoords, metadata.shape(), chunkShape, offset,
+                            shape
+                        );
+
+                    // Pre-fill the target region with the fill value; it stays in place when the
+                    // chunk turns out not to exist in the store.
+                    if (chunkIsInArray(chunkCoords)) {
+                        MultiArrayUtils.copyRegion(metadata.allocateFillValueChunk(),
+                            chunkProjection.chunkOffset, outputArray, chunkProjection.outOffset,
+                            chunkProjection.shape
+                        );
+                    }
+
+                    final String[] chunkKeys = metadata.chunkKeyEncoding().encodeChunkKey(chunkCoords);
+                    final StoreHandle chunkHandle = storeHandle().resolve(chunkKeys);
+                    if (!chunkHandle.exists()) {
+                        return;
+                    }
+                    if (codecPipeline.supportsPartialDecode()) {
+                        // Only the requested region is decoded, so the source offset is the origin.
+                        final ucar.ma2.Array chunkArray = codecPipeline.decodePartial(chunkHandle,
+                            Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape);
+                        MultiArrayUtils.copyRegion(chunkArray, new int[metadata.ndim()], outputArray,
+                            chunkProjection.outOffset, chunkProjection.shape
+                        );
+                    } else {
+                        MultiArrayUtils.copyRegion(readChunk(chunkCoords), chunkProjection.chunkOffset,
+                            outputArray, chunkProjection.outOffset, chunkProjection.shape
+                        );
+                    }
+
+                } catch (ZarrException e) {
+                    // Checked exceptions cannot leave the lambda; the cause is preserved.
+                    throw new RuntimeException(e);
+                }
+            });
+        return outputArray;
+    }
+
+    /**
+     * Creates a fluent accessor bound to this array.
+     *
+     * @return a new accessor without offset or shape set
+     */
+    default ArrayAccessor access() {
+        return new ArrayAccessor(this);
+    }
+
+    final class ArrayAccessor {
+        @Nullable
+        long[] offset;
+        @Nullable
+        int[] shape;
+        @Nonnull
+        Array array;
+
+        public ArrayAccessor(@Nonnull Array array) {
+            this.array = array;
+        }
+
+        @Nonnull
+        public ArrayAccessor withOffset(@Nonnull long... offset) {
+            this.offset = offset;
+            return this;
+        }
+
+
+        @Nonnull
+        public ArrayAccessor withShape(@Nonnull int...
shape) {
+            this.shape = shape;
+            return this;
+        }
+
+        @Nonnull
+        public ArrayAccessor withShape(@Nonnull long... shape) {
+            this.shape = Utils.toIntArray(shape);
+            return this;
+        }
+
+        @Nonnull
+        public ucar.ma2.Array read() throws ZarrException {
+            if (offset == null) {
+                throw new ZarrException("`offset` needs to be set.");
+            }
+            if (shape == null) {
+                throw new ZarrException("`shape` needs to be set.");
+            }
+            return array.read(offset, shape);
+        }
+
+        public void write(@Nonnull ucar.ma2.Array content) throws ZarrException {
+            if (offset == null) {
+                throw new ZarrException("`offset` needs to be set.");
+            }
+            array.write(offset, content);
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/dev/zarr/zarrjava/core/ArrayMetadata.java b/src/main/java/dev/zarr/zarrjava/core/ArrayMetadata.java
new file mode 100644
index 0000000..fc42294
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/ArrayMetadata.java
@@ -0,0 +1,167 @@
+package dev.zarr.zarrjava.core;
+
+import dev.zarr.zarrjava.ZarrException;
+import dev.zarr.zarrjava.utils.MultiArrayUtils;
+import dev.zarr.zarrjava.utils.Utils;
+import dev.zarr.zarrjava.core.chunkkeyencoding.ChunkKeyEncoding;
+import ucar.ma2.Array;
+
+import javax.annotation.Nonnull;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+public interface ArrayMetadata {
+    int ndim();
+
+    int[] chunkShape();
+
+    long[] shape();
+
+    DataType dataType();
+
+    Array allocateFillValueChunk();
+
+    ChunkKeyEncoding chunkKeyEncoding();
+
+    Object parsedFillValue();
+
+    /**
+     * Converts a raw fill value into the boxed Java value matching the data type.
+     * <p>
+     * Accepted inputs are a Boolean (bool data type only), any Number (narrowed to the data type),
+     * the strings "NaN", "+Infinity" and "-Infinity" (float data types only), and strings starting
+     * with "0b" or "0x" that spell out the bytes of the value in binary or hexadecimal.
+     *
+     * @param fillValue the raw fill value
+     * @param dataType  the data type the fill value has to match
+     * @return the fill value as Boolean, Byte, Short, Integer, Long, Float or Double
+     * @throws ZarrException if the fill value is not valid for the data type, including "0b"/"0x"
+     *                       strings that are too short or contain invalid digits
+     */
+    static Object parseFillValue(Object fillValue, @Nonnull DataType dataType)
+        throws ZarrException {
+        boolean dataTypeIsBool = dataType == dev.zarr.zarrjava.v3.DataType.BOOL || dataType == dev.zarr.zarrjava.v2.DataType.BOOL;
+        boolean dataTypeIsByte = dataType == dev.zarr.zarrjava.v3.DataType.INT8 || dataType == dev.zarr.zarrjava.v2.DataType.INT8 || dataType == dev.zarr.zarrjava.v3.DataType.UINT8 || dataType == dev.zarr.zarrjava.v2.DataType.UINT8;
+        boolean dataTypeIsShort = dataType == dev.zarr.zarrjava.v3.DataType.INT16 || dataType == dev.zarr.zarrjava.v2.DataType.INT16 || dataType == dev.zarr.zarrjava.v3.DataType.UINT16 || dataType == dev.zarr.zarrjava.v2.DataType.UINT16;
+        boolean dataTypeIsInt = dataType == dev.zarr.zarrjava.v3.DataType.INT32 || dataType == dev.zarr.zarrjava.v2.DataType.INT32 || dataType == dev.zarr.zarrjava.v3.DataType.UINT32 || dataType == dev.zarr.zarrjava.v2.DataType.UINT32;
+        boolean dataTypeIsLong = dataType == dev.zarr.zarrjava.v3.DataType.INT64 || dataType == dev.zarr.zarrjava.v2.DataType.INT64 || dataType == dev.zarr.zarrjava.v3.DataType.UINT64 || dataType == dev.zarr.zarrjava.v2.DataType.UINT64;
+        boolean dataTypeIsFloat = dataType == dev.zarr.zarrjava.v3.DataType.FLOAT32 || dataType == dev.zarr.zarrjava.v2.DataType.FLOAT32;
+        boolean dataTypeIsDouble = dataType == dev.zarr.zarrjava.v3.DataType.FLOAT64 || dataType == dev.zarr.zarrjava.v2.DataType.FLOAT64;
+
+        if (fillValue instanceof Boolean) {
+            Boolean fillValueBool = (Boolean) fillValue;
+            if (dataTypeIsBool) {
+                return fillValueBool;
+            }
+        }
+        if (fillValue instanceof Number) {
+            Number fillValueNumber = (Number) fillValue;
+            if (dataTypeIsBool) {
+                return fillValueNumber.byteValue() != 0;
+            } else if (dataTypeIsByte) {
+                return fillValueNumber.byteValue();
+            } else if (dataTypeIsShort) {
+                return fillValueNumber.shortValue();
+            } else if (dataTypeIsInt) {
+                return fillValueNumber.intValue();
+            } else if (dataTypeIsLong) {
+                return fillValueNumber.longValue();
+            } else if (dataTypeIsFloat) {
+                return fillValueNumber.floatValue();
+            } else if (dataTypeIsDouble) {
+                return fillValueNumber.doubleValue();
+            }
+            // Fallback to throwing below
+        } else if (fillValue instanceof String) {
+            String fillValueString = (String) fillValue;
+            if (fillValueString.equals("NaN")) {
+                if (dataTypeIsFloat) {
+                    return Float.NaN;
+                } else if (dataTypeIsDouble) {
+                    return Double.NaN;
+                }
+                throw new ZarrException(
+                    "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'.");
+            } else if (fillValueString.equals("+Infinity")) {
+                if (dataTypeIsFloat) {
+                    return Float.POSITIVE_INFINITY;
+                } else if (dataTypeIsDouble) {
+                    return Double.POSITIVE_INFINITY;
+                }
+                throw new ZarrException(
+                    "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'.");
+            } else if (fillValueString.equals("-Infinity")) {
+                if (dataTypeIsFloat) {
+                    return Float.NEGATIVE_INFINITY;
+                } else if (dataTypeIsDouble) {
+                    return Double.NEGATIVE_INFINITY;
+                }
+                throw new ZarrException(
+                    "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'.");
+            } else if (fillValueString.startsWith("0b") || fillValueString.startsWith("0x")) {
+                // Binary spells one byte with 8 digits, hexadecimal with 2; both follow the prefix.
+                final int radix = fillValueString.startsWith("0b") ? 2 : 16;
+                final int digitsPerByte = radix == 2 ? 8 : 2;
+                final ByteBuffer buf;
+                try {
+                    buf = Utils.makeByteBuffer(dataType.getByteCount(), b -> {
+                        for (int i = 0; i < dataType.getByteCount(); i++) {
+                            b.put((byte) Integer.parseInt(
+                                fillValueString.substring(2 + i * digitsPerByte, 2 + (i + 1) * digitsPerByte),
+                                radix));
+                        }
+                        return b;
+                    });
+                } catch (IndexOutOfBoundsException | NumberFormatException e) {
+                    // Too few digits or a non-digit character: report it as an invalid fill value
+                    // instead of leaking an unchecked exception to the caller.
+                    throw new ZarrException(
+                        "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'.", e);
+                }
+                if (dataTypeIsBool) {
+                    return buf.get() != 0;
+                } else if (dataTypeIsByte) {
+                    return buf.get();
+                } else if (dataTypeIsShort) {
+                    return buf.getShort();
+                } else if (dataTypeIsInt) {
+                    return buf.getInt();
+                } else if (dataTypeIsLong) {
+                    return buf.getLong();
+                } else if (dataTypeIsFloat) {
+                    return buf.getFloat();
+                } else if (dataTypeIsDouble) {
+                    return buf.getDouble();
+                }
+                // Fallback to throwing below
+            }
+        }
+        throw new ZarrException("Invalid fill value '" + fillValue + "'.");
+    }
+
+    final class CoreArrayMetadata {
+
+        public final long[] shape;
+        public final int[] chunkShape;
+        public final DataType dataType;
+        public final Object
parsedFillValue;
+
+        public CoreArrayMetadata(long[] shape, int[] chunkShape, DataType dataType,
+                                 Object parsedFillValue) {
+            this.shape = shape;
+            this.chunkShape = chunkShape;
+            this.dataType = dataType;
+            this.parsedFillValue = parsedFillValue;
+        }
+
+        /** Returns the number of dimensions, i.e. the length of {@code shape}. */
+        public int ndim() {
+            return shape.length;
+        }
+
+        /**
+         * Returns the number of elements per chunk: the product of all entries of
+         * {@code chunkShape}, computed in int arithmetic.
+         */
+        public int chunkSize() {
+            return Arrays.stream(chunkShape)
+                .reduce(1, (acc, a) -> acc * a);
+        }
+
+        /** Returns the data type's byte count multiplied by {@link #chunkSize()}. */
+        public int chunkByteLength() {
+            return this.dataType.getByteCount() * chunkSize();
+        }
+
+        /** Allocates a new array of shape {@code chunkShape} and fills it with the parsed fill value. */
+        public ucar.ma2.Array allocateFillValueChunk() {
+            ucar.ma2.Array outputArray = ucar.ma2.Array.factory(dataType.getMA2DataType(), chunkShape);
+            MultiArrayUtils.fill(outputArray, parsedFillValue);
+            return outputArray;
+        }
+    }
+
+
+}
diff --git a/src/main/java/dev/zarr/zarrjava/core/DataType.java b/src/main/java/dev/zarr/zarrjava/core/DataType.java
new file mode 100644
index 0000000..d63de06
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/DataType.java
@@ -0,0 +1,7 @@
+package dev.zarr.zarrjava.core;
+
+/** Data type abstraction used by the core array code. */
+public interface DataType {
+    /** Returns the ucar.ma2 data type used when allocating arrays for this data type. */
+    ucar.ma2.DataType getMA2DataType();
+
+    /** Returns the byte count of this data type (presumably per element; confirm against implementations). */
+    int getByteCount();
+}
diff --git a/src/main/java/dev/zarr/zarrjava/core/chunkkeyencoding/ChunkKeyEncoding.java b/src/main/java/dev/zarr/zarrjava/core/chunkkeyencoding/ChunkKeyEncoding.java
new file mode 100644
index 0000000..c82cb75
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/chunkkeyencoding/ChunkKeyEncoding.java
@@ -0,0 +1,7 @@
+package dev.zarr.zarrjava.core.chunkkeyencoding;
+
+/** Maps chunk coordinates to the store keys under which a chunk is kept. */
+public interface ChunkKeyEncoding {
+
+    /** Returns the key segments for the chunk at the given chunk coordinates. */
+    String[] encodeChunkKey(long[] chunkCoords);
+
+}
diff --git a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/Separator.java b/src/main/java/dev/zarr/zarrjava/core/chunkkeyencoding/Separator.java
similarity index 84%
rename from src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/Separator.java
rename to src/main/java/dev/zarr/zarrjava/core/chunkkeyencoding/Separator.java
index 0aa4910..5074cd5 100644
---
a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/Separator.java
+++ b/src/main/java/dev/zarr/zarrjava/core/chunkkeyencoding/Separator.java
@@ -1,4 +1,4 @@
-package dev.zarr.zarrjava.v3.chunkkeyencoding;
+package dev.zarr.zarrjava.core.chunkkeyencoding;
 
 import com.fasterxml.jackson.annotation.JsonValue;
 
diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/AbstractCodec.java b/src/main/java/dev/zarr/zarrjava/core/codec/AbstractCodec.java
new file mode 100644
index 0000000..d4c634f
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/AbstractCodec.java
@@ -0,0 +1,20 @@
+package dev.zarr.zarrjava.core.codec;
+
+import dev.zarr.zarrjava.ZarrException;
+import dev.zarr.zarrjava.core.ArrayMetadata;
+
+/**
+ * Base class for codecs that stores the core array metadata handed to the codec.
+ */
+public abstract class AbstractCodec implements Codec{
+    // Stays null until setCoreArrayMetadata has been called.
+    protected ArrayMetadata.CoreArrayMetadata arrayMetadata;
+
+    /**
+     * Returns the stored core array metadata.
+     *
+     * @throws ZarrException if no metadata has been set on this codec yet
+     */
+    public ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException {
+        if (arrayMetadata == null) {
+            throw new ZarrException("arrayMetadata needs to get set in for every codec");
+        }
+        return this.arrayMetadata;
+    }
+
+    /**
+     * Stores the core array metadata for later use by the codec.
+     *
+     * @param arrayMetadata the metadata to store
+     */
+    public void setCoreArrayMetadata(ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException{
+        this.arrayMetadata = arrayMetadata;
+    }
+}
+
diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/ArrayArrayCodec.java b/src/main/java/dev/zarr/zarrjava/core/codec/ArrayArrayCodec.java
new file mode 100644
index 0000000..be7a7bd
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/ArrayArrayCodec.java
@@ -0,0 +1,14 @@
+package dev.zarr.zarrjava.core.codec;
+
+import dev.zarr.zarrjava.ZarrException;
+import ucar.ma2.Array;
+
+/**
+ * Codec whose encode and decode steps both take and return an array.
+ */
+public abstract class ArrayArrayCodec extends AbstractCodec {
+
+    public abstract Array encode(Array chunkArray)
+        throws ZarrException;
+
+    public abstract Array decode(Array chunkArray)
+        throws ZarrException;
+
+}
diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/ArrayBytesCodec.java b/src/main/java/dev/zarr/zarrjava/core/codec/ArrayBytesCodec.java
similarity index
62% rename from src/main/java/dev/zarr/zarrjava/v3/codec/ArrayBytesCodec.java rename to src/main/java/dev/zarr/zarrjava/core/codec/ArrayBytesCodec.java index 361ae61..cf4ab64 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/ArrayBytesCodec.java +++ b/src/main/java/dev/zarr/zarrjava/core/codec/ArrayBytesCodec.java @@ -1,16 +1,16 @@ -package dev.zarr.zarrjava.v3.codec; +package dev.zarr.zarrjava.core.codec; import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.store.StoreHandle; import java.nio.ByteBuffer; import ucar.ma2.Array; -public abstract class ArrayBytesCodec extends Codec { +public abstract class ArrayBytesCodec extends AbstractCodec { - protected abstract ByteBuffer encode(Array chunkArray) + public abstract ByteBuffer encode(Array chunkArray) throws ZarrException; - protected abstract Array decode(ByteBuffer chunkBytes) + public abstract Array decode(ByteBuffer chunkBytes) throws ZarrException; public abstract static class WithPartialDecode extends ArrayBytesCodec { @@ -19,8 +19,8 @@ public abstract static class WithPartialDecode extends ArrayBytesCodec { public abstract ByteBuffer encode(Array shardArray) throws ZarrException; protected abstract Array decodePartial( - StoreHandle handle, long[] offset, int[] shape - ) throws ZarrException; + StoreHandle handle, long[] offset, int[] shape + ) throws ZarrException; } } diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/BytesBytesCodec.java b/src/main/java/dev/zarr/zarrjava/core/codec/BytesBytesCodec.java new file mode 100644 index 0000000..0574f2f --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/core/codec/BytesBytesCodec.java @@ -0,0 +1,16 @@ +package dev.zarr.zarrjava.core.codec; + +import dev.zarr.zarrjava.ZarrException; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +public abstract class BytesBytesCodec extends AbstractCodec { + + public abstract ByteBuffer encode(ByteBuffer chunkBytes) throws 
ZarrException;
+
+    public abstract ByteBuffer decode(ByteBuffer chunkBytes) throws ZarrException;
+
+}
diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/Codec.java b/src/main/java/dev/zarr/zarrjava/core/codec/Codec.java
new file mode 100644
index 0000000..04bab9a
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/Codec.java
@@ -0,0 +1,14 @@
+package dev.zarr.zarrjava.core.codec;
+
+import dev.zarr.zarrjava.ZarrException;
+import dev.zarr.zarrjava.core.ArrayMetadata;
+
+/**
+ * Common contract of all codecs: they receive core array metadata and can hand it back.
+ */
+public interface Codec {
+    /** Hands the core array metadata to this codec. */
+    void setCoreArrayMetadata(ArrayMetadata.CoreArrayMetadata codecArrayMetadata) throws ZarrException;
+
+    /** Returns the core array metadata of this codec. */
+    ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException;
+
+    /**
+     * Computes the encoded size for a given input length. The default implementation always
+     * throws; codecs that support the computation override it.
+     *
+     * @throws ZarrException always, unless overridden
+     */
+    default long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException{
+        throw new ZarrException("Not implemented for " + this.getClass());
+    }
+}
diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/CodecBuilder.java b/src/main/java/dev/zarr/zarrjava/core/codec/CodecBuilder.java
new file mode 100644
index 0000000..f4f1706
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/CodecBuilder.java
@@ -0,0 +1,14 @@
+package dev.zarr.zarrjava.core.codec;
+
+import dev.zarr.zarrjava.core.DataType;
+
+/**
+ * Base class for builders that produce an array of codecs for a given data type.
+ */
+public abstract class CodecBuilder {
+
+    final protected DataType dataType;
+
+    public CodecBuilder(DataType dataType) {
+        this.dataType = dataType;
+    }
+
+    /** Returns the codecs assembled by this builder. */
+    public abstract Codec[] build();
+}
diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/CodecPipeline.java b/src/main/java/dev/zarr/zarrjava/core/codec/CodecPipeline.java
similarity index 98%
rename from src/main/java/dev/zarr/zarrjava/v3/codec/CodecPipeline.java
rename to src/main/java/dev/zarr/zarrjava/core/codec/CodecPipeline.java
index 920a1f4..9d83836 100644
--- a/src/main/java/dev/zarr/zarrjava/v3/codec/CodecPipeline.java
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/CodecPipeline.java
@@ -1,8 +1,8 @@
-package dev.zarr.zarrjava.v3.codec;
+package
dev.zarr.zarrjava.core.codec;
 
 import dev.zarr.zarrjava.ZarrException;
 import dev.zarr.zarrjava.store.StoreHandle;
-import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;
+import dev.zarr.zarrjava.core.ArrayMetadata.CoreArrayMetadata;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import javax.annotation.Nonnull;
diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/core/BloscCodec.java b/src/main/java/dev/zarr/zarrjava/core/codec/core/BloscCodec.java
new file mode 100644
index 0000000..bdbe3f1
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/core/BloscCodec.java
@@ -0,0 +1,73 @@
+package dev.zarr.zarrjava.core.codec.core;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+import com.scalableminds.bloscjava.Blosc;
+import dev.zarr.zarrjava.ZarrException;
+import dev.zarr.zarrjava.core.codec.BytesBytesCodec;
+import dev.zarr.zarrjava.utils.Utils;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Shared part of the Blosc codec: decoding and the JSON (de)serializers for the compressor name.
+ */
+public abstract class BloscCodec extends BytesBytesCodec {
+
+    /**
+     * Decompresses a Blosc-compressed chunk.
+     *
+     * @param chunkBytes the compressed bytes
+     * @return a buffer wrapping the decompressed bytes
+     * @throws ZarrException if decompression fails for any reason; the cause is preserved
+     */
+    @Override
+    public ByteBuffer decode(ByteBuffer chunkBytes)
+        throws ZarrException {
+        try {
+            return ByteBuffer.wrap(Blosc.decompress(Utils.toArray(chunkBytes)));
+        } catch (Exception ex) {
+            throw new ZarrException("Error in decoding blosc.", ex);
+        }
+    }
+
+    /** Reads a {@link Blosc.Compressor} from its JSON string name. */
+    public static final class CustomCompressorDeserializer extends StdDeserializer<Blosc.Compressor> {
+
+        public CustomCompressorDeserializer() {
+            this(null);
+        }
+
+        public CustomCompressorDeserializer(Class<?> vc) {
+            super(vc);
+        }
+
+        @Override
+        public Blosc.Compressor deserialize(JsonParser jsonParser, DeserializationContext ctxt)
+            throws IOException {
+            String cname = jsonParser.getCodec()
+                .readValue(jsonParser, String.class);
+            Blosc.Compressor compressor = Blosc.Compressor.fromString(cname);
+            if (compressor == null) {
+                throw new JsonParseException(
+                    jsonParser,
+                    String.format("Could not parse the Blosc.Compressor. Got '%s'", cname)
+                );
+            }
+            return compressor;
+        }
+    }
+
+    /** Writes a {@link Blosc.Compressor} as its JSON string name. */
+    public static final class CustomCompressorSerializer extends StdSerializer<Blosc.Compressor> {
+
+        public CustomCompressorSerializer() {
+            super(Blosc.Compressor.class);
+        }
+
+        public CustomCompressorSerializer(Class<Blosc.Compressor> t) {
+            super(t);
+        }
+
+        // The type argument on StdSerializer is required: against the raw type this method would
+        // not override serialize(Object, ...) and @Override would be rejected.
+        @Override
+        public void serialize(Blosc.Compressor compressor, JsonGenerator generator,
+                              SerializerProvider provider)
+            throws IOException {
+            generator.writeString(compressor.getValue());
+        }
+    }
+}
diff --git a/src/main/java/dev/zarr/zarrjava/core/codec/core/BytesCodec.java b/src/main/java/dev/zarr/zarrjava/core/codec/core/BytesCodec.java
new file mode 100644
index 0000000..7c70b54
--- /dev/null
+++ b/src/main/java/dev/zarr/zarrjava/core/codec/core/BytesCodec.java
@@ -0,0 +1,51 @@
+package dev.zarr.zarrjava.core.codec.core;
+
+import com.fasterxml.jackson.annotation.JsonValue;
+import dev.zarr.zarrjava.core.codec.ArrayBytesCodec;
+import ucar.ma2.Array;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * Array-to-bytes codec that converts between an array and its raw bytes in a given byte order.
+ */
+public abstract class BytesCodec extends ArrayBytesCodec {
+    protected abstract ByteOrder getByteOrder();
+
+    @Override
+    public Array decode(ByteBuffer chunkBytes) {
+        // The byte order is set on the caller's buffer before the array is created from it.
+        chunkBytes.order(getByteOrder());
+        return Array.factory(arrayMetadata.dataType.getMA2DataType(), arrayMetadata.chunkShape,
+            chunkBytes);
+    }
+
+    @Override
+    public ByteBuffer encode(Array chunkArray) {
+        return chunkArray.getDataAsByteBuffer(getByteOrder());
+    }
+
+    public enum Endian {
+        LITTLE("little"),
+        BIG("big");
+        private final String endian;
+
+        Endian(String endian) {
+            this.endian = endian;
+        }
+
+        @JsonValue
+        public String getValue() {
+            return endian;
+        }
+
+        public ByteOrder getByteOrder() {
+            switch (this) {
+                case LITTLE:
+                    return
ByteOrder.LITTLE_ENDIAN;
+                case BIG:
+                    return ByteOrder.BIG_ENDIAN;
+                default:
+                    throw new RuntimeException("Unreachable");
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java
index 8ffc5f4..b82424f 100644
--- a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java
+++ b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java
@@ -3,6 +3,7 @@
 import dev.zarr.zarrjava.utils.Utils;
 import java.nio.ByteBuffer;
 import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
 import java.util.stream.Stream;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -70,4 +71,11 @@ public String toString() {
   public StoreHandle resolve(String... subKeys) {
     return new StoreHandle(store, Utils.concatArrays(keys, subKeys));
   }
+
+  /**
+   * Returns the filesystem path this handle's keys resolve to.
+   *
+   * @throws UnsupportedOperationException if the underlying store is not a FilesystemStore
+   */
+  public Path toPath() {
+    if (!(store instanceof FilesystemStore)) {
+      throw new UnsupportedOperationException("The underlying store is not a filesystem store.");
+    }
+    return ((FilesystemStore) store).resolveKeys(keys);
+  }
 }
diff --git a/src/main/java/dev/zarr/zarrjava/utils/Utils.java b/src/main/java/dev/zarr/zarrjava/utils/Utils.java
index 3ea4bfd..7cc88d2 100644
--- a/src/main/java/dev/zarr/zarrjava/utils/Utils.java
+++ b/src/main/java/dev/zarr/zarrjava/utils/Utils.java
@@ -3,6 +3,7 @@
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.util.Arrays;
@@ -78,6 +79,15 @@ public static <T> T[] concatArrays(T[] array1, T[]... arrays) {
     return result;
   }
 
+  /**
+   * Copies all remaining bytes of the input stream to the output stream. Neither stream is closed.
+   *
+   * @param inputStream  the stream to read from
+   * @param outputStream the stream to write to
+   * @throws IOException if reading or writing fails
+   */
+  public static void copyStream(InputStream inputStream, OutputStream outputStream) throws IOException {
+    byte[] buffer = new byte[4096];
+    int len;
+    // Only -1 signals end of stream; a 0-byte read must not end the copy early.
+    while ((len = inputStream.read(buffer)) != -1) {
+      outputStream.write(buffer, 0, len);
+    }
+  }
+
+
   public static boolean isPermutation(int[] array) {
     if (array.length==0){
       return false;
diff --git a/src/main/java/dev/zarr/zarrjava/v2/Array.java b/src/main/java/dev/zarr/zarrjava/v2/Array.java
index f9e590a..52d2268 100644
--- a/src/main/java/dev/zarr/zarrjava/v2/Array.java
+++ b/src/main/java/dev/zarr/zarrjava/v2/Array.java
@@ -2,31 +2,120 @@
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
+import dev.zarr.zarrjava.ZarrException;
 import dev.zarr.zarrjava.store.StoreHandle;
 import dev.zarr.zarrjava.utils.Utils;
+import dev.zarr.zarrjava.core.codec.CodecPipeline;
+import dev.zarr.zarrjava.v2.codec.Codec;
+import dev.zarr.zarrjava.v2.codec.CodecRegistry;
+import dev.zarr.zarrjava.v2.codec.core.BytesCodec;
+
+import javax.annotation.Nonnull;
 import java.io.IOException;
+import java.nio.ByteBuffer;
 import java.util.Arrays;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
-public class Array {
+public class Array implements dev.zarr.zarrjava.core.Array {
 
   static final String ZARRAY = ".zarray";
   public ArrayMetadata metadata;
   public StoreHandle storeHandle;
+  CodecPipeline codecPipeline;
 
-  Array(StoreHandle storeHandle) throws IOException {
+  protected Array(StoreHandle storeHandle, ArrayMetadata arrayMetadata) throws IOException, ZarrException {
     this.storeHandle = storeHandle;
+    this.metadata = arrayMetadata;
+    // Pipeline order: filters, then the bytes codec, then the optional compressor.
+    this.codecPipeline = new CodecPipeline(Utils.concatArrays(
+        new Codec[]{},
+        metadata.filters == null ? new Codec[]{} : metadata.filters,
+        new Codec[]{new BytesCodec(arrayMetadata.endianness.toEndian())},
+        metadata.compressor == null ?
new Codec[]{} : new Codec[]{metadata.compressor} + ), metadata.coreArrayMetadata); + } + /** + * Opens an existing Zarr array at a specified storage location. + * + * @param storeHandle the storage location of the Zarr array + * @throws IOException throws IOException if the metadata cannot be read + * @throws ZarrException throws ZarrException if the Zarr array cannot be opened + */ + public static Array open(StoreHandle storeHandle) throws IOException, ZarrException { + return new Array( + storeHandle, + makeObjectMapper() + .readValue( + Utils.toArray(storeHandle.resolve(ZARRAY).readNonNull()), + ArrayMetadata.class + ) + ); + } + + public static ObjectMapper makeObjectMapper() { ObjectMapper objectMapper = new ObjectMapper(); objectMapper.registerModule(new Jdk8Module()); - this.metadata = objectMapper.readValue( - Utils.toArray(storeHandle.resolve(ZARRAY).readNonNull()), - ArrayMetadata.class - ); + objectMapper.registerSubtypes(CodecRegistry.getNamedTypes()); + return objectMapper; + } + + + /** + * Creates a new Zarr array with the provided metadata at a specified storage location. This + * method will raise an exception if a Zarr array already exists at the specified storage + * location. + * + * @param storeHandle the storage location of the Zarr array + * @param arrayMetadata the metadata of the Zarr array + * @throws IOException if the metadata cannot be serialized + * @throws ZarrException if the Zarr array cannot be created + */ + public static Array create(StoreHandle storeHandle, ArrayMetadata arrayMetadata) + throws IOException, ZarrException { + return Array.create(storeHandle, arrayMetadata, false); + } + + /** + * Creates a new Zarr array with the provided metadata at a specified storage location. If + * `existsOk` is false, this method will raise an exception if a Zarr array already exists at the + * specified storage location. 
+ * + * @param storeHandle the storage location of the Zarr array + * @param arrayMetadata the metadata of the Zarr array + * @param existsOk if true, no exception is raised if the Zarr array already exists + * @throws IOException throws IOException if the metadata cannot be serialized + * @throws ZarrException throws ZarrException if the Zarr array cannot be created + */ + public static Array create(StoreHandle storeHandle, ArrayMetadata arrayMetadata, boolean existsOk) + throws IOException, ZarrException { + StoreHandle metadataHandle = storeHandle.resolve(ZARRAY); + if (!existsOk && metadataHandle.exists()) { + throw new RuntimeException( + "Trying to create a new array in " + storeHandle + ". But " + metadataHandle + + " already exists."); + } + ObjectMapper objectMapper = makeObjectMapper(); + ByteBuffer metadataBytes = ByteBuffer.wrap(objectMapper.writeValueAsBytes(arrayMetadata)); + metadataHandle.set(metadataBytes); + return new Array(storeHandle, arrayMetadata); + } + + public static Array create(StoreHandle storeHandle, + Function<ArrayMetadataBuilder, ArrayMetadataBuilder> arrayMetadataBuilderMapper, + boolean existsOk) throws IOException, ZarrException { + return create(storeHandle, + arrayMetadataBuilderMapper.apply(new ArrayMetadataBuilder()).build(), existsOk); } - public static Array open(StoreHandle storeHandle) throws IOException { - return new Array(storeHandle); + @Nonnull + public static ArrayMetadataBuilder metadataBuilder() { + return new ArrayMetadataBuilder(); + } + + @Nonnull + public static ArrayMetadataBuilder metadataBuilder(ArrayMetadata existingMetadata) { + return ArrayMetadataBuilder.fromArrayMetadata(existingMetadata); } @Override @@ -38,4 +127,20 @@ public String toString() { metadata.dataType ); } + + @Override + public ArrayMetadata metadata() { + return metadata; + } + + @Override + public StoreHandle storeHandle() { + return storeHandle; + } + + @Override + public CodecPipeline codecPipeline() { + return codecPipeline; + } + } diff --git
a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadata.java b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadata.java index 24de9d4..d0fa0e1 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadata.java +++ b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadata.java @@ -1,20 +1,37 @@ package dev.zarr.zarrjava.v2; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import dev.zarr.zarrjava.v3.chunkkeyencoding.Separator; -import java.util.Optional; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.chunkkeyencoding.ChunkKeyEncoding; +import dev.zarr.zarrjava.utils.MultiArrayUtils; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; +import dev.zarr.zarrjava.v2.chunkkeyencoding.V2ChunkKeyEncoding; +import dev.zarr.zarrjava.v2.codec.Codec; +import ucar.ma2.Array; -public class ArrayMetadata { +import javax.annotation.Nullable; + +import static dev.zarr.zarrjava.core.ArrayMetadata.parseFillValue; + + +public class ArrayMetadata implements dev.zarr.zarrjava.core.ArrayMetadata { + static final int ZARR_FORMAT = 2; @JsonProperty("zarr_format") - public final int zarrFormat = 2; + public final int zarrFormat = ZARR_FORMAT; public long[] shape; - public long[] chunks; + public int[] chunks; @JsonProperty("dtype") public DataType dataType; + @JsonIgnore + public final Endianness endianness; + + @JsonProperty("order") public Order order; @JsonProperty("dimension_separator") @@ -22,7 +39,91 @@ public class ArrayMetadata { @JsonProperty("fill_value") public Object fillValue; + @JsonIgnore + public final Object parsedFillValue; + + @Nullable + public Codec[] filters; + @Nullable + public Codec compressor; + + @JsonIgnore + public CoreArrayMetadata coreArrayMetadata; + + + @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) + public ArrayMetadata( + @JsonProperty(value = "zarr_format", required = true) int zarrFormat, + @JsonProperty(value = "shape", 
required = true) long[] shape, + @JsonProperty(value = "chunks", required = true) int[] chunks, + @JsonProperty(value = "dtype", required = true) DataType dataType, + @Nullable @JsonProperty(value = "fill_value", required = true) Object fillValue, + @JsonProperty(value = "order", required = true) Order order, + @Nullable @JsonProperty(value = "filters", required = true) Codec[] filters, + @Nullable @JsonProperty(value = "compressor", required = true) Codec compressor, + @Nullable @JsonProperty(value = "dimension_separator") Separator dimensionSeparator + ) throws ZarrException { + super(); + if (zarrFormat != this.zarrFormat) { + throw new ZarrException( + "Expected zarr format '" + this.zarrFormat + "', got '" + zarrFormat + "'."); + } + this.shape = shape; + this.chunks = chunks; + this.dataType = dataType; + this.endianness = dataType.getEndianness(); + this.fillValue = fillValue; + if (fillValue == null) { + this.parsedFillValue = null; + } else { + this.parsedFillValue = parseFillValue(fillValue, this.dataType); + } + this.order = order; + this.dimensionSeparator = dimensionSeparator; + this.filters = filters; + this.compressor = compressor; + this.coreArrayMetadata = + new ArrayMetadata.CoreArrayMetadata(shape, chunks, + this.dataType, + parsedFillValue + ); + } + + + public int ndim() { + return shape.length; + } + + @Override + public int[] chunkShape() { + return chunks; + } + + @Override + public long[] shape() { + return shape; + } + + @Override + public DataType dataType() { + return dataType; + } + + @Override + public Array allocateFillValueChunk() { + ucar.ma2.Array outputArray = ucar.ma2.Array.factory(dataType.getMA2DataType(), chunks); + if (parsedFillValue != null) MultiArrayUtils.fill(outputArray, parsedFillValue); + return outputArray; + } + + @Override + public ChunkKeyEncoding chunkKeyEncoding() { + Separator separator = dimensionSeparator == null ? 
Separator.DOT : dimensionSeparator; + return new V2ChunkKeyEncoding(separator); + } - public Optional filters; - public Optional compressor; + @Override + public Object parsedFillValue() { + return parsedFillValue; + } } diff --git a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java new file mode 100644 index 0000000..ea29b16 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java @@ -0,0 +1,144 @@ +package dev.zarr.zarrjava.v2; + +import com.scalableminds.bloscjava.Blosc; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; +import dev.zarr.zarrjava.v2.codec.Codec; +import dev.zarr.zarrjava.v2.codec.core.BloscCodec; +import dev.zarr.zarrjava.v2.codec.core.ZlibCodec; + +public class ArrayMetadataBuilder { + long[] shape = null; + int[] chunks = null; + DataType dataType = null; + Order order = Order.C; + Separator dimensionSeparator = Separator.DOT; + Object fillValue = null; + Codec[] filters = null; + Codec compressor = null; + + + protected ArrayMetadataBuilder() { + } + + protected static ArrayMetadataBuilder fromArrayMetadata(ArrayMetadata arrayMetadata) { + ArrayMetadataBuilder builder = new ArrayMetadataBuilder(); + builder.shape = arrayMetadata.shape; + builder.chunks = arrayMetadata.chunks; + builder.dataType = arrayMetadata.dataType; + builder.order = arrayMetadata.order; + builder.dimensionSeparator = arrayMetadata.dimensionSeparator; + builder.fillValue = arrayMetadata.parsedFillValue; + builder.filters = arrayMetadata.filters; + builder.compressor = arrayMetadata.compressor; + return builder; + } + + public ArrayMetadataBuilder withShape(long... shape) { + this.shape = shape; + return this; + } + + public ArrayMetadataBuilder withChunks(int... 
chunks) { + this.chunks = chunks; + return this; + } + + public ArrayMetadataBuilder withDataType(DataType dataTypeV2) { + this.dataType = dataTypeV2; + return this; + } + + public ArrayMetadataBuilder withOrder(Order order) { + this.order = order; + return this; + } + + public ArrayMetadataBuilder withDimensionSeparator(Separator dimensionSeparator) { + this.dimensionSeparator = dimensionSeparator; + return this; + } + + public ArrayMetadataBuilder withFillValue(Object fillValue) { + this.fillValue = fillValue; + return this; + } + + public ArrayMetadataBuilder withCompressor(Codec compressor) { + this.compressor = compressor; + return this; + } + + public ArrayMetadataBuilder withBloscCompressor( + Blosc.Compressor cname, Blosc.Shuffle shuffle, int clevel, int typeSize, + int blockSize + ) { + try { + this.compressor = new BloscCodec(cname, shuffle, clevel, typeSize, blockSize); + } catch (ZarrException e) { + throw new RuntimeException(e); + } + return this; + } + + public ArrayMetadataBuilder withBloscCompressor(String cname, String shuffle, int clevel, int blockSize) { + if (shuffle.equals("shuffle")) { + shuffle = "byteshuffle"; + } + return withBloscCompressor(Blosc.Compressor.fromString(cname), Blosc.Shuffle.fromString(shuffle), clevel, + dataType.getByteCount(), blockSize + ); + } + + public ArrayMetadataBuilder withBloscCompressor(String cname, String shuffle, int clevel) { + return withBloscCompressor(cname, shuffle, clevel, 0); + } + + public ArrayMetadataBuilder withBloscCompressor(String cname, int clevel) { + return withBloscCompressor(cname, "noshuffle", clevel); + } + + public ArrayMetadataBuilder withBloscCompressor(String cname) { + return withBloscCompressor(cname, 5); + } + + public ArrayMetadataBuilder withBloscCompressor() { + return withBloscCompressor("zstd"); + } + + public ArrayMetadataBuilder withZlibCompressor(int level) { + try { + this.compressor = new ZlibCodec(level); + } catch (ZarrException e) { + throw new RuntimeException(e); + 
} + return this; + } + + public ArrayMetadataBuilder withZlibCompressor() { + return withZlibCompressor(5); + } + + public ArrayMetadata build() throws ZarrException { + if (shape == null) { + throw new IllegalStateException("Please call `withShape` first."); + } + if (chunks == null) { + throw new IllegalStateException("Please call `withChunks` first."); + } + if (dataType == null) { + throw new IllegalStateException("Please call `withDataType` first."); + } + return new ArrayMetadata( + 2, + shape, + chunks, + dataType, + fillValue, + order, + filters, + compressor, + dimensionSeparator + ); + } +} \ No newline at end of file diff --git a/src/main/java/dev/zarr/zarrjava/v2/Codec.java b/src/main/java/dev/zarr/zarrjava/v2/Codec.java deleted file mode 100644 index b12daed..0000000 --- a/src/main/java/dev/zarr/zarrjava/v2/Codec.java +++ /dev/null @@ -1,5 +0,0 @@ -package dev.zarr.zarrjava.v2; - -public class Codec { - -} diff --git a/src/main/java/dev/zarr/zarrjava/v2/DataType.java b/src/main/java/dev/zarr/zarrjava/v2/DataType.java index 3d592e2..4583487 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/DataType.java +++ b/src/main/java/dev/zarr/zarrjava/v2/DataType.java @@ -2,7 +2,7 @@ import com.fasterxml.jackson.annotation.JsonValue; -public enum DataType { +public enum DataType implements dev.zarr.zarrjava.core.DataType { BOOL("b1", Endianness.UNSPECIFIED), INT8("i1", Endianness.UNSPECIFIED), INT16("i2", Endianness.LITTLE), @@ -18,13 +18,54 @@ public enum DataType { private final String dtype; private final Endianness endianness; + DataType(String dtype, Endianness endianness) { this.dtype = dtype; this.endianness = endianness; } + public Endianness getEndianness() { + return endianness; + } + @JsonValue public String getValue() { return String.format("%s%s", endianness.getValue(), dtype); } + + @Override + public ucar.ma2.DataType getMA2DataType() { + switch (this) { + case BOOL: + return ucar.ma2.DataType.BOOLEAN; + case INT8: + return ucar.ma2.DataType.BYTE; + 
case INT16: + return ucar.ma2.DataType.SHORT; + case INT32: + return ucar.ma2.DataType.INT; + case INT64: + return ucar.ma2.DataType.LONG; + case UINT8: + return ucar.ma2.DataType.UBYTE; + case UINT16: + return ucar.ma2.DataType.USHORT; + case UINT32: + return ucar.ma2.DataType.UINT; + case UINT64: + return ucar.ma2.DataType.ULONG; + case FLOAT32: + return ucar.ma2.DataType.FLOAT; + case FLOAT64: + return ucar.ma2.DataType.DOUBLE; + default: + throw new RuntimeException("Unreachable"); + } + } + + @Override + public int getByteCount() { + return Integer.parseInt(dtype.substring(1)); + } + } diff --git a/src/main/java/dev/zarr/zarrjava/v2/Endianness.java b/src/main/java/dev/zarr/zarrjava/v2/Endianness.java index cc7a6b4..1f064cb 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Endianness.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Endianness.java @@ -1,6 +1,7 @@ package dev.zarr.zarrjava.v2; import com.fasterxml.jackson.annotation.JsonValue; +import dev.zarr.zarrjava.v2.codec.core.BytesCodec; public enum Endianness { LITTLE("<"), @@ -17,4 +18,16 @@ public enum Endianness { public String getValue() { return value; } + + public BytesCodec.Endian toEndian() { + switch (this) { + case LITTLE: + return BytesCodec.Endian.LITTLE; + case BIG: + return BytesCodec.Endian.BIG; + case UNSPECIFIED: + default: + return BytesCodec.Endian.LITTLE; + } + } } \ No newline at end of file diff --git a/src/main/java/dev/zarr/zarrjava/v2/chunkkeyencoding/V2ChunkKeyEncoding.java b/src/main/java/dev/zarr/zarrjava/v2/chunkkeyencoding/V2ChunkKeyEncoding.java new file mode 100644 index 0000000..712306e --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/chunkkeyencoding/V2ChunkKeyEncoding.java @@ -0,0 +1,33 @@ +package dev.zarr.zarrjava.v2.chunkkeyencoding; + +import dev.zarr.zarrjava.core.chunkkeyencoding.ChunkKeyEncoding; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; + +import javax.annotation.Nonnull; +import java.util.Arrays; +import java.util.stream.Collectors; +import 
java.util.stream.Stream; + +public class V2ChunkKeyEncoding implements ChunkKeyEncoding { + + public final String name = "v2"; + @Nonnull + public final Separator separator; + + public V2ChunkKeyEncoding( + @Nonnull Separator separator + ) { + this.separator = separator; + } + + @Override + public String[] encodeChunkKey(long[] chunkCoords) { + Stream<String> keys = Arrays.stream(chunkCoords) + .mapToObj(Long::toString); + if (separator == Separator.SLASH) { + return keys.toArray(String[]::new); + } + return new String[]{keys.collect(Collectors.joining(this.separator.getValue()))}; + } +} + diff --git a/src/main/java/dev/zarr/zarrjava/v2/codec/Codec.java b/src/main/java/dev/zarr/zarrjava/v2/codec/Codec.java new file mode 100644 index 0000000..43f02f1 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/codec/Codec.java @@ -0,0 +1,7 @@ +package dev.zarr.zarrjava.v2.codec; + +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "id") +public interface Codec extends dev.zarr.zarrjava.core.codec.Codec {} + diff --git a/src/main/java/dev/zarr/zarrjava/v2/codec/CodecRegistry.java b/src/main/java/dev/zarr/zarrjava/v2/codec/CodecRegistry.java new file mode 100644 index 0000000..2b0f7a6 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/codec/CodecRegistry.java @@ -0,0 +1,29 @@ +package dev.zarr.zarrjava.v2.codec; + +import com.fasterxml.jackson.databind.jsontype.NamedType; +import dev.zarr.zarrjava.v2.codec.core.*; + +import java.util.HashMap; +import java.util.Map; + +public class CodecRegistry { + + static Map<String, Class<? extends Codec>> map = new HashMap<>(); + + static { + addType("blosc", BloscCodec.class); + addType("zlib", ZlibCodec.class); + } + + public static void addType(String name, Class<? extends Codec> codecClass) { + map.put(name, codecClass); + } + + public static NamedType[] getNamedTypes() { + return map.entrySet() + .stream() + .map(entry -> new NamedType(entry.getValue(), entry.getKey())) + .toArray( + NamedType[]::new); + } +} diff --git
a/src/main/java/dev/zarr/zarrjava/v2/codec/core/BloscCodec.java b/src/main/java/dev/zarr/zarrjava/v2/codec/core/BloscCodec.java new file mode 100644 index 0000000..4838488 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/codec/core/BloscCodec.java @@ -0,0 +1,111 @@ +package dev.zarr.zarrjava.v2.codec.core; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import com.scalableminds.bloscjava.Blosc; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.utils.Utils; +import dev.zarr.zarrjava.v2.codec.Codec; + +import javax.annotation.Nonnull; +import java.io.IOException; +import java.nio.ByteBuffer; + +public class BloscCodec extends dev.zarr.zarrjava.core.codec.core.BloscCodec implements Codec { + + public final String id = "blosc"; + + @Nonnull + @JsonSerialize(using = CustomCompressorSerializer.class) + public final Blosc.Compressor cname; + @Nonnull + @JsonSerialize(using = CustomShuffleSerializer.class) + public final Blosc.Shuffle shuffle; + public final int clevel; + public final int typesize; + public final int blocksize; + + @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) + public BloscCodec( + @Nonnull @JsonProperty(value = "cname", defaultValue = "zstd") + @JsonDeserialize(using = CustomCompressorDeserializer.class) + Blosc.Compressor cname, + @Nonnull @JsonProperty(value = "shuffle", defaultValue = "noshuffle") + @JsonDeserialize(using = CustomShuffleDeserializer.class) Blosc.Shuffle shuffle, + 
@JsonProperty(value = "clevel", defaultValue = "5") int clevel, + @JsonProperty(value = "typesize", defaultValue = "0") int typesize, + @JsonProperty(value = "blocksize", defaultValue = "0") int blocksize + ) throws ZarrException { + if (typesize < 1 && shuffle != Blosc.Shuffle.NO_SHUFFLE) { + typesize = 4; //todo: in v2 typesize is not a required parameter. default to correct value based on dtype + } + if (clevel < 0 || clevel > 9) { + throw new ZarrException("'clevel' needs to be between 0 and 9."); + } + this.cname = cname; + this.shuffle = shuffle; + this.clevel = clevel; + this.typesize = typesize; + this.blocksize = blocksize; + } + + + @Override + public ByteBuffer encode(ByteBuffer chunkBytes) + throws ZarrException { + try { + return ByteBuffer.wrap( + Blosc.compress(Utils.toArray(chunkBytes), this.typesize, this.cname, + this.clevel, + this.shuffle, this.blocksize + )); + } catch (Exception ex) { + throw new ZarrException("Error in encoding blosc.", ex); + } + } + + public static final class CustomShuffleSerializer extends StdSerializer<Blosc.Shuffle> { + + public CustomShuffleSerializer() { + super(Blosc.Shuffle.class); + } + + public CustomShuffleSerializer(Class<Blosc.Shuffle> t) { + super(t); + } + + @Override + public void serialize(Blosc.Shuffle shuffle, JsonGenerator generator, + SerializerProvider provider) + throws IOException { + generator.writeNumber(shuffle.ordinal()); + } + } + + public static final class CustomShuffleDeserializer extends StdDeserializer<Blosc.Shuffle> { + + public CustomShuffleDeserializer() { + this(null); + } + + public CustomShuffleDeserializer(Class<?> vc) { + super(vc); + } + + @Override + public Blosc.Shuffle deserialize(JsonParser jsonParser, DeserializationContext ctxt) + throws IOException { + int shuffle = jsonParser.getCodec() + .readValue(jsonParser, int.class); + return Blosc.Shuffle.values()[shuffle]; + } + } +} diff --git a/src/main/java/dev/zarr/zarrjava/v2/codec/core/BytesCodec.java b/src/main/java/dev/zarr/zarrjava/v2/codec/core/BytesCodec.java new file
mode 100644 index 0000000..ac0cd39 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/codec/core/BytesCodec.java @@ -0,0 +1,26 @@ +package dev.zarr.zarrjava.v2.codec.core; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import dev.zarr.zarrjava.v2.codec.Codec; + +import javax.annotation.Nonnull; +import java.nio.ByteOrder; + +public class BytesCodec extends dev.zarr.zarrjava.core.codec.core.BytesCodec implements Codec { + public final String name = "bytes"; + @Nonnull + public final Endian endian; + + @JsonCreator + public BytesCodec( + @JsonProperty(value = "endian", defaultValue = "little") Endian endian) { + this.endian = endian; + } + + @Override + protected ByteOrder getByteOrder() { + return endian.getByteOrder(); + } +} + diff --git a/src/main/java/dev/zarr/zarrjava/v2/codec/core/ZlibCodec.java b/src/main/java/dev/zarr/zarrjava/v2/codec/core/ZlibCodec.java new file mode 100644 index 0000000..8b2bed6 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/v2/codec/core/ZlibCodec.java @@ -0,0 +1,55 @@ +package dev.zarr.zarrjava.v2.codec.core; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.utils.Utils; +import dev.zarr.zarrjava.v2.codec.Codec; +import dev.zarr.zarrjava.core.codec.BytesBytesCodec; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.zip.*; + +public class ZlibCodec extends BytesBytesCodec implements Codec { + + public final String id = "zlib"; + public final int level; + + + @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) + public ZlibCodec( + @JsonProperty(value = "level", defaultValue = "1") int level) throws ZarrException { + if (level < 0 || level > 9) { + throw new ZarrException("'level' needs to be between 0 and 9."); + } + this.level = level; + } 
+ + + @Override + public ByteBuffer decode(ByteBuffer chunkBytes) throws ZarrException { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); InflaterInputStream inputStream = new InflaterInputStream( + new ByteArrayInputStream(Utils.toArray(chunkBytes)))) { + Utils.copyStream(inputStream, outputStream); + inputStream.close(); + return ByteBuffer.wrap(outputStream.toByteArray()); + } catch (IOException ex) { + throw new ZarrException("Error in decoding zlib.", ex); + } + } + + @Override + public ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + DeflaterOutputStream dos = new DeflaterOutputStream(outputStream, new Deflater(this.level))) { + dos.write(Utils.toArray(chunkBytes)); + dos.close(); + return ByteBuffer.wrap(outputStream.toByteArray()); + } catch (IOException ex) { + throw new ZarrException("Error in encoding zlib.", ex); + } + } +} diff --git a/src/main/java/dev/zarr/zarrjava/v3/Array.java b/src/main/java/dev/zarr/zarrjava/v3/Array.java index 0d240be..f21b020 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/Array.java +++ b/src/main/java/dev/zarr/zarrjava/v3/Array.java @@ -3,10 +3,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.store.StoreHandle; -import dev.zarr.zarrjava.utils.IndexingUtils; -import dev.zarr.zarrjava.utils.MultiArrayUtils; import dev.zarr.zarrjava.utils.Utils; -import dev.zarr.zarrjava.v3.codec.CodecPipeline; +import dev.zarr.zarrjava.core.codec.CodecPipeline; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -14,18 +12,15 @@ import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import ucar.ma2.InvalidRangeException; -public class Array extends Node
implements dev.zarr.zarrjava.core.Array { public ArrayMetadata metadata; CodecPipeline codecPipeline; protected Array(StoreHandle storeHandle, ArrayMetadata arrayMetadata) - throws IOException, ZarrException { + throws ZarrException { super(storeHandle); this.metadata = arrayMetadata; this.codecPipeline = new CodecPipeline(arrayMetadata.codecs, arrayMetadata.coreArrayMetadata); @@ -34,9 +29,9 @@ protected Array(StoreHandle storeHandle, ArrayMetadata arrayMetadata) /** * Opens an existing Zarr array at a specified storage location. * - * @param storeHandle - * @throws IOException - * @throws ZarrException + * @param storeHandle the storage location of the Zarr array + * @throws IOException throws IOException if the metadata cannot be read + * @throws ZarrException throws ZarrException if the Zarr array cannot be opened */ public static Array open(StoreHandle storeHandle) throws IOException, ZarrException { return new Array( @@ -54,10 +49,10 @@ public static Array open(StoreHandle storeHandle) throws IOException, ZarrExcept * method will raise an exception if a Zarr array already exists at the specified storage * location. * - * @param storeHandle - * @param arrayMetadata - * @throws IOException - * @throws ZarrException + * @param storeHandle the storage location of the Zarr array + * @param arrayMetadata the metadata of the Zarr array + * @throws IOException if the metadata cannot be serialized + * @throws ZarrException if the Zarr array cannot be created */ public static Array create(StoreHandle storeHandle, ArrayMetadata arrayMetadata) throws IOException, ZarrException { @@ -69,11 +64,11 @@ public static Array create(StoreHandle storeHandle, ArrayMetadata arrayMetadata) * `existsOk` is false, this method will raise an exception if a Zarr array already exists at the * specified storage location. 
* - * @param storeHandle - * @param arrayMetadata - * @param existsOk - * @throws IOException - * @throws ZarrException + * @param storeHandle the storage location of the Zarr array + * @param arrayMetadata the metadata of the Zarr array + * @param existsOk if true, no exception is raised if the Zarr array already exists + * @throws IOException throws IOException if the metadata cannot be serialized + * @throws ZarrException throws ZarrException if the Zarr array cannot be created */ public static Array create(StoreHandle storeHandle, ArrayMetadata arrayMetadata, boolean existsOk) throws IOException, ZarrException { @@ -95,11 +90,11 @@ public static Array create(StoreHandle storeHandle, ArrayMetadata arrayMetadata, * be used to construct the metadata of the Zarr array. If `existsOk` is false, this method will * raise an exception if a Zarr array already exists at the specified storage location. * - * @param storeHandle - * @param arrayMetadataBuilderMapper - * @param existsOk - * @throws IOException - * @throws ZarrException + * @param storeHandle the storage location of the Zarr array + * @param arrayMetadataBuilderMapper a callback that is used to construct the metadata of the Zarr array + * @param existsOk if true, no exception is raised if the Zarr array already exists + * @throws IOException if the metadata cannot be serialized + * @throws ZarrException if the Zarr array cannot be created */ public static Array create(StoreHandle storeHandle, Function arrayMetadataBuilderMapper, @@ -118,261 +113,20 @@ public static ArrayMetadataBuilder metadataBuilder(ArrayMetadata existingMetadat return ArrayMetadataBuilder.fromArrayMetadata(existingMetadata); } - /** - * Reads the entire Zarr array into an ucar.ma2.Array. - * Utilizes no parallelism. 
- * - * @throws ZarrException - */ - @Nonnull - public ucar.ma2.Array read() throws ZarrException { - return read(new long[metadata.ndim()], Utils.toIntArray(metadata.shape)); - } - - /** - * Reads a part of the Zarr array based on a requested offset and shape into an ucar.ma2.Array. - * Utilizes no parallelism. - * - * @param offset - * @param shape - * @throws ZarrException - */ - @Nonnull - public ucar.ma2.Array read(final long[] offset, final int[] shape) throws ZarrException { - return read(offset, shape, false); - } - - /** - * Reads the entire Zarr array into an ucar.ma2.Array. - * - * @param parallel - * @throws ZarrException - */ - @Nonnull - public ucar.ma2.Array read(final boolean parallel) throws ZarrException { - return read(new long[metadata.ndim()], Utils.toIntArray(metadata.shape), parallel); - } - - /** - * Reads a part of the Zarr array based on a requested offset and shape into an ucar.ma2.Array. - * - * @param offset - * @param shape - * @param parallel - * @throws ZarrException - */ - @Nonnull - public ucar.ma2.Array read(final long[] offset, final int[] shape, final boolean parallel) throws ZarrException { - if (offset.length != metadata.ndim()) { - throw new IllegalArgumentException("'offset' needs to have rank '" + metadata.ndim() + "'."); - } - if (shape.length != metadata.ndim()) { - throw new IllegalArgumentException("'shape' needs to have rank '" + metadata.ndim() + "'."); - } - for (int dimIdx = 0; dimIdx < metadata.ndim(); dimIdx++) { - if (offset[dimIdx] < 0 || offset[dimIdx] + shape[dimIdx] > metadata.shape[dimIdx]) { - throw new ZarrException("Requested data is outside of the array's domain."); - } - } - - final int[] chunkShape = metadata.chunkShape(); - if (IndexingUtils.isSingleFullChunk(offset, shape, chunkShape)) { - return readChunk(IndexingUtils.computeSingleChunkCoords(offset, chunkShape)); - } - - final ucar.ma2.Array outputArray = ucar.ma2.Array.factory(metadata.dataType.getMA2DataType(), - shape); - Stream chunkStream = 
Arrays.stream(IndexingUtils.computeChunkCoords(metadata.shape, chunkShape, offset, shape)); - if (parallel) { - chunkStream = chunkStream.parallel(); - } - chunkStream.forEach( - chunkCoords -> { - try { - final IndexingUtils.ChunkProjection chunkProjection = - IndexingUtils.computeProjection(chunkCoords, metadata.shape, chunkShape, offset, - shape - ); - - if (chunkIsInArray(chunkCoords)) { - MultiArrayUtils.copyRegion(metadata.allocateFillValueChunk(), - chunkProjection.chunkOffset, outputArray, chunkProjection.outOffset, - chunkProjection.shape - ); - } - - final String[] chunkKeys = metadata.chunkKeyEncoding.encodeChunkKey(chunkCoords); - final StoreHandle chunkHandle = storeHandle.resolve(chunkKeys); - if (!chunkHandle.exists()) { - return; - } - if (codecPipeline.supportsPartialDecode()) { - final ucar.ma2.Array chunkArray = codecPipeline.decodePartial(chunkHandle, - Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape); - MultiArrayUtils.copyRegion(chunkArray, new int[metadata.ndim()], outputArray, - chunkProjection.outOffset, chunkProjection.shape - ); - } else { - MultiArrayUtils.copyRegion(readChunk(chunkCoords), chunkProjection.chunkOffset, - outputArray, chunkProjection.outOffset, chunkProjection.shape - ); - } - - } catch (ZarrException e) { - throw new RuntimeException(e); - } - }); - return outputArray; - } - - boolean chunkIsInArray(long[] chunkCoords) { - final int[] chunkShape = metadata.chunkShape(); - for (int dimIdx = 0; dimIdx < metadata.ndim(); dimIdx++) { - if (chunkCoords[dimIdx] < 0 - || chunkCoords[dimIdx] * chunkShape[dimIdx] >= metadata.shape[dimIdx]) { - return false; - } - } - return true; - } - - /** - * Reads one chunk of the Zarr array as specified by the chunk coordinates into an - * ucar.ma2.Array. - * - * @param chunkCoords The coordinates of the chunk as computed by the offset of the chunk divided - * by the chunk shape. 
- * @throws ZarrException - */ - @Nonnull - public ucar.ma2.Array readChunk(long[] chunkCoords) - throws ZarrException { - if (!chunkIsInArray(chunkCoords)) { - throw new ZarrException("Attempting to read data outside of the array's domain."); - } - - final String[] chunkKeys = metadata.chunkKeyEncoding.encodeChunkKey(chunkCoords); - final StoreHandle chunkHandle = storeHandle.resolve(chunkKeys); - - ByteBuffer chunkBytes = chunkHandle.read(); - if (chunkBytes == null) { - return metadata.allocateFillValueChunk(); - } - - return codecPipeline.decode(chunkBytes); - } - - /** - * Writes a ucar.ma2.Array into the Zarr array at the beginning of the Zarr array. The shape of - * the Zarr array needs be large enough for the write. - * Utilizes no parallelism. - * - * @param array - */ - public void write(ucar.ma2.Array array) { - write(new long[metadata.ndim()], array); - } - - /** - * Writes a ucar.ma2.Array into the Zarr array at a specified offset. The shape of the Zarr array - * needs be large enough for the write. - * Utilizes no parallelism. - * - * @param offset - * @param array - */ - public void write(long[] offset, ucar.ma2.Array array) { - write(offset, array, false); - } - - /** - * Writes a ucar.ma2.Array into the Zarr array at the beginning of the Zarr array. The shape of - * the Zarr array needs be large enough for the write. - * - * @param array - * @param parallel - */ - public void write(ucar.ma2.Array array, boolean parallel) { - write(new long[metadata.ndim()], array, parallel); + @Override + public CodecPipeline codecPipeline() { + return codecPipeline; } - /** - * Writes a ucar.ma2.Array into the Zarr array at a specified offset. The shape of the Zarr array - * needs be large enough for the write. 
- * - * @param offset - * @param array - * @param parallel - */ - public void write(long[] offset, ucar.ma2.Array array, boolean parallel) { - if (offset.length != metadata.ndim()) { - throw new IllegalArgumentException("'offset' needs to have rank '" + metadata.ndim() + "'."); - } - if (array.getRank() != metadata.ndim()) { - throw new IllegalArgumentException("'array' needs to have rank '" + metadata.ndim() + "'."); - } - - int[] shape = array.getShape(); - final int[] chunkShape = metadata.chunkShape(); - Stream chunkStream = Arrays.stream(IndexingUtils.computeChunkCoords(metadata.shape, chunkShape, offset, shape)); - if(parallel) { - chunkStream = chunkStream.parallel(); - } - chunkStream.forEach( - chunkCoords -> { - try { - final IndexingUtils.ChunkProjection chunkProjection = - IndexingUtils.computeProjection(chunkCoords, metadata.shape, chunkShape, offset, - shape - ); - - ucar.ma2.Array chunkArray; - if (IndexingUtils.isFullChunk(chunkProjection.chunkOffset, chunkProjection.shape, - chunkShape - )) { - chunkArray = array.sectionNoReduce(chunkProjection.outOffset, - chunkProjection.shape, - null - ); - } else { - chunkArray = readChunk(chunkCoords); - MultiArrayUtils.copyRegion(array, chunkProjection.outOffset, chunkArray, - chunkProjection.chunkOffset, chunkProjection.shape - ); - } - writeChunk(chunkCoords, chunkArray); - } catch (ZarrException | InvalidRangeException e) { - throw new RuntimeException(e); - } - }); + @Override + public ArrayMetadata metadata() { + return metadata; } - /** - * Writes one chunk into the Zarr array as specified by the chunk coordinates. The shape of the - * Zarr array needs be large enough for the write. 
- * - * @param chunkCoords - * @param chunkArray - * @throws ZarrException - */ - public void writeChunk(long[] chunkCoords, ucar.ma2.Array chunkArray) throws ZarrException { - String[] chunkKeys = metadata.chunkKeyEncoding.encodeChunkKey(chunkCoords); - StoreHandle chunkHandle = storeHandle.resolve(chunkKeys); - - if (MultiArrayUtils.allValuesEqual(chunkArray, metadata.parsedFillValue)) { - chunkHandle.delete(); - } else { - ByteBuffer chunkBytes = codecPipeline.encode(chunkArray); - chunkHandle.set(chunkBytes); - } - } - public ArrayAccessor access() { - return new ArrayAccessor(this); - } private Array writeMetadata(ArrayMetadata newArrayMetadata) throws ZarrException, IOException { ObjectMapper objectMapper = makeObjectMapper(); @@ -387,9 +141,9 @@ private Array writeMetadata(ArrayMetadata newArrayMetadata) throws ZarrException * deleted. This method returns a new instance of the Zarr array class and the old instance * becomes invalid. * - * @param newShape - * @throws ZarrException - * @throws IOException + * @param newShape the new shape of the Zarr array + * @throws ZarrException if the new metadata is invalid + * @throws IOException throws IOException if the new metadata cannot be serialized */ public Array resize(long[] newShape) throws ZarrException, IOException { if (newShape.length != metadata.ndim()) { @@ -407,9 +161,9 @@ public Array resize(long[] newShape) throws ZarrException, IOException { * Sets the attributes of the Zarr array. It overwrites and removes any existing attributes. This * method returns a new instance of the Zarr array class and the old instance becomes invalid. 
* - * @param newAttributes - * @throws ZarrException - * @throws IOException + * @param newAttributes the new attributes of the Zarr array + * @throws ZarrException throws ZarrException if the new metadata is invalid + * @throws IOException throws IOException if the new metadata cannot be serialized */ public Array setAttributes(Map newAttributes) throws ZarrException, IOException { ArrayMetadata newArrayMetadata = @@ -425,9 +179,9 @@ public Array setAttributes(Map newAttributes) throws ZarrExcepti * callback may be mutated. This method overwrites and removes any existing attributes. This * method returns a new instance of the Zarr array class and the old instance becomes invalid. * - * @param attributeMapper - * @throws ZarrException - * @throws IOException + * @param attributeMapper the callback that is used to construct the new attributes + * @throws ZarrException throws ZarrException if the new metadata is invalid + * @throws IOException throws IOException if the new metadata cannot be serialized */ public Array updateAttributes(Function, Map> attributeMapper) throws ZarrException, IOException { @@ -445,54 +199,5 @@ public String toString() { ); } - public static final class ArrayAccessor { - - @Nullable - long[] offset; - @Nullable - int[] shape; - @Nonnull - Array array; - - private ArrayAccessor(@Nonnull Array array) { - this.array = array; - } - @Nonnull - public ArrayAccessor withOffset(@Nonnull long... offset) { - this.offset = offset; - return this; - } - - - @Nonnull - public ArrayAccessor withShape(@Nonnull int... shape) { - this.shape = shape; - return this; - } - - @Nonnull - public ArrayAccessor withShape(@Nonnull long... 
shape) { - this.shape = Utils.toIntArray(shape); - return this; - } - - @Nonnull - public ucar.ma2.Array read() throws ZarrException { - if (offset == null) { - throw new ZarrException("`offset` needs to be set."); - } - if (shape == null) { - throw new ZarrException("`shape` needs to be set."); - } - return array.read(offset, shape); - } - - public void write(@Nonnull ucar.ma2.Array content) throws ZarrException { - if (offset == null) { - throw new ZarrException("`offset` needs to be set."); - } - array.write(offset, content); - } - } } diff --git a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadata.java b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadata.java index f23a9fc..d50d9d0 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadata.java +++ b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadata.java @@ -4,15 +4,13 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import dev.zarr.zarrjava.ZarrException; -import dev.zarr.zarrjava.utils.MultiArrayUtils; -import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.chunkgrid.ChunkGrid; import dev.zarr.zarrjava.v3.chunkgrid.RegularChunkGrid; import dev.zarr.zarrjava.v3.chunkkeyencoding.ChunkKeyEncoding; import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.v3.codec.core.ShardingIndexedCodec; +import static dev.zarr.zarrjava.core.ArrayMetadata.parseFillValue; -import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -20,7 +18,7 @@ import javax.annotation.Nullable; -public final class ArrayMetadata { +public final class ArrayMetadata implements dev.zarr.zarrjava.core.ArrayMetadata { static final String NODE_TYPE = "array"; static final int ZARR_FORMAT = 3; @@ -142,122 +140,21 @@ public ArrayMetadata( ); } - public static Object parseFillValue(Object fillValue, @Nonnull DataType dataType) - throws ZarrException { - if (fillValue instanceof Boolean) { - Boolean fillValueBool = (Boolean) 
fillValue; - if (dataType == DataType.BOOL) { - return fillValueBool; - } - } - if (fillValue instanceof Number) { - Number fillValueNumber = (Number) fillValue; - switch (dataType) { - case BOOL: - return fillValueNumber.byteValue() != 0; - case INT8: - case UINT8: - return fillValueNumber.byteValue(); - case INT16: - case UINT16: - return fillValueNumber.shortValue(); - case INT32: - case UINT32: - return fillValueNumber.intValue(); - case INT64: - case UINT64: - return fillValueNumber.longValue(); - case FLOAT32: - return fillValueNumber.floatValue(); - case FLOAT64: - return fillValueNumber.doubleValue(); - default: - // Fallback to throwing below - } - } else if (fillValue instanceof String) { - String fillValueString = (String) fillValue; - if (fillValueString.equals("NaN")) { - switch (dataType) { - case FLOAT32: - return Float.NaN; - case FLOAT64: - return Double.NaN; - default: - throw new ZarrException( - "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'."); - } - } else if (fillValueString.equals("+Infinity")) { - switch (dataType) { - case FLOAT32: - return Float.POSITIVE_INFINITY; - case FLOAT64: - return Double.POSITIVE_INFINITY; - default: - throw new ZarrException( - "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'."); - } - } else if (fillValueString.equals("-Infinity")) { - switch (dataType) { - case FLOAT32: - return Float.NEGATIVE_INFINITY; - case FLOAT64: - return Double.NEGATIVE_INFINITY; - default: - throw new ZarrException( - "Invalid fill value '" + fillValueString + "' for data type '" + dataType + "'."); - } - } else if (fillValueString.startsWith("0b") || fillValueString.startsWith("0x")) { - ByteBuffer buf = null; - if (fillValueString.startsWith("0b")) { - buf = Utils.makeByteBuffer(dataType.getByteCount(), b -> { - for (int i = 0; i < dataType.getByteCount(); i++) { - b.put((byte) Integer.parseInt(fillValueString.substring(2 + i * 8, 2 + (i + 1) * 8), - 2)); - } - return b; - 
}); - } else if (fillValueString.startsWith("0x")) { - buf = Utils.makeByteBuffer(dataType.getByteCount(), b -> { - for (int i = 0; i < dataType.getByteCount(); i++) { - b.put((byte) Integer.parseInt(fillValueString.substring(2 + i * 2, 2 + (i + 1) * 2), - 16)); - } - return b; - }); - } - if (buf != null) { - switch (dataType) { - case BOOL: - return buf.get() != 0; - case INT8: - case UINT8: - return buf.get(); - case INT16: - case UINT16: - return buf.getShort(); - case INT32: - case UINT32: - return buf.getInt(); - case INT64: - case UINT64: - return buf.getLong(); - case FLOAT32: - return buf.getFloat(); - case FLOAT64: - return buf.getDouble(); - default: - // Fallback to throwing below - } - } - } - } - throw new ZarrException("Invalid fill value '" + fillValue + "'."); - } public ucar.ma2.Array allocateFillValueChunk() { return coreArrayMetadata.allocateFillValueChunk(); } + @Override + public ChunkKeyEncoding chunkKeyEncoding() { + return chunkKeyEncoding; + } + + @Override + public Object parsedFillValue() { + return parsedFillValue; + } + public int ndim() { return shape.length; } @@ -270,6 +167,16 @@ public int[] chunkShape() { return ((RegularChunkGrid) this.chunkGrid).configuration.chunkShape; } + @Override + public long[] shape() { + return shape; + } + + @Override + public DataType dataType() { + return dataType; + } + public int chunkSize() { return coreArrayMetadata.chunkSize(); } @@ -278,39 +185,5 @@ public int chunkByteLength() { return coreArrayMetadata.chunkByteLength(); } - public static final class CoreArrayMetadata { - - public final long[] shape; - public final int[] chunkShape; - public final DataType dataType; - public final Object parsedFillValue; - - public CoreArrayMetadata(long[] shape, int[] chunkShape, DataType dataType, - Object parsedFillValue) { - this.shape = shape; - this.chunkShape = chunkShape; - this.dataType = dataType; - this.parsedFillValue = parsedFillValue; - } - - public int ndim() { - return shape.length; - } - - 
public int chunkSize() { - return Arrays.stream(chunkShape) - .reduce(1, (acc, a) -> acc * a); - } - - public int chunkByteLength() { - return this.dataType.getByteCount() * chunkSize(); - } - - public ucar.ma2.Array allocateFillValueChunk() { - ucar.ma2.Array outputArray = ucar.ma2.Array.factory(dataType.getMA2DataType(), chunkShape); - MultiArrayUtils.fill(outputArray, parsedFillValue); - return outputArray; - } - } } diff --git a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java index 3d07ad4..212f473 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java @@ -5,15 +5,13 @@ import dev.zarr.zarrjava.v3.chunkgrid.RegularChunkGrid; import dev.zarr.zarrjava.v3.chunkkeyencoding.ChunkKeyEncoding; import dev.zarr.zarrjava.v3.chunkkeyencoding.DefaultChunkKeyEncoding; -import dev.zarr.zarrjava.v3.chunkkeyencoding.Separator; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; import dev.zarr.zarrjava.v3.chunkkeyencoding.V2ChunkKeyEncoding; import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.v3.codec.CodecBuilder; import dev.zarr.zarrjava.v3.codec.core.BytesCodec; -import dev.zarr.zarrjava.v3.codec.core.BytesCodec.Endian; -import dev.zarr.zarrjava.v3.codec.core.ShardingIndexedCodec; +import dev.zarr.zarrjava.core.codec.core.BytesCodec.Endian; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.function.Function; @@ -127,6 +125,7 @@ public ArrayMetadataBuilder withAttributes(Map attributes) { this.attributes = attributes; return this; } + public ArrayMetadataBuilder withStorageTransformers(Map[] storageTransformers) { this.storageTransformers = storageTransformers; return this; @@ -142,8 +141,6 @@ public ArrayMetadata build() throws ZarrException { if (chunkGrid == null) { throw new ZarrException("Chunk grid needs to be provided. 
Please call `.withChunkShape`."); } - - return new ArrayMetadata(shape, dataType, chunkGrid, chunkKeyEncoding, fillValue, codecs, dimensionNames, attributes, diff --git a/src/main/java/dev/zarr/zarrjava/v3/DataType.java b/src/main/java/dev/zarr/zarrjava/v3/DataType.java index d76fdc2..9d9842c 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/DataType.java +++ b/src/main/java/dev/zarr/zarrjava/v3/DataType.java @@ -2,7 +2,7 @@ import com.fasterxml.jackson.annotation.JsonValue; -public enum DataType { +public enum DataType implements dev.zarr.zarrjava.core.DataType { BOOL("bool", 1), INT8("int8", 1), INT16("int16", 2), diff --git a/src/main/java/dev/zarr/zarrjava/v3/Node.java b/src/main/java/dev/zarr/zarrjava/v3/Node.java index e2164cc..4362999 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/Node.java +++ b/src/main/java/dev/zarr/zarrjava/v3/Node.java @@ -24,4 +24,8 @@ public static ObjectMapper makeObjectMapper() { return objectMapper; } + public StoreHandle storeHandle() { + return storeHandle; + } + } diff --git a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/ChunkKeyEncoding.java b/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/ChunkKeyEncoding.java index dc34f2e..cf82ad8 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/ChunkKeyEncoding.java +++ b/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/ChunkKeyEncoding.java @@ -9,7 +9,7 @@ @JsonSubTypes.Type(value = DefaultChunkKeyEncoding.class, name = "default"), @JsonSubTypes.Type(value = V2ChunkKeyEncoding.class, name = "v2") }) -public abstract class ChunkKeyEncoding { +public abstract class ChunkKeyEncoding implements dev.zarr.zarrjava.core.chunkkeyencoding.ChunkKeyEncoding { public abstract String[] encodeChunkKey(long[] chunkCoords); diff --git a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/DefaultChunkKeyEncoding.java b/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/DefaultChunkKeyEncoding.java index f4d9117..52b7ea6 100644 --- 
a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/DefaultChunkKeyEncoding.java +++ b/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/DefaultChunkKeyEncoding.java @@ -2,6 +2,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; + import java.util.Arrays; import java.util.stream.Collectors; import java.util.stream.Stream; diff --git a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/V2ChunkKeyEncoding.java b/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/V2ChunkKeyEncoding.java index 3ac4f59..d72b429 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/V2ChunkKeyEncoding.java +++ b/src/main/java/dev/zarr/zarrjava/v3/chunkkeyencoding/V2ChunkKeyEncoding.java @@ -2,6 +2,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; + import java.util.Arrays; import java.util.stream.Collectors; import java.util.stream.Stream; diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/ArrayArrayCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/ArrayArrayCodec.java deleted file mode 100644 index a488d30..0000000 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/ArrayArrayCodec.java +++ /dev/null @@ -1,14 +0,0 @@ -package dev.zarr.zarrjava.v3.codec; - -import dev.zarr.zarrjava.ZarrException; -import ucar.ma2.Array; - -public abstract class ArrayArrayCodec extends Codec { - - protected abstract Array encode(Array chunkArray) - throws ZarrException; - - protected abstract Array decode(Array chunkArray) - throws ZarrException; - -} diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/BytesBytesCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/BytesBytesCodec.java deleted file mode 100644 index 6435463..0000000 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/BytesBytesCodec.java +++ /dev/null @@ -1,13 +0,0 @@ -package 
dev.zarr.zarrjava.v3.codec; - -import dev.zarr.zarrjava.ZarrException; - -import java.nio.ByteBuffer; - -public abstract class BytesBytesCodec extends Codec { - - protected abstract ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException; - - public abstract ByteBuffer decode(ByteBuffer chunkBytes) throws ZarrException; - -} diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/Codec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/Codec.java index 988dd1d..d9631a1 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/Codec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/Codec.java @@ -5,22 +5,7 @@ import dev.zarr.zarrjava.v3.ArrayMetadata; @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "name") -public abstract class Codec { - - protected ArrayMetadata.CoreArrayMetadata arrayMetadata; - - protected ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException { - if (arrayMetadata == null) { - throw new ZarrException("arrayMetadata needs to get set in for every codec"); - } - return this.arrayMetadata; - } - - protected abstract long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata) - throws ZarrException; - - public void setCoreArrayMetadata(ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException{ - this.arrayMetadata = arrayMetadata; - } +public interface Codec extends dev.zarr.zarrjava.v2.codec.Codec { + long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException; } diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java b/src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java index 3776a43..ad2f70c 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java @@ -2,28 +2,24 @@ import com.scalableminds.bloscjava.Blosc; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.core.codec.ArrayArrayCodec; +import 
dev.zarr.zarrjava.core.codec.ArrayBytesCodec; +import dev.zarr.zarrjava.core.codec.BytesBytesCodec; import dev.zarr.zarrjava.v3.DataType; -import dev.zarr.zarrjava.v3.codec.core.BloscCodec; -import dev.zarr.zarrjava.v3.codec.core.BytesCodec; +import dev.zarr.zarrjava.v3.codec.core.*; import dev.zarr.zarrjava.v3.codec.core.BytesCodec.Configuration; -import dev.zarr.zarrjava.v3.codec.core.BytesCodec.Endian; -import dev.zarr.zarrjava.v3.codec.core.Crc32cCodec; -import dev.zarr.zarrjava.v3.codec.core.GzipCodec; -import dev.zarr.zarrjava.v3.codec.core.ShardingIndexedCodec; -import dev.zarr.zarrjava.v3.codec.core.TransposeCodec; -import dev.zarr.zarrjava.v3.codec.core.ZstdCodec; +import dev.zarr.zarrjava.core.codec.core.BytesCodec.Endian; + import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.function.Function; -public class CodecBuilder { - - final private DataType dataType; - private List codecs; +public class CodecBuilder extends dev.zarr.zarrjava.core.codec.CodecBuilder { + protected List codecs; public CodecBuilder(DataType dataType) { - this.dataType = dataType; + super(dataType); this.codecs = new ArrayList<>(); } @@ -129,7 +125,7 @@ public CodecBuilder withSharding(int[] chunkShape, public CodecBuilder withSharding(int[] chunkShape, Function codecBuilder, String indexLocation) { - CodecBuilder nestedBuilder = new CodecBuilder(dataType); + CodecBuilder nestedBuilder = new CodecBuilder((DataType) dataType); try { codecs.add(new ShardingIndexedCodec( new ShardingIndexedCodec.Configuration(chunkShape, @@ -161,7 +157,6 @@ private void autoInsertBytesCodec() { public Codec[] build() { autoInsertBytesCodec(); - return codecs.toArray(new Codec[0]); } } diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/CodecRegistry.java b/src/main/java/dev/zarr/zarrjava/v3/codec/CodecRegistry.java index efed1f2..16b52a1 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/CodecRegistry.java +++ 
b/src/main/java/dev/zarr/zarrjava/v3/codec/CodecRegistry.java @@ -1,13 +1,8 @@ package dev.zarr.zarrjava.v3.codec; import com.fasterxml.jackson.databind.jsontype.NamedType; -import dev.zarr.zarrjava.v3.codec.core.BloscCodec; -import dev.zarr.zarrjava.v3.codec.core.BytesCodec; -import dev.zarr.zarrjava.v3.codec.core.Crc32cCodec; -import dev.zarr.zarrjava.v3.codec.core.GzipCodec; -import dev.zarr.zarrjava.v3.codec.core.ShardingIndexedCodec; -import dev.zarr.zarrjava.v3.codec.core.TransposeCodec; -import dev.zarr.zarrjava.v3.codec.core.ZstdCodec; +import dev.zarr.zarrjava.v3.codec.core.*; + import java.util.HashMap; import java.util.Map; diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/BloscCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/BloscCodec.java index 819a610..caa385f 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/BloscCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/BloscCodec.java @@ -13,14 +13,15 @@ import com.fasterxml.jackson.databind.ser.std.StdSerializer; import com.scalableminds.bloscjava.Blosc; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.ArrayMetadata; -import dev.zarr.zarrjava.v3.codec.BytesBytesCodec; + import java.io.IOException; import java.nio.ByteBuffer; import javax.annotation.Nonnull; -public class BloscCodec extends BytesBytesCodec { +public class BloscCodec extends dev.zarr.zarrjava.core.codec.core.BloscCodec implements Codec{ public final String name = "blosc"; @Nonnull @@ -32,16 +33,6 @@ public BloscCodec( this.configuration = configuration; } - @Override - public ByteBuffer decode(ByteBuffer chunkBytes) - throws ZarrException { - try { - return ByteBuffer.wrap(Blosc.decompress(Utils.toArray(chunkBytes))); - } catch (Exception ex) { - throw new ZarrException("Error in decoding blosc.", ex); - } - } - @Override public ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException { @@ -90,50 
+81,6 @@ public void serialize(Blosc.Shuffle shuffle, JsonGenerator generator, } } - public static final class CustomCompressorDeserializer extends StdDeserializer { - - public CustomCompressorDeserializer() { - this(null); - } - - public CustomCompressorDeserializer(Class vc) { - super(vc); - } - - @Override - public Blosc.Compressor deserialize(JsonParser jsonParser, DeserializationContext ctxt) - throws IOException { - String cname = jsonParser.getCodec() - .readValue(jsonParser, String.class); - Blosc.Compressor compressor = Blosc.Compressor.fromString(cname); - if (compressor == null) { - throw new JsonParseException( - jsonParser, - String.format("Could not parse the Blosc.Compressor. Got '%s'", cname) - ); - } - return compressor; - } - } - - public static final class CustomCompressorSerializer extends StdSerializer { - - public CustomCompressorSerializer() { - super(Blosc.Compressor.class); - } - - public CustomCompressorSerializer(Class t) { - super(t); - } - - @Override - public void serialize(Blosc.Compressor compressor, JsonGenerator generator, - SerializerProvider provider) - throws IOException { - generator.writeString(compressor.getValue()); - } - } - public static final class CustomShuffleDeserializer extends StdDeserializer { public CustomShuffleDeserializer() { @@ -192,7 +139,7 @@ public Configuration( @JsonProperty(value = "blocksize", defaultValue = "0") int blocksize ) throws ZarrException { - if (typesize < 1) { + if (typesize < 1 && shuffle != Blosc.Shuffle.NO_SHUFFLE) { throw new ZarrException("'typesize' needs to be larger than 0."); } if (clevel < 0 || clevel > 9) { diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/BytesCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/BytesCodec.java index 1415da9..311e9f5 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/BytesCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/BytesCodec.java @@ -2,16 +2,14 @@ import com.fasterxml.jackson.annotation.JsonCreator; 
import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonValue; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.v3.ArrayMetadata; -import dev.zarr.zarrjava.v3.codec.ArrayBytesCodec; -import java.nio.ByteBuffer; + import java.nio.ByteOrder; import javax.annotation.Nonnull; -import ucar.ma2.Array; -public class BytesCodec extends ArrayBytesCodec { +public class BytesCodec extends dev.zarr.zarrjava.core.codec.core.BytesCodec implements Codec { public final String name = "bytes"; @Nonnull @@ -28,50 +26,18 @@ public BytesCodec(Endian endian) { this(new BytesCodec.Configuration(endian)); } - @Override - public Array decode(ByteBuffer chunkBytes) { - chunkBytes.order(configuration.endian.getByteOrder()); - return Array.factory(arrayMetadata.dataType.getMA2DataType(), arrayMetadata.chunkShape, - chunkBytes); - } - - @Override - public ByteBuffer encode(Array chunkArray) { - return chunkArray.getDataAsByteBuffer(configuration.endian.getByteOrder()); - } - @Override public long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata) throws ZarrException { return inputByteLength; } - public enum Endian { - LITTLE("little"), - BIG("big"); - private final String endian; - - Endian(String endian) { - this.endian = endian; - } - - @JsonValue - public String getValue() { - return endian; - } - - public ByteOrder getByteOrder() { - switch (this) { - case LITTLE: - return ByteOrder.LITTLE_ENDIAN; - case BIG: - return ByteOrder.BIG_ENDIAN; - default: - throw new RuntimeException("Unreachable"); - } - } + @Override + protected ByteOrder getByteOrder() { + return configuration.endian.getByteOrder(); } + public static final class Configuration{ @Nonnull diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/Crc32cCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/Crc32cCodec.java index a1e3cb5..823106d 100644 --- 
a/src/main/java/dev/zarr/zarrjava/v3/codec/core/Crc32cCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/Crc32cCodec.java @@ -2,14 +2,15 @@ import com.fasterxml.jackson.annotation.JsonCreator; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.utils.CRC32C; import dev.zarr.zarrjava.utils.Utils; -import dev.zarr.zarrjava.v3.codec.BytesBytesCodec; -import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata; +import dev.zarr.zarrjava.core.codec.BytesBytesCodec; +import dev.zarr.zarrjava.core.ArrayMetadata.CoreArrayMetadata; import java.nio.ByteBuffer; import java.nio.ByteOrder; -public class Crc32cCodec extends BytesBytesCodec { +public class Crc32cCodec extends BytesBytesCodec implements Codec { public final String name = "crc32c"; diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/GzipCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/GzipCodec.java index 3ff5acd..c197fc4 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/GzipCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/GzipCodec.java @@ -3,20 +3,19 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.ArrayMetadata; -import dev.zarr.zarrjava.v3.codec.BytesBytesCodec; +import dev.zarr.zarrjava.core.codec.BytesBytesCodec; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import javax.annotation.Nonnull; -public class GzipCodec extends BytesBytesCodec { +public class GzipCodec extends BytesBytesCodec implements Codec { public final String name = "gzip"; @Nonnull @@ -28,20 +27,14 @@ public 
GzipCodec( this.configuration = configuration; } - private void copy(InputStream inputStream, OutputStream outputStream) throws IOException { - byte[] buffer = new byte[4096]; - int len; - while ((len = inputStream.read(buffer)) > 0) { - outputStream.write(buffer, 0, len); - } - } + @Override public ByteBuffer decode(ByteBuffer chunkBytes) throws ZarrException { try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); GZIPInputStream inputStream = new GZIPInputStream( new ByteArrayInputStream(Utils.toArray(chunkBytes)))) { - copy(inputStream, outputStream); + Utils.copyStream(inputStream, outputStream); inputStream.close(); return ByteBuffer.wrap(outputStream.toByteArray()); } catch (IOException ex) { diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ShardingIndexedCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ShardingIndexedCodec.java index 0dce734..dd30a93 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ShardingIndexedCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ShardingIndexedCodec.java @@ -8,11 +8,11 @@ import dev.zarr.zarrjava.utils.MultiArrayUtils; import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.ArrayMetadata; -import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata; +import dev.zarr.zarrjava.core.ArrayMetadata.CoreArrayMetadata; import dev.zarr.zarrjava.v3.DataType; -import dev.zarr.zarrjava.v3.codec.ArrayBytesCodec; +import dev.zarr.zarrjava.core.codec.ArrayBytesCodec; import dev.zarr.zarrjava.v3.codec.Codec; -import dev.zarr.zarrjava.v3.codec.CodecPipeline; +import dev.zarr.zarrjava.core.codec.CodecPipeline; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -22,7 +22,7 @@ import ucar.ma2.InvalidRangeException; -public class ShardingIndexedCodec extends ArrayBytesCodec.WithPartialDecode { +public class ShardingIndexedCodec extends ArrayBytesCodec.WithPartialDecode implements Codec { public final String name = "sharding_indexed"; @Nonnull 
diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/TransposeCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/TransposeCodec.java index 4d614ae..5eebefe 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/TransposeCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/TransposeCodec.java @@ -3,8 +3,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.v3.ArrayMetadata; -import dev.zarr.zarrjava.v3.codec.ArrayArrayCodec; +import dev.zarr.zarrjava.core.codec.ArrayArrayCodec; import ucar.ma2.Array; import javax.annotation.Nonnull; @@ -13,7 +14,7 @@ import static dev.zarr.zarrjava.utils.Utils.inversePermutation; import static dev.zarr.zarrjava.utils.Utils.isPermutation; -public class TransposeCodec extends ArrayArrayCodec { +public class TransposeCodec extends ArrayArrayCodec implements Codec{ @Nonnull public final String name = "transpose"; @@ -70,7 +71,7 @@ public Configuration(@JsonProperty(value = "order") int[] order) { } @Override - protected ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException { + public ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException { super.resolveArrayMetadata(); assert arrayMetadata.ndim() == configuration.order.length; diff --git a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java index f042f11..b79b1ac 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java +++ b/src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java @@ -5,13 +5,14 @@ import com.github.luben.zstd.Zstd; import com.github.luben.zstd.ZstdCompressCtx; import dev.zarr.zarrjava.ZarrException; +import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.v3.ArrayMetadata; -import dev.zarr.zarrjava.v3.codec.BytesBytesCodec; +import 
dev.zarr.zarrjava.core.codec.BytesBytesCodec; import javax.annotation.Nonnull; import java.nio.ByteBuffer; -public class ZstdCodec extends BytesBytesCodec { +public class ZstdCodec extends BytesBytesCodec implements Codec { public final String name = "zstd"; @Nonnull diff --git a/src/test/java/dev/zarr/zarrjava/ZarrPythonTests.java b/src/test/java/dev/zarr/zarrjava/ZarrPythonTests.java new file mode 100644 index 0000000..c595005 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/ZarrPythonTests.java @@ -0,0 +1,247 @@ +package dev.zarr.zarrjava; + +import dev.zarr.zarrjava.store.FilesystemStore; +import dev.zarr.zarrjava.store.StoreHandle; +import dev.zarr.zarrjava.v3.Array; +import dev.zarr.zarrjava.v3.ArrayMetadataBuilder; +import dev.zarr.zarrjava.v3.DataType; +import dev.zarr.zarrjava.v3.codec.CodecBuilder; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; + +public class ZarrPythonTests { + + final static Path TESTOUTPUT = Paths.get("testoutput"); + final static Path PYTHON_TEST_PATH = Paths.get("src/test/python-scripts/"); + + @BeforeAll + public static void clearTestoutputFolder() throws IOException { + if (Files.exists(TESTOUTPUT)) { + try (Stream walk = Files.walk(TESTOUTPUT)) { + walk.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } + } + Files.createDirectory(TESTOUTPUT); + } + + public void run_python_script(String scriptName, String... 
args) throws IOException, InterruptedException { + ProcessBuilder pb = new ProcessBuilder(); + pb.command().add("uv"); + pb.command().add("run"); + pb.command().add(PYTHON_TEST_PATH.resolve(scriptName).toString()); + pb.command().addAll(Arrays.asList(args)); + Process process = pb.start(); + + BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); + String line; + while ((line = reader.readLine()) != null) { + System.out.println(line); + } + + BufferedReader readerErr = new BufferedReader(new InputStreamReader(process.getErrorStream())); + while ((line = readerErr.readLine()) != null) { + System.err.println(line); + } + + int exitCode = process.waitFor(); + assert exitCode == 0; + } + + @ParameterizedTest + @CsvSource({ + "blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9", + "gzip,0", "gzip,5", + "zstd,0_true", "zstd,5_true", "zstd,0_false", "zstd,5_false", + "bytes,BIG", "bytes,LITTLE", + "transpose,_", + "sharding,start", "sharding,end", + "sharding_nested,_", + "crc32c,_", + }) + public void testReadFromZarrPythonV3(String codec, String codecParam) throws IOException, ZarrException, InterruptedException { + StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("read_from_zarr_python", codec, codecParam); + run_python_script("zarr_python_write.py", codec, codecParam, storeHandle.toPath().toString()); + Array array = Array.open(storeHandle); + ucar.ma2.Array result = array.read(); + + //for expected values see zarr_python_write.py + Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape()); + Assertions.assertEquals(DataType.INT32, array.metadata.dataType); + Assertions.assertArrayEquals(new int[]{2, 4, 8}, array.metadata.chunkShape()); + Assertions.assertEquals(42, array.metadata.attributes.get("answer")); + + int[] expectedData = new int[16 * 16 * 16]; + Arrays.setAll(expectedData, p -> p); + 
Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); + } + + @ParameterizedTest + @CsvSource({ + "blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9", + "gzip,0", "gzip,5", + "zstd,0_true", "zstd,5_true", "zstd,0_false", "zstd,5_false", + "bytes,BIG", "bytes,LITTLE", + "transpose,_", + "sharding,start", "sharding,end", + "sharding_nested,_", + "crc32c,_", + }) + public void testWriteReadWithZarrPythonV3(String codec, String codecParam) throws Exception { + int[] testData = new int[16 * 16 * 16]; + Arrays.setAll(testData, p -> p); + + Map attributes = new HashMap<>(); + attributes.put("test_key", "test_value"); + StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarr_python", codec, codecParam); + + ArrayMetadataBuilder builder = Array.metadataBuilder() + .withShape(16, 16, 16) + .withDataType(DataType.UINT32) + .withChunkShape(2, 4, 8) + .withFillValue(0) + .withAttributes(attributes); + + switch (codec) { + case "blosc": + String cname = codecParam.split("_")[0]; + String shuffle = codecParam.split("_")[1]; + int clevel_blosc = Integer.parseInt(codecParam.split("_")[2]); + builder = builder.withCodecs(c -> c.withBlosc(cname, shuffle, clevel_blosc)); + break; + case "gzip": + builder = builder.withCodecs(c -> c.withGzip(Integer.parseInt(codecParam))); + break; + case "zstd": + int clevel_zstd = Integer.parseInt(codecParam.split("_")[0]); + boolean checksum = Boolean.parseBoolean(codecParam.split("_")[1]); + builder = builder.withCodecs(c -> c.withZstd(clevel_zstd, checksum)); + break; + case "bytes": + builder = builder.withCodecs(c -> c.withBytes(codecParam)); + break; + case "transpose": + builder = builder.withCodecs(c -> c.withTranspose(new int[]{1, 0, 2})); + break; + case "sharding": + builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withBytes("LITTLE"), codecParam)); + break; + case 
"sharding_nested": + builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withSharding(new int[]{2, 1, 2}, c2 -> c2.withBytes("LITTLE")))); + break; + case "crc32c": + builder = builder.withCodecs(CodecBuilder::withCrc32c); + break; + default: + throw new IllegalArgumentException("Invalid Codec: " + codec); + } + + Array writeArray = Array.create(storeHandle, builder.build()); + writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData)); + + //read in zarr-java + Array readArray = Array.open(storeHandle); + ucar.ma2.Array result = readArray.read(); + + Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape()); + Assertions.assertEquals(DataType.UINT32, readArray.metadata.dataType); + Assertions.assertArrayEquals(new int[]{2, 4, 8}, readArray.metadata.chunkShape()); + Assertions.assertEquals("test_value", readArray.metadata.attributes.get("test_key")); + + Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + + //read in zarr_python + run_python_script("zarr_python_read.py", codec, codecParam, storeHandle.toPath().toString()); + } + + + @ParameterizedTest + @CsvSource({ + "zlib,0", "zlib,5", + "blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9", + }) + public void testReadFromZarrPythonV2(String compressor, String compressorParam) throws IOException, ZarrException, InterruptedException { + StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("read_from_zarr_python_v2", compressor, compressorParam); + run_python_script("zarr_python_write_v2.py", compressor, compressorParam, storeHandle.toPath().toString()); + + dev.zarr.zarrjava.v2.Array array = dev.zarr.zarrjava.v2.Array.open(storeHandle); + ucar.ma2.Array result = array.read(); + + //for expected values see zarr_python_write.py + Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape()); + 
Assertions.assertEquals(dev.zarr.zarrjava.v2.DataType.INT32, array.metadata.dataType); + Assertions.assertArrayEquals(new int[]{2, 4, 8}, array.metadata.chunkShape()); +// Assertions.assertEquals(42, array.metadata.attributes.get("answer")); + + int[] expectedData = new int[16 * 16 * 16]; + Arrays.setAll(expectedData, p -> p); + Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); + } + + + @ParameterizedTest + @CsvSource({ + "zlib,0", "zlib,5", + "blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9", + }) + public void testWriteReadWithZarrPythonV2(String compressor, String compressorParam) throws Exception { + int[] testData = new int[16 * 16 * 16]; + Arrays.setAll(testData, p -> p); + +// Map attributes = new HashMap<>(); +// attributes.put("test_key", "test_value"); + StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarr_python_v2", compressor, compressorParam); + + dev.zarr.zarrjava.v2.ArrayMetadataBuilder builder = dev.zarr.zarrjava.v2.Array.metadataBuilder() + .withShape(16, 16, 16) + .withDataType(dev.zarr.zarrjava.v2.DataType.UINT32) + .withChunks(2, 4, 8) +// .withAttributes(attributes) + .withFillValue(0); + + switch (compressor) { + case "blosc": + String cname = compressorParam.split("_")[0]; + String shuffle = compressorParam.split("_")[1]; + int clevel_blosc = Integer.parseInt(compressorParam.split("_")[2]); + builder = builder.withBloscCompressor(cname, shuffle, clevel_blosc); + break; + case "zlib": + builder = builder.withZlibCompressor(Integer.parseInt(compressorParam)); + break; + default: + throw new IllegalArgumentException("Invalid compressor: " + compressor); + } + + dev.zarr.zarrjava.v2.Array writeArray = dev.zarr.zarrjava.v2.Array.create(storeHandle, builder.build()); + writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData)); + + //read in zarr-java + 
dev.zarr.zarrjava.v2.Array readArray = dev.zarr.zarrjava.v2.Array.open(storeHandle); + ucar.ma2.Array result = readArray.read(); + + Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape()); + Assertions.assertEquals(dev.zarr.zarrjava.v2.DataType.UINT32, readArray.metadata.dataType); + Assertions.assertArrayEquals(new int[]{2, 4, 8}, readArray.metadata.chunkShape()); +// Assertions.assertEquals("test_value", readArray.metadata.attributes.get("test_key")); + + Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + + //read in zarr_python + run_python_script("zarr_python_read_v2.py", compressor, compressorParam, storeHandle.toPath().toString()); + } +} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrTest.java b/src/test/java/dev/zarr/zarrjava/ZarrTest.java index b9489a1..647a3b3 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrTest.java @@ -7,11 +7,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.github.luben.zstd.Zstd; import com.github.luben.zstd.ZstdCompressCtx; -import com.google.common.collect.Maps; import dev.zarr.zarrjava.store.*; import dev.zarr.zarrjava.utils.MultiArrayUtils; import dev.zarr.zarrjava.v3.*; -import dev.zarr.zarrjava.v3.codec.Codec; import dev.zarr.zarrjava.v3.codec.CodecBuilder; import dev.zarr.zarrjava.v3.codec.core.BytesCodec; import dev.zarr.zarrjava.v3.codec.core.TransposeCodec; @@ -36,6 +34,7 @@ import java.util.function.Function; import java.util.stream.Stream; +import static dev.zarr.zarrjava.core.ArrayMetadata.parseFillValue; import static org.junit.Assert.assertThrows; public class ZarrTest { @@ -44,13 +43,6 @@ public class ZarrTest { final static Path TESTOUTPUT = Paths.get("testoutput"); final static Path PYTHON_TEST_PATH = Paths.get("src/test/python-scripts/"); - public static String pythonPath() { - if (System.getProperty("os.name").startsWith("Windows")) { - return "venv_zarrita\\Scripts\\python.exe"; - 
} - return "venv_zarrita/bin/python"; - } - @BeforeAll public static void clearTestoutputFolder() throws IOException { if (Files.exists(TESTOUTPUT)) { @@ -61,51 +53,6 @@ public static void clearTestoutputFolder() throws IOException { Files.createDirectory(TESTOUTPUT); } - @ParameterizedTest - @CsvSource({ - "blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9", - "gzip,0", "gzip,5", - "zstd,0_true", "zstd,5_true", "zstd,0_false", "zstd,5_false", - "bytes,BIG", "bytes,LITTLE", - "transpose,_", - "sharding,start", "sharding,end", - "sharding_nested,_", - "crc32c,_", - }) - - public void testReadFromZarrita(String codec, String codecParam) throws IOException, ZarrException, InterruptedException { - String command = pythonPath(); - ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_write.py").toString(), codec, codecParam, TESTOUTPUT.toString()); - Process process = pb.start(); - - BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream())); - String line; - while ((line = reader.readLine()) != null) { - System.out.println(line); - } - - BufferedReader readerErr = new BufferedReader(new InputStreamReader(process.getErrorStream())); - while ((line = readerErr.readLine()) != null) { - System.err.println(line); - } - - int exitCode = process.waitFor(); - assert exitCode == 0; - - Array array = Array.open(new FilesystemStore(TESTOUTPUT).resolve("read_from_zarrita", codec, codecParam)); - ucar.ma2.Array result = array.read(); - - //for expected values see zarrita_write.py - Assertions.assertArrayEquals(new int[]{16, 16}, result.getShape()); - Assertions.assertEquals(DataType.INT32, array.metadata.dataType); - Assertions.assertArrayEquals(new int[]{2, 8}, array.metadata.chunkShape()); - Assertions.assertEquals(42, array.metadata.attributes.get("answer")); - - int[] expectedData = new int[16 * 16]; - Arrays.setAll(expectedData, p -> p); - 
Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT)); - } - @CsvSource({"0,true", "0,false", "5, true", "10, false"}) @ParameterizedTest public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException, InterruptedException { @@ -131,109 +78,16 @@ public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException //decompress in python Process process = new ProcessBuilder( - pythonPath(), - PYTHON_TEST_PATH.resolve("zstd_decompress.py").toString(), - compressedDataPath, - Integer.toString(number) + "uv", + "run", + PYTHON_TEST_PATH.resolve("zstd_decompress.py").toString(), + compressedDataPath, + Integer.toString(number) ).start(); int exitCode = process.waitFor(); assert exitCode == 0; } - @ParameterizedTest - @CsvSource({ - "blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9", - "gzip,0", "gzip,5", - "zstd,0_true", "zstd,5_true", "zstd,0_false", "zstd,5_false", - "bytes,BIG", "bytes,LITTLE", - "transpose,_", - "sharding,start", "sharding,end", - "sharding_nested,_", - "crc32c,_", - }) - public void testWriteReadWithZarrita(String codec, String codecParam) throws Exception { - int[] testData = new int[16 * 16 * 16]; - Arrays.setAll(testData, p -> p); - - Map attributes = new HashMap<>(); - attributes.put("test_key", "test_value"); - - StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarrita", codec, codecParam); - ArrayMetadataBuilder builder = Array.metadataBuilder() - .withShape(16, 16, 16) - .withDataType(DataType.UINT32) - .withChunkShape(2, 4, 8) - .withFillValue(0) - .withAttributes(attributes); - - switch (codec) { - case "blosc": - String cname = codecParam.split("_")[0]; - String shuffle = codecParam.split("_")[1]; - int clevel_blosc = Integer.parseInt(codecParam.split("_")[2]); - builder = builder.withCodecs(c -> c.withBlosc(cname, shuffle, clevel_blosc)); - break; - case "gzip": 
- builder = builder.withCodecs(c -> c.withGzip(Integer.parseInt(codecParam))); - break; - case "zstd": - int clevel_zstd = Integer.parseInt(codecParam.split("_")[0]); - boolean checksum = Boolean.parseBoolean(codecParam.split("_")[1]); - builder = builder.withCodecs(c -> c.withZstd(clevel_zstd, checksum)); - break; - case "bytes": - builder = builder.withCodecs(c -> c.withBytes(codecParam)); - break; - case "transpose": - builder = builder.withCodecs(c -> c.withTranspose(new int[]{1, 0, 2})); - break; - case "sharding": - builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withBytes("LITTLE"), codecParam)); - break; - case "sharding_nested": - builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withSharding(new int[]{2, 1, 2}, c2 -> c2.withBytes("LITTLE")))); - break; - case "crc32c": - builder = builder.withCodecs(CodecBuilder::withCrc32c); - break; - default: - throw new IllegalArgumentException("Invalid Codec: " + codec); - } - - Array writeArray = Array.create(storeHandle, builder.build()); - writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData)); - - //read in zarr-java - Array readArray = Array.open(storeHandle); - ucar.ma2.Array result = readArray.read(); - - Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape()); - Assertions.assertEquals(DataType.UINT32, readArray.metadata.dataType); - Assertions.assertArrayEquals(new int[]{2, 4, 8}, readArray.metadata.chunkShape()); - Assertions.assertEquals("test_value", readArray.metadata.attributes.get("test_key")); - - Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); - - //read in zarrita - String command = pythonPath(); - - ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_read.py").toString(), codec, codecParam, TESTOUTPUT.toString()); - Process process = pb.start(); - - BufferedReader reader = new BufferedReader(new 
InputStreamReader(process.getInputStream())); - String line; - while ((line = reader.readLine()) != null) { - System.out.println(line); - } - - BufferedReader readerErr = new BufferedReader(new InputStreamReader(process.getErrorStream())); - while ((line = readerErr.readLine()) != null) { - System.err.println(line); - } - - int exitCode = process.waitFor(); - assert exitCode == 0; - } static Stream> invalidCodecBuilder() { return Stream.of( @@ -337,11 +191,11 @@ public void testZstdCodecReadWrite(int clevel, boolean checksum) throws ZarrExce StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("testZstdCodecReadWrite", "checksum_" + checksum, "clevel_" + clevel); ArrayMetadataBuilder builder = Array.metadataBuilder() - .withShape(16, 16, 16) - .withDataType(DataType.UINT32) - .withChunkShape(2, 4, 8) - .withFillValue(0) - .withCodecs(c -> c.withZstd(clevel, checksum)); + .withShape(16, 16, 16) + .withDataType(DataType.UINT32) + .withChunkShape(2, 4, 8) + .withFillValue(0) + .withCodecs(c -> c.withZstd(clevel, checksum)); Array writeArray = Array.create(storeHandle, builder.build()); writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData)); @@ -401,46 +255,38 @@ public void testFileSystemStores() throws IOException, ZarrException { ObjectMapper objectMapper = Node.makeObjectMapper(); GroupMetadata group = objectMapper.readValue( - Files.readAllBytes(TESTDATA.resolve("l4_sample").resolve("zarr.json")), - GroupMetadata.class + Files.readAllBytes(TESTDATA.resolve("l4_sample").resolve("zarr.json")), + GroupMetadata.class ); System.out.println(group); System.out.println(objectMapper.writeValueAsString(group)); ArrayMetadata arrayMetadata = objectMapper.readValue(Files.readAllBytes(TESTDATA.resolve( - "l4_sample").resolve("color").resolve("1").resolve("zarr.json")), - ArrayMetadata.class); + "l4_sample").resolve("color").resolve("1").resolve("zarr.json")), + ArrayMetadata.class); System.out.println(arrayMetadata); 
System.out.println(objectMapper.writeValueAsString(arrayMetadata)); System.out.println( - Array.open(fsStore.resolve("l4_sample", "color", "1"))); + Array.open(fsStore.resolve("l4_sample", "color", "1"))); System.out.println( - Arrays.toString(Group.open(fsStore.resolve("l4_sample")).list().toArray(Node[]::new))); + Arrays.toString(Group.open(fsStore.resolve("l4_sample")).list().toArray(Node[]::new))); System.out.println( - Arrays.toString(((Group) Group.open(fsStore.resolve("l4_sample")).get("color")).list() - .toArray(Node[]::new))); + Arrays.toString(((Group) Group.open(fsStore.resolve("l4_sample")).get("color")).list() + .toArray(Node[]::new))); } @Test public void testS3Store() throws IOException, ZarrException { S3Store s3Store = new S3Store(AmazonS3ClientBuilder.standard() - .withRegion("eu-west-1") - .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) - .build(), "static.webknossos.org", "data"); + .withRegion("eu-west-1") + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(), "static.webknossos.org", "data"); System.out.println(Array.open(s3Store.resolve("zarr_v3", "l4_sample", "color", "1"))); } - @Test - public void testHttpStore() throws IOException, ZarrException { - HttpStore httpStore = new HttpStore("https://static.webknossos.org/data/"); - System.out.println( - dev.zarr.zarrjava.v2.Array.open(httpStore.resolve("l4_sample", "color", "1"))); - System.out.println(Array.open(httpStore.resolve("zarr_v3", "l4_sample", "color", "1"))); - } - @Test public void testV3ShardingReadCutout() throws IOException, ZarrException { Array array = Array.open(new FilesystemStore(TESTDATA).resolve("l4_sample", "color", "1")); @@ -455,14 +301,14 @@ public void testV3Access() throws IOException, ZarrException { Array readArray = Array.open(new FilesystemStore(TESTDATA).resolve("l4_sample", "color", "1")); ucar.ma2.Array outArray = readArray.access().withOffset(0, 3073, 3073, 513) - .withShape(1, 64, 
64, 64) - .read(); + .withShape(1, 64, 64, 64) + .read(); Assertions.assertEquals(outArray.getSize(), 64 * 64 * 64); Assertions.assertEquals(outArray.getByte(0), -98); Array writeArray = Array.create( - new FilesystemStore(TESTOUTPUT).resolve("l4_sample_2", "color", "1"), - readArray.metadata + new FilesystemStore(TESTOUTPUT).resolve("l4_sample_2", "color", "1"), + readArray.metadata ); writeArray.access().withOffset(0, 3073, 3073, 513).write(outArray); } @@ -471,11 +317,11 @@ public void testV3Access() throws IOException, ZarrException { @ValueSource(strings = {"start", "end"}) public void testV3ShardingReadWrite(String indexLocation) throws IOException, ZarrException { Array readArray = Array.open( - new FilesystemStore(TESTDATA).resolve("sharding_index_location", indexLocation)); + new FilesystemStore(TESTDATA).resolve("sharding_index_location", indexLocation)); ucar.ma2.Array readArrayContent = readArray.read(); Array writeArray = Array.create( - new FilesystemStore(TESTOUTPUT).resolve("sharding_index_location", indexLocation), - readArray.metadata + new FilesystemStore(TESTOUTPUT).resolve("sharding_index_location", indexLocation), + readArray.metadata ); writeArray.write(readArrayContent); ucar.ma2.Array outArray = writeArray.read(); @@ -487,12 +333,12 @@ public void testV3ShardingReadWrite(String indexLocation) throws IOException, Za public void testV3Codecs() throws IOException, ZarrException { int[] readShape = new int[]{1, 1, 1024, 1024}; Array readArray = Array.open( - new FilesystemStore(TESTDATA).resolve("l4_sample", "color", "8-8-2")); + new FilesystemStore(TESTDATA).resolve("l4_sample", "color", "8-8-2")); ucar.ma2.Array readArrayContent = readArray.read(new long[4], readShape); { Array gzipArray = Array.create( - new FilesystemStore(TESTOUTPUT).resolve("l4_sample_gzip", "color", "8-8-2"), - Array.metadataBuilder(readArray.metadata).withCodecs(c -> c.withGzip(5)).build() + new FilesystemStore(TESTOUTPUT).resolve("l4_sample_gzip", "color", "8-8-2"), + 
Array.metadataBuilder(readArray.metadata).withCodecs(c -> c.withGzip(5)).build() ); gzipArray.write(readArrayContent); ucar.ma2.Array outGzipArray = gzipArray.read(new long[4], readShape); @@ -500,8 +346,8 @@ public void testV3Codecs() throws IOException, ZarrException { } { Array bloscArray = Array.create( - new FilesystemStore(TESTOUTPUT).resolve("l4_sample_blosc", "color", "8-8-2"), - Array.metadataBuilder(readArray.metadata).withCodecs(c -> c.withBlosc("zstd", 5)).build() + new FilesystemStore(TESTOUTPUT).resolve("l4_sample_blosc", "color", "8-8-2"), + Array.metadataBuilder(readArray.metadata).withCodecs(c -> c.withBlosc("zstd", 5)).build() ); bloscArray.write(readArrayContent); ucar.ma2.Array outBloscArray = bloscArray.read(new long[4], readShape); @@ -509,8 +355,8 @@ public void testV3Codecs() throws IOException, ZarrException { } { Array zstdArray = Array.create( - new FilesystemStore(TESTOUTPUT).resolve("l4_sample_zstd", "color", "8-8-2"), - Array.metadataBuilder(readArray.metadata).withCodecs(c -> c.withZstd(10)).build() + new FilesystemStore(TESTOUTPUT).resolve("l4_sample_zstd", "color", "8-8-2"), + Array.metadataBuilder(readArray.metadata).withCodecs(c -> c.withZstd(10)).build() ); zstdArray.write(readArrayContent); ucar.ma2.Array outZstdArray = zstdArray.read(new long[4], readShape); @@ -521,22 +367,22 @@ public void testV3Codecs() throws IOException, ZarrException { @Test public void testV3ArrayMetadataBuilder() throws ZarrException { Array.metadataBuilder() - .withShape(1, 4096, 4096, 1536) - .withDataType(DataType.UINT32) - .withChunkShape(1, 1024, 1024, 1024) - .withFillValue(0) - .withCodecs( - c -> c.withSharding(new int[]{1, 32, 32, 32}, CodecBuilder::withBlosc)) - .build(); + .withShape(1, 4096, 4096, 1536) + .withDataType(DataType.UINT32) + .withChunkShape(1, 1024, 1024, 1024) + .withFillValue(0) + .withCodecs( + c -> c.withSharding(new int[]{1, 32, 32, 32}, CodecBuilder::withBlosc)) + .build(); } @Test public void testV3FillValue() throws 
ZarrException { - Assertions.assertEquals((int) ArrayMetadata.parseFillValue(0, DataType.UINT32), 0); - Assertions.assertEquals((int) ArrayMetadata.parseFillValue("0x00010203", DataType.UINT32), 50462976); - Assertions.assertEquals((byte) ArrayMetadata.parseFillValue("0b00000010", DataType.UINT8), 2); - assert Double.isNaN((double) ArrayMetadata.parseFillValue("NaN", DataType.FLOAT64)); - assert Double.isInfinite((double) ArrayMetadata.parseFillValue("-Infinity", DataType.FLOAT64)); + Assertions.assertEquals((int) parseFillValue(0, DataType.UINT32), 0); + Assertions.assertEquals((int) parseFillValue("0x00010203", DataType.UINT32), 50462976); + Assertions.assertEquals((byte) parseFillValue("0b00000010", DataType.UINT8), 2); + assert Double.isNaN((double) parseFillValue("NaN", DataType.FLOAT64)); + assert Double.isInfinite((double) parseFillValue("-Infinity", DataType.FLOAT64)); } @Test @@ -549,54 +395,45 @@ public void testV3Group() throws IOException, ZarrException { Group group = Group.create(fsStore.resolve("testgroup")); Group group2 = group.createGroup("test2", attributes); Array array = group2.createArray("array", b -> - b.withShape(10, 10) - .withDataType(DataType.UINT8) - .withChunkShape(5, 5) + b.withShape(10, 10) + .withDataType(DataType.UINT8) + .withChunkShape(5, 5) ); array.write(new long[]{2, 2}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{8, 8})); Assertions.assertArrayEquals(((Array) ((Group) group.listAsArray()[0]).listAsArray()[0]).metadata.chunkShape(), new int[]{5, 5}); } - @Test - public void testV2() throws IOException { - FilesystemStore fsStore = new FilesystemStore(""); - HttpStore httpStore = new HttpStore("https://static.webknossos.org/data"); - - System.out.println(dev.zarr.zarrjava.v2.Array.open(httpStore.resolve("l4_sample", "color", "1"))); - } - - @Test public void testReadme1() throws IOException, ZarrException { Group hierarchy = Group.open( - new HttpStore("https://static.webknossos.org/data/zarr_v3") - 
.resolve("l4_sample") + new HttpStore("https://static.webknossos.org/data/zarr_v3") + .resolve("l4_sample") ); Group color = (Group) hierarchy.get("color"); Array array = (Array) color.get("1"); ucar.ma2.Array outArray = array.read( - new long[]{0, 3073, 3073, 513}, // offset - new int[]{1, 64, 64, 64} // shape + new long[]{0, 3073, 3073, 513}, // offset + new int[]{1, 64, 64, 64} // shape ); } @Test public void testReadme2() throws IOException, ZarrException { Array array = Array.create( - new FilesystemStore(TESTOUTPUT).resolve("testoutput", "color", "1"), - Array.metadataBuilder() - .withShape(1, 4096, 4096, 1536) - .withDataType(DataType.UINT32) - .withChunkShape(1, 1024, 1024, 1024) - .withFillValue(0) - .withCodecs(c -> c.withSharding(new int[]{1, 32, 32, 32}, c1 -> c1.withBlosc())) - .build() + new FilesystemStore(TESTOUTPUT).resolve("testoutput", "color", "1"), + Array.metadataBuilder() + .withShape(1, 4096, 4096, 1536) + .withDataType(DataType.UINT32) + .withChunkShape(1, 1024, 1024, 1024) + .withFillValue(0) + .withCodecs(c -> c.withSharding(new int[]{1, 32, 32, 32}, c1 -> c1.withBlosc())) + .build() ); ucar.ma2.Array data = ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1, 1, 2, 2}, new int[]{1, 2, 3, 4}); array.write( - new long[]{0, 0, 0, 0}, // offset - data + new long[]{0, 0, 0, 0}, // offset + data ); ucar.ma2.Array output = array.read(new long[]{0, 0, 0, 0}, new int[]{1, 1, 2, 2}); assert MultiArrayUtils.allValuesEqual(data, output); @@ -635,7 +472,7 @@ public void testReadL4Sample(String mag) throws IOException, ZarrException { } @ParameterizedTest - @ValueSource(booleans = {false,true}) + @ValueSource(booleans = {false, true}) public void testParallel(boolean useParallel) throws IOException, ZarrException { int[] testData = new int[512 * 512 * 512]; Arrays.setAll(testData, p -> p); @@ -654,7 +491,7 @@ public void testParallel(boolean useParallel) throws IOException, ZarrException ucar.ma2.Array result = readArray.read(useParallel); 
Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); - clearTestoutputFolder(); + clearTestoutputFolder(); } @Test @@ -671,17 +508,104 @@ public void testMetadataAcceptsEmptyStorageTransformer() throws ZarrException, I ); ArrayMetadataBuilder builderWithStorageTransformer = Array.metadataBuilder() - .withShape(1) - .withChunkShape(1) - .withDataType(DataType.UINT8) - .withStorageTransformers(new HashMap[]{new HashMap(){{ - put("some", "value"); - }}}); + .withShape(1) + .withChunkShape(1) + .withDataType(DataType.UINT8) + .withStorageTransformers(new HashMap[]{new HashMap() {{ + put("some", "value"); + }}}); assertThrows(ZarrException.class, () -> Array.create( new FilesystemStore(TESTOUTPUT).resolve("storage_transformer"), builderWithStorageTransformer.build() )); } -} + @ParameterizedTest + @CsvSource({"blosclz,noshuffle,0", "lz4,shuffle,6", "lz4hc,bitshuffle,3", "zlib,shuffle,5", "zstd,bitshuffle,9"}) + public void testV2createBlosc(String cname, String shuffle, int clevel) throws IOException, ZarrException { + dev.zarr.zarrjava.v2.Array array = dev.zarr.zarrjava.v2.Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_create_blosc", cname + "_" + shuffle + "_" + clevel), + dev.zarr.zarrjava.v2.Array.metadataBuilder() + .withShape(10, 10) + .withDataType(dev.zarr.zarrjava.v2.DataType.UINT8) + .withChunks(5, 5) + .withFillValue(1) + .withBloscCompressor(cname, shuffle, clevel) + .build() + ); + array.write(new long[]{2, 2}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{8, 8})); + + ucar.ma2.Array outArray = array.read(new long[]{2, 2}, new int[]{8, 8}); + Assertions.assertEquals(8 * 8, outArray.getSize()); + Assertions.assertEquals(0, outArray.getByte(0)); + } + + @Test + public void testV2create() throws IOException, ZarrException { + dev.zarr.zarrjava.v2.DataType dataType = dev.zarr.zarrjava.v2.DataType.UINT32; + + dev.zarr.zarrjava.v2.Array array = dev.zarr.zarrjava.v2.Array.create( + new 
FilesystemStore(TESTOUTPUT).resolve("v2_create"), + dev.zarr.zarrjava.v2.Array.metadataBuilder() + .withShape(10, 10) + .withDataType(dataType) + .withChunks(5, 5) + .withFillValue(2) + .build() + ); + array.write(new long[]{2, 2}, ucar.ma2.Array.factory(dataType.getMA2DataType(), new int[]{8, 8})); + + ucar.ma2.Array outArray = array.read(new long[]{2, 2}, new int[]{8, 8}); + Assertions.assertEquals(8 * 8, outArray.getSize()); + Assertions.assertEquals(0, outArray.getByte(0)); + } + + @ParameterizedTest + @ValueSource(ints = {0, 1, 5, 9}) + public void testV2createZlib(int level) throws IOException, ZarrException { + dev.zarr.zarrjava.v2.Array array = dev.zarr.zarrjava.v2.Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_create_zlib", String.valueOf(level)), + dev.zarr.zarrjava.v2.Array.metadataBuilder() + .withShape(15, 10) + .withDataType(dev.zarr.zarrjava.v2.DataType.UINT8) + .withChunks(4, 5) + .withFillValue(5) + .withZlibCompressor(level) + .build() + ); + array.write(new long[]{2, 2}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{7, 6})); + + ucar.ma2.Array outArray = array.read(new long[]{2, 2}, new int[]{7, 6}); + Assertions.assertEquals(7 * 6, outArray.getSize()); + Assertions.assertEquals(0, outArray.getByte(0)); + } + + @ParameterizedTest + @ValueSource(strings = {"BOOL", "INT8", "UINT8", "INT16", "UINT16", "INT32", "UINT32", "INT64", "UINT64", "FLOAT32", "FLOAT64"}) + public void testV2noFillValue(dev.zarr.zarrjava.v2.DataType dataType) throws IOException, ZarrException { + StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_no_fillvalue", dataType.name()); + + dev.zarr.zarrjava.v2.Array array = dev.zarr.zarrjava.v2.Array.create( + storeHandle, + dev.zarr.zarrjava.v2.Array.metadataBuilder() + .withShape(15, 10) + .withDataType(dataType) + .withChunks(4, 5) + .build() + ); + Assertions.assertNull(array.metadata().fillValue); + + ucar.ma2.Array outArray = array.read(new long[]{0, 0}, new int[]{1, 1}); + if 
(dataType == dev.zarr.zarrjava.v2.DataType.BOOL) { + Assertions.assertFalse(outArray.getBoolean(0)); + } else { + Assertions.assertEquals(0, outArray.getByte(0)); + } + + dev.zarr.zarrjava.v2.Array array2 = dev.zarr.zarrjava.v2.Array.open( + storeHandle + ); + Assertions.assertNull(array2.metadata().fillValue); + } +} diff --git a/src/test/python-scripts/parse_codecs.py b/src/test/python-scripts/parse_codecs.py new file mode 100644 index 0000000..d720706 --- /dev/null +++ b/src/test/python-scripts/parse_codecs.py @@ -0,0 +1,83 @@ +import zarr +from zarr.codecs.blosc import BloscCodec +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.gzip import GzipCodec +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation +from zarr.codecs.transpose import TransposeCodec +from zarr.codecs.zstd import ZstdCodec +import zarrita +import numcodecs + +def parse_codecs_zarr_python(codec_string: str, param_string: str, zarr_version: int = 3): + compressor = None + filters = "auto" + serializer = "auto" + + if codec_string == "blosc" and zarr_version == 3: + cname, shuffle, clevel = param_string.split("_") + compressor = BloscCodec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel)) + elif codec_string == "blosc" and zarr_version == 2: + cname, shuffle, clevel = param_string.split("_") + if shuffle == "noshuffle": + shuffle = numcodecs.Blosc.NOSHUFFLE + elif shuffle == "shuffle": + shuffle = numcodecs.Blosc.SHUFFLE + elif shuffle == "bitshuffle": + shuffle = numcodecs.Blosc.BITSHUFFLE + else: + raise ValueError(f"Invalid shuffle: {shuffle}") + compressor = numcodecs.Blosc(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel)) + elif codec_string == "zlib" and zarr_version == 2: + compressor = numcodecs.Zlib(level=int(param_string)) + elif codec_string == "gzip" and zarr_version == 3: + compressor = GzipCodec(level=int(param_string)) + elif codec_string == "zstd" and zarr_version == 3: + level, 
checksum = param_string.split("_") + compressor = ZstdCodec(checksum=checksum == 'true', level=int(level)) + elif codec_string == "bytes" and zarr_version == 3: + serializer = BytesCodec(endian=param_string.lower()) + elif codec_string == "transpose" and zarr_version == 3: + filters = [TransposeCodec(order=(1, 0, 2))] + elif codec_string == "sharding" and zarr_version == 3: + serializer = ShardingCodec(chunk_shape=(2, 2, 4), codecs=(BytesCodec(endian="little"),), + index_location=ShardingCodecIndexLocation.start if param_string == "start" + else ShardingCodecIndexLocation.end) + elif codec_string == "sharding_nested" and zarr_version == 3: + serializer = ShardingCodec(chunk_shape=(2, 2, 4), codecs=(ShardingCodec(chunk_shape=(2, 1, 2), + codecs=[BytesCodec(endian="little")]),)) + elif codec_string == "crc32c" and zarr_version == 3: + compressor = Crc32cCodec() + else: + raise ValueError(f"Invalid codec: {codec_string}, zarr_version: {zarr_version}") + + return compressor, serializer, filters + +def parse_codecs_zarrita(codec_string: str, param_string: str): + codec = [] + if codec_string == "blosc": + cname, shuffle, clevel = param_string.split("_") + codec = [zarrita.codecs.bytes_codec(), + zarrita.codecs.blosc_codec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel))] + elif codec_string == "gzip": + codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec(level=int(param_string))] + elif codec_string == "zstd": + level, checksum = param_string.split("_") + codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=checksum == 'true', level=int(level))] + elif codec_string == "bytes": + codec = [zarrita.codecs.bytes_codec(endian=param_string.lower())] + elif codec_string == "transpose": + codec = [zarrita.codecs.transpose_codec((1, 0, 2)), zarrita.codecs.bytes_codec()] + elif codec_string == "sharding": + codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), codecs=[zarrita.codecs.bytes_codec("little")], + 
index_location=zarrita.metadata.ShardingCodecIndexLocation.start if param_string == "start" + else zarrita.metadata.ShardingCodecIndexLocation.end), + elif codec_string == "sharding_nested": + codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), + codecs=[zarrita.codecs.sharding_codec(chunk_shape=(2, 1, 2), codecs=[ + zarrita.codecs.bytes_codec("little")])]), + elif codec_string == "crc32c": + codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.crc32c_codec()] + else: + raise ValueError(f"Invalid codec: {codec_string}") + return codec diff --git a/src/test/python-scripts/zarr_python_read.py b/src/test/python-scripts/zarr_python_read.py new file mode 100644 index 0000000..1e31ff5 --- /dev/null +++ b/src/test/python-scripts/zarr_python_read.py @@ -0,0 +1,33 @@ +import sys +from pathlib import Path + +import numpy as np + +import zarr +from zarr.storage import LocalStore +from parse_codecs import parse_codecs_zarr_python + +codec_string = sys.argv[1] +param_string = sys.argv[2] +compressor, serializer, filters = parse_codecs_zarr_python(codec_string, param_string) +store_path = Path(sys.argv[3]) + +expected_data = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16) + +a = zarr.open_array(store=LocalStore(store_path)) +read_data = a[:, :] +assert np.array_equal(read_data, expected_data), f"got:\n {read_data} \nbut expected:\n {expected_data}" + +b = zarr.create_array( + LocalStore(store_path / "expected"), + shape=(16, 16, 16), + chunks=(2, 4, 8), + dtype="uint32", + fill_value=0, + filters=filters, + serializer=serializer, + compressors=compressor, + attributes={'test_key': 'test_value'}, +) + +assert a.metadata == b.metadata, f"not equal: \n{a.metadata=}\n{b.metadata=}" diff --git a/src/test/python-scripts/zarr_python_read_v2.py b/src/test/python-scripts/zarr_python_read_v2.py new file mode 100644 index 0000000..5390a23 --- /dev/null +++ b/src/test/python-scripts/zarr_python_read_v2.py @@ -0,0 +1,35 @@ +import sys +from pathlib import Path + +import 
numpy as np + +import zarr + +from zarr.storage import LocalStore +from parse_codecs import parse_codecs_zarr_python + +codec_string = sys.argv[1] +param_string = sys.argv[2] +compressor, serializer, filters = parse_codecs_zarr_python(codec_string, param_string, zarr_version=2) +store_path = Path(sys.argv[3]) + +expected_data = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16) + +a = zarr.open_array(store=LocalStore(store_path)) +read_data = a[:, :] +assert np.array_equal(read_data, expected_data), f"got:\n {read_data} \nbut expected:\n {expected_data}" + +b = zarr.create_array( + LocalStore(store_path / "expected"), + zarr_format=2, + shape=(16, 16, 16), + chunks=(2, 4, 8), + dtype='uint32', + fill_value=0, + filters=filters, + serializer=serializer, + compressors=compressor, +# attributes={'test_key': 'test_value'}, +) + +assert a.metadata == b.metadata, f"not equal: \n{a.metadata=}\n{b.metadata=}" diff --git a/src/test/python-scripts/zarr_python_write.py b/src/test/python-scripts/zarr_python_write.py new file mode 100644 index 0000000..531d356 --- /dev/null +++ b/src/test/python-scripts/zarr_python_write.py @@ -0,0 +1,27 @@ +import sys +from pathlib import Path + +import numpy as np + +import zarr +from zarr.storage import LocalStore +from parse_codecs import parse_codecs_zarr_python + +codec_string = sys.argv[1] +param_string = sys.argv[2] +compressor, serializer, filters = parse_codecs_zarr_python(codec_string, param_string) +store_path = Path(sys.argv[3]) + +testdata = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16) + +a = zarr.create_array( + LocalStore(store_path), + shape=(16, 16, 16), + chunks=(2, 4, 8), + dtype='int32', + filters=filters, + serializer=serializer, + compressors=compressor, + attributes={'answer': 42} +) +a[:, :] = testdata diff --git a/src/test/python-scripts/zarr_python_write_v2.py b/src/test/python-scripts/zarr_python_write_v2.py new file mode 100644 index 0000000..9c124c1 --- /dev/null +++ 
b/src/test/python-scripts/zarr_python_write_v2.py @@ -0,0 +1,29 @@ +import sys +from pathlib import Path + +import numpy as np + +import zarr + +from zarr.storage import LocalStore +from parse_codecs import parse_codecs_zarr_python + +codec_string = sys.argv[1] +param_string = sys.argv[2] +compressor, serializer, filters = parse_codecs_zarr_python(codec_string, param_string, zarr_version=2) +store_path = Path(sys.argv[3]) + +testdata = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16) + +a = zarr.create_array( + LocalStore(store_path), + zarr_format=2, + shape=(16, 16, 16), + chunks=(2, 4, 8), + dtype='int32', + filters=filters, + serializer=serializer, + compressors=compressor, + attributes={'answer': 42} +) +a[:, :] = testdata diff --git a/src/test/python-scripts/zarrita_read.py b/src/test/python-scripts/zarrita_read.py index f84bf9b..2769694 100644 --- a/src/test/python-scripts/zarrita_read.py +++ b/src/test/python-scripts/zarrita_read.py @@ -3,35 +3,11 @@ import numpy as np import zarrita from zarrita.metadata import ShardingCodecIndexLocation +from parse_codecs import parse_codecs_zarrita codec_string = sys.argv[1] param_string = sys.argv[2] -codec = [] -if codec_string == "blosc": - cname, shuffle, clevel = param_string.split("_") - codec = [zarrita.codecs.bytes_codec(), - zarrita.codecs.blosc_codec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel))] -elif codec_string == "gzip": - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec(level=int(param_string))] -elif codec_string == "zstd": - level, checksum = param_string.split("_") - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=checksum == 'true', level=int(level))] -elif codec_string == "bytes": - codec = [zarrita.codecs.bytes_codec(endian=param_string.lower())] -elif codec_string == "transpose": - codec = [zarrita.codecs.transpose_codec((1, 0, 2)), zarrita.codecs.bytes_codec()] -elif codec_string == "sharding": - codec = 
zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), codecs=[zarrita.codecs.bytes_codec("little")], - index_location=ShardingCodecIndexLocation.start if param_string == "start" - else ShardingCodecIndexLocation.end), -elif codec_string == "sharding_nested": - codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), - codecs=[zarrita.codecs.sharding_codec(chunk_shape=(2, 1, 2), codecs=[ - zarrita.codecs.bytes_codec("little")])]), -elif codec_string == "crc32c": - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.crc32c_codec()] -else: - raise ValueError(f"Invalid {codec=}") +codec = parse_codecs_zarrita(codec_string, param_string) store = zarrita.LocalStore(sys.argv[3]) expected_data = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16) diff --git a/src/test/python-scripts/zarrita_write.py b/src/test/python-scripts/zarrita_write.py index 2eb0fc2..4000f60 100644 --- a/src/test/python-scripts/zarrita_write.py +++ b/src/test/python-scripts/zarrita_write.py @@ -3,44 +3,20 @@ import zarrita import numpy as np from zarrita.metadata import ShardingCodecIndexLocation +from parse_codecs import parse_codecs_zarrita codec_string = sys.argv[1] param_string = sys.argv[2] -codec = [] -if codec_string == "blosc": - cname, shuffle, clevel = param_string.split("_") - codec = [zarrita.codecs.bytes_codec(), - zarrita.codecs.blosc_codec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel))] -elif codec_string == "gzip": - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec(level=int(param_string))] -elif codec_string == "zstd": - level, checksum = param_string.split("_") - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=checksum == 'true', level=int(level))] -elif codec_string == "bytes": - codec = [zarrita.codecs.bytes_codec(endian=param_string.lower())] -elif codec_string == "transpose": - codec = [zarrita.codecs.transpose_codec((0, 1)), zarrita.codecs.bytes_codec()] -elif codec_string == "sharding": - codec = 
zarrita.codecs.sharding_codec(chunk_shape=(2, 4), codecs=[zarrita.codecs.bytes_codec("little")], - index_location=ShardingCodecIndexLocation.start if param_string == "start" - else ShardingCodecIndexLocation.end), -elif codec_string == "sharding_nested": - codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 4), - codecs=[zarrita.codecs.sharding_codec(chunk_shape=(1, 2), codecs=[ - zarrita.codecs.bytes_codec("little")])]), -elif codec_string == "crc32c": - codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.crc32c_codec()] -else: - raise ValueError(f"Invalid {codec_string=}") +codec = parse_codecs_zarrita(codec_string, param_string) store = zarrita.LocalStore(sys.argv[3]) -testdata = np.arange(16 * 16, dtype='int32').reshape((16, 16)) +testdata = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16) a = zarrita.Array.create( store / 'read_from_zarrita' / codec_string / param_string, - shape=(16, 16), + shape=(16, 16, 16), + chunk_shape=(2, 4, 8), dtype='int32', - chunk_shape=(2, 8), codecs=codec, attributes={'answer': 42} )