Skip to content

Commit

Permalink
Merge pull request #7 from davidbrochart/zarr_v2
Browse files Browse the repository at this point in the history
Support for Zarr v2
  • Loading branch information
JohanMabille committed Nov 9, 2020
2 parents c509872 + 390a8ad commit 71619bc
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 47 deletions.
87 changes: 65 additions & 22 deletions include/xtensor-zarr/xzarr_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,52 +29,95 @@ namespace xt
j["data_type"] = dtype;
j["chunk_memory_layout"] = std::string(1, chunk_memory_layout);
nlohmann::json compressor_config;
if (strcmp(compressor.name, "binary"))
if (compressor.name != "binary")
{
j["compressor"]["codec"] = std::string("https://purl.org/zarr/spec/codec/") + compressor.name + "/1.0";
j["compressor"]["codec"] ="https://purl.org/zarr/spec/codec/" + compressor.name + "/1.0";
compressor.write_to(compressor_config);
j["compressor"]["configuration"] = compressor_config;
}
j["fill_value"] = fill_value;
j["extensions"] = nlohmann::json::array();
store[std::string("meta/root") + path + ".array.json"] = j.dump(4);
std::string full_path = store.get_root() + "/data/root" + path;
return xchunked_array_factory<store_type>::build(store, compressor.name, dtype, chunk_memory_layout, shape, chunk_shape, full_path, chunk_separator, attrs, compressor_config, chunk_pool_size, fill_value);
return xchunked_array_factory<store_type>::build(store, compressor.name, dtype, chunk_memory_layout, shape, chunk_shape, full_path, chunk_separator, attrs, compressor_config, chunk_pool_size, fill_value, 3);
}

// Opens an existing Zarr array found in `store` and builds the matching
// chunked array through xchunked_array_factory<store_type>::build.
//
// NOTE(review): this listing comes from a commit diff view, so removed and
// added lines appear next to each other (e.g. the two signatures and the two
// return statements below). The comments describe the added (newer) lines.
//
// store           - store object; indexed with a key to read metadata and
//                   queried with get_root() to build the data path.
// path            - array path inside the store hierarchy.
// chunk_pool_size - forwarded unchanged to the factory.
// zarr_version    - version string such as "3" or "2.0"; only the part before
//                   the first '.' is used. Majors 2 and 3 are handled, any
//                   other value goes through XTENSOR_THROW(std::runtime_error).
template <class store_type>
zarray get_zarr_array(store_type store, const std::string& path, std::size_t chunk_pool_size)
zarray get_zarr_array(store_type store, const std::string& path, std::size_t chunk_pool_size, const std::string& zarr_version = "3")
{
std::string s = store[std::string("meta/root") + path + ".array.json"];
// Extract the major version: the text before the first '.', or the whole
// string when there is no '.'.
std::size_t i = zarr_version.find('.');
std::size_t zarr_major;
if (i == std::string::npos)
{
zarr_major = std::stoi(zarr_version);
}
else
{
zarr_major = std::stoi(zarr_version.substr(0, i));
}
// Fetch the raw JSON metadata document; the key depends on the version.
std::string s;
switch (zarr_major)
{
case 3:
s = store[std::string("meta/root") + path + ".array.json"];
break;
case 2:
// NOTE(review): the v2 key does not include `path` - confirm that the
// store is expected to be rooted at the array itself.
s = store[".zarray"];
break;
default:
XTENSOR_THROW(std::runtime_error, "Unsupported Zarr version: " + zarr_version);
}
auto j = nlohmann::json::parse(s);
auto json_shape = j["shape"];
auto json_chunk_shape = j["chunk_grid"]["chunk_shape"];
std::string dtype = j["data_type"];
std::string chunk_memory_layout = j["chunk_memory_layout"];
// Version-independent locals filled in by the switch below.
nlohmann::json json_chunk_shape;
std::string dtype;
std::string chunk_memory_layout;
std::string compressor;
nlohmann::json compressor_config;
if (j.contains("compressor"))
std::string chunk_separator;
std::string full_path;
// Map the version-specific metadata keys onto the common locals.
switch (zarr_major)
{
compressor = j["compressor"]["codec"];
std::size_t i = compressor.rfind('/');
compressor = compressor.substr(0, i);
i = compressor.rfind('/') + 1;
compressor = compressor.substr(i, std::string::npos);
compressor_config = j["compressor"]["configuration"];
}
else
{
compressor = "binary";
case 3:
json_chunk_shape = j["chunk_grid"]["chunk_shape"];
dtype = j["data_type"];
chunk_memory_layout = j["chunk_memory_layout"];
if (j.contains("compressor"))
{
// The codec is stored as a '/'-separated identifier: drop the last
// segment, then keep what follows the last remaining '/' as the
// compressor name.
compressor = j["compressor"]["codec"];
i = compressor.rfind('/');
compressor = compressor.substr(0, i);
i = compressor.rfind('/') + 1;
compressor = compressor.substr(i, std::string::npos);
compressor_config = j["compressor"]["configuration"];
}
else
{
// No "compressor" entry: use the "binary" name.
compressor = "binary";
}
chunk_separator = j["chunk_grid"]["separator"];
full_path = store.get_root() + "/data/root" + path;
break;
case 2:
json_chunk_shape = j["chunks"];
dtype = j["dtype"];
chunk_memory_layout = j["order"];
// NOTE(review): "compressor" is read unconditionally here, unlike the
// v3 branch - confirm behaviour when the metadata has no compressor.
compressor = j["compressor"]["id"];
// The configuration is the compressor object without its "id" key.
compressor_config = j["compressor"];
compressor_config.erase("id");
// The chunk key separator is fixed to '.' for version 2.
chunk_separator = '.';
full_path = store.get_root() + '/' + path;
break;
default:
// Other majors were already rejected by the first switch.
break;
}
// Convert the JSON shape arrays to std::size_t vectors; each element is
// dumped to text and parsed with stoi (the lambda returns int).
std::vector<std::size_t> shape(json_shape.size());
std::vector<std::size_t> chunk_shape(json_chunk_shape.size());
std::transform(json_shape.begin(), json_shape.end(), shape.begin(),
[](nlohmann::json& size) -> int { return stoi(size.dump()); });
std::transform(json_chunk_shape.begin(), json_chunk_shape.end(), chunk_shape.begin(),
[](nlohmann::json& size) -> int { return stoi(size.dump()); });
std::string chunk_separator = j["chunk_grid"]["separator"];
std::string full_path = store.get_root() + "/data/root" + path;
// Only the first character of the layout and separator strings is passed on.
// NOTE(review): "attributes" and "fill_value" are read from the same document
// for both versions - confirm that v2 metadata carries an "attributes" key.
return xchunked_array_factory<store_type>::build(store, compressor, dtype, chunk_memory_layout[0], shape, chunk_shape, full_path, chunk_separator[0], j["attributes"], compressor_config, chunk_pool_size, j["fill_value"]);
return xchunked_array_factory<store_type>::build(store, compressor, dtype, chunk_memory_layout[0], shape, chunk_shape, full_path, chunk_separator[0], j["attributes"], compressor_config, chunk_pool_size, j["fill_value"], zarr_major);
}
}

Expand Down
10 changes: 5 additions & 5 deletions include/xtensor-zarr/xzarr_chunked_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
namespace xt
{
// Builds a zarray whose element type is `data_type` by forwarding every
// argument, unchanged and in the same order, to
// xcompressor_factory<store_type, data_type>::build.
//
// NOTE(review): this listing comes from a commit diff view; the first
// signature / return pair is the removed version, the second pair (with the
// extra `zarr_version` argument) is the added one.
template <class store_type, class data_type>
zarray build_chunked_array_with_dtype(store_type& store, const std::string& compressor, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)
zarray build_chunked_array_with_dtype(store_type& store, const std::string& compressor, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)
{
return xcompressor_factory<store_type, data_type>::build(store, compressor, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, config, chunk_pool_size, fill_value_json);
return xcompressor_factory<store_type, data_type>::build(store, compressor, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, config, chunk_pool_size, fill_value_json, zarr_version);
}

template <class store_type>
Expand All @@ -39,7 +39,7 @@ namespace xt
instance().m_builders.insert(std::make_pair(name, &build_chunked_array_with_dtype<store_type, data_type>));
}

static zarray build(store_type& store, const std::string& compressor, const std::string& dtype, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)
static zarray build(store_type& store, const std::string& compressor, const std::string& dtype, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)
{
std::string dtype_noendian = dtype;
char endianness = dtype[0];
Expand All @@ -50,7 +50,7 @@ namespace xt
auto fun = instance().m_builders.find(dtype_noendian);
if (fun != instance().m_builders.end())
{
zarray z = (fun->second)(store, compressor, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, config, chunk_pool_size, fill_value_json);
zarray z = (fun->second)(store, compressor, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, config, chunk_pool_size, fill_value_json, zarr_version);
return z;
}
else
Expand Down Expand Up @@ -88,7 +88,7 @@ namespace xt
m_builders.insert(std::make_pair("f8", &build_chunked_array_with_dtype<store_type, double>));
}

std::map<std::string, zarray (*)(store_type& store, const std::string& compressor, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)> m_builders;
std::map<std::string, zarray (*)(store_type& store, const std::string& compressor, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)> m_builders;
};
}

Expand Down
16 changes: 14 additions & 2 deletions include/xtensor-zarr/xzarr_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ namespace xt

void set_directory(const std::string& directory);
void set_separator(const char separator);
void set_zarr_version(std::size_t zarr_version);
template <class I>
void index_to_path(I first, I last, std::string& path);

private:
std::string m_directory;
char m_separator;
std::size_t m_zarr_version;
};

// xzarr_attrs is meant to serve as a base class extension for xchunked_array
Expand All @@ -47,7 +49,9 @@ namespace xt
* xindex_path implementation *
******************************/

// Default constructor: the separator starts as '/' and, in the added lines,
// the Zarr version starts as 3.
// NOTE(review): this listing comes from a commit diff view; the first line is
// the removed one-line header, the next three lines are its replacement.
xzarr_index_path::xzarr_index_path(): m_separator('/')
xzarr_index_path::xzarr_index_path()
: m_separator('/')
, m_zarr_version(3)
{
}

Expand All @@ -65,6 +69,11 @@ namespace xt
m_separator = separator;
}

// Stores the Zarr major version used by index_to_path when it builds chunk
// paths (that function checks whether the stored value equals 3).
//
// Declared `inline` because this out-of-class definition lives in a header:
// without it, every translation unit including the header would emit its own
// external definition and the link would fail with duplicate symbols.
inline void xzarr_index_path::set_zarr_version(std::size_t zarr_version)
{
    m_zarr_version = zarr_version;
}

template <class I>
void xzarr_index_path::index_to_path(I first, I last, std::string& path)
{
Expand All @@ -73,7 +82,10 @@ namespace xt
{
if (fname.empty())
{
fname.push_back('c');
if (m_zarr_version == 3)
{
fname.push_back('c');
}
}
else
{
Expand Down
61 changes: 48 additions & 13 deletions include/xtensor-zarr/xzarr_compressor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,32 @@

namespace xt
{
// Returns the value used for a "NaN" fill value of element type T.
//
// The primary template covers every type without a dedicated specialization
// and returns 0; float, double and long double return a quiet NaN from
// std::nanf / std::nan / std::nanl.
template <class T>
T get_nan()
{
    return 0;
}

// The explicit specializations below are ordinary functions, not templates,
// so they are not implicitly inline. They are marked `inline` because they
// are defined in a header: otherwise each including translation unit would
// emit its own definition and linking would fail with duplicate symbols.
template <>
inline float get_nan<float>()
{
    return std::nanf("");
}

template <>
inline double get_nan<double>()
{
    return std::nan("");
}

template <>
inline long double get_nan<long double>()
{
    return std::nanl("");
}

template <class store_type, class data_type, class io_handler, class format_config>
zarray build_chunked_array_impl(store_type& store, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, format_config&& config, const nlohmann::json& config_json, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)
zarray build_chunked_array_impl(store_type& store, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, format_config&& config, const nlohmann::json& config_json, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)
{
config.read_from(config_json);
config.big_endian = (endianness == '>');
Expand All @@ -38,17 +62,29 @@ namespace xt
if (fill_value_json.is_null())
{
auto a = chunked_file_array<data_type, io_handler, layout_type::dynamic, xzarr_index_path, xzarr_attrs>(shape, chunk_shape, path, chunk_pool_size, layout);
a.chunks().get_index_path().set_separator(separator);
auto& i2p = a.chunks().get_index_path();
i2p.set_separator(separator);
i2p.set_zarr_version(zarr_version);
auto io_config = store.get_io_config();
a.chunks().configure(config, io_config);
a.set_attrs(attrs);
return zarray(std::move(a));
}
else
{
data_type fill_value = fill_value_json;
data_type fill_value;
if (fill_value_json == "NaN")
{
fill_value = get_nan<data_type>();
}
else
{
fill_value = fill_value_json;
}
auto a = chunked_file_array<data_type, io_handler, layout_type::dynamic, xzarr_index_path, xzarr_attrs>(shape, chunk_shape, path, fill_value, chunk_pool_size, layout);
a.chunks().get_index_path().set_separator(separator);
auto& i2p = a.chunks().get_index_path();
i2p.set_separator(separator);
i2p.set_zarr_version(zarr_version);
auto io_config = store.get_io_config();
a.chunks().configure(config, io_config);
a.set_attrs(attrs);
Expand All @@ -57,10 +93,10 @@ namespace xt
}

// Builds a zarray for a given compressor configuration type.
//
// The IO handler type is store_type's nested `io_handler` template
// instantiated with `format_config`; a default-constructed format_config is
// handed to build_chunked_array_impl together with the JSON `config`, and all
// other arguments are forwarded unchanged.
//
// NOTE(review): this listing comes from a commit diff view; the first
// signature / return pair is the removed version, the second pair (with the
// extra `zarr_version` argument) is the added one.
template <class store_type, class data_type, class format_config>
zarray build_chunked_array_with_compressor(store_type& store, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)
zarray build_chunked_array_with_compressor(store_type& store, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)
{
using io_handler = typename store_type::template io_handler<format_config>;
return build_chunked_array_impl<store_type, data_type, io_handler>(store, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, format_config(), config, chunk_pool_size, fill_value_json);
return build_chunked_array_impl<store_type, data_type, io_handler>(store, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, format_config(), config, chunk_pool_size, fill_value_json, zarr_version);
}

template <class store_type, class data_type>
Expand All @@ -71,21 +107,20 @@ namespace xt
// Registers a builder for the compressor identified by `c.name`.
//
// Looks the name up in the singleton's m_builders map; if an entry already
// exists, XTENSOR_THROW is invoked with std::runtime_error and the message
// "Compressor already registered: <name>". Otherwise the name is mapped to
// build_chunked_array_with_compressor<store_type, data_type, format_config>.
//
// NOTE(review): this listing comes from a commit diff view; lines using the
// local `name` are the removed version, lines using `c.name` directly are
// their replacements.
template <class format_config>
static void add_compressor(format_config&& c)
{
const char* name = c.name;
auto fun = instance().m_builders.find(name);
auto fun = instance().m_builders.find(c.name);
if (fun != instance().m_builders.end())
{
XTENSOR_THROW(std::runtime_error, "Compressor already registered: " + std::string(name));
XTENSOR_THROW(std::runtime_error, "Compressor already registered: " + std::string(c.name));
}
instance().m_builders.insert(std::make_pair(name, &build_chunked_array_with_compressor<store_type, data_type, format_config>));
instance().m_builders.insert(std::make_pair(c.name, &build_chunked_array_with_compressor<store_type, data_type, format_config>));
}

static zarray build(store_type& store, const std::string& compressor, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)
static zarray build(store_type& store, const std::string& compressor, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)
{
auto fun = instance().m_builders.find(compressor);
if (fun != instance().m_builders.end())
{
zarray z = (fun->second)(store, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, config, chunk_pool_size, fill_value_json);
zarray z = (fun->second)(store, chunk_memory_layout, shape, chunk_shape, path, separator, attrs, endianness, config, chunk_pool_size, fill_value_json, zarr_version);
return z;
}
else
Expand All @@ -110,7 +145,7 @@ namespace xt
m_builders.insert(std::make_pair(format_config().name, &build_chunked_array_with_compressor<store_type, data_type, format_config>));
}

std::map<std::string, zarray (*)(store_type& store, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json)> m_builders;
std::map<std::string, zarray (*)(store_type& store, char chunk_memory_layout, std::vector<std::size_t>& shape, std::vector<std::size_t>& chunk_shape, const std::string& path, char separator, const nlohmann::json& attrs, char endianness, nlohmann::json& config, std::size_t chunk_pool_size, const nlohmann::json& fill_value_json, std::size_t zarr_version)> m_builders;
};

template <class store_type, class format_config>
Expand Down

0 comments on commit 71619bc

Please sign in to comment.