-
Notifications
You must be signed in to change notification settings - Fork 5
/
dataset.hpp
295 lines (275 loc) · 11.9 KB
/
dataset.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
/*
* Copyright (c) 2024 MPI-M, Clara Bayley
*
*
* ----- CLEO -----
* File: dataset.hpp
* Project: zarr
* Created Date: Monday 18th March 2024
* Author: Clara Bayley (CB)
* Additional Contributors:
* -----
* Last Modified: Wednesday 22nd May 2024
* Modified By: CB
* -----
* License: BSD 3-Clause "New" or "Revised" License
* https://opensource.org/licenses/BSD-3-Clause
* -----
* File Description:
* Structure to create a ZarrGroup which is xarray and netCDF compatible.
*/
#ifndef LIBS_ZARR_DATASET_HPP_
#define LIBS_ZARR_DATASET_HPP_
#include <Kokkos_Core.hpp>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include "./xarray_zarr_array.hpp"
#include "./zarr_group.hpp"
/**
* @brief A class representing a dataset made from a Zarr group (i.e. collection of Zarr arrays)
* in a storage system.
*
* This class provides functionality to create a dataset as a group of arrays obeying the Zarr
* storage specification version 2 (https://zarr.readthedocs.io/en/stable/spec/v2.html) that is also
* compatible with Xarray and NetCDF.
*
* @tparam Store The type of the store object used by the dataset.
*/
template <typename Store>
class Dataset {
 private:
  ZarrGroup<Store> group; /**< Zarr group (held by value); its `store` is given to every array. */
  std::unordered_map<std::string, size_t>
      datasetdims; /**< size of each dimension of the dataset, keyed by the dimension's name */

  /**
   * @brief Registers a dimension with the dataset.
   *
   * If a dimension with the same name is already registered, the existing entry (and its size)
   * is left unchanged.
   *
   * @param dim The (name, size) pair describing the dimension to register.
   */
  void add_dimension(const std::pair<std::string, size_t> &dim) {
    const auto &[dimname, dimsize] = dim;
    datasetdims.emplace(dimname, dimsize);
  }

 public:
  /**
   * @brief Builds a Dataset on top of the given store.
   *
   * The ZarrGroup member is constructed from the store, the dataset starts with no dimensions,
   * and the dataset's attributes (creator and title, as JSON text) are then assigned to the
   * ".zattrs" key of the store as metadata for Xarray and NetCDF.
   *
   * @param store The store object the Dataset is associated with.
   */
  explicit Dataset(Store &store) : group(store) {
    store[".zattrs"] =
        "{\n"
        " \"creator\": \"Clara Bayley\",\n"
        " \"title\": "
        "\"Dataset from CLEO is Xarray and NetCDF compatible Zarr Group of Arrays\"\n"
        "}";
  }

  /**
   * @brief Looks up the size of a dimension already registered in the dataset.
   *
   * Throws std::out_of_range if no dimension called `dimname` is registered.
   *
   * @param dimname Name of the dimension to look up.
   * @return The size of (i.e. number of elements along) that dimension.
   */
  size_t get_dimension(const std::string &dimname) const {
    return datasetdims.at(dimname);
  }

  /**
   * @brief Overwrites the size of a dimension already registered in the dataset.
   *
   * Throws std::out_of_range if the named dimension is not registered; this function never adds
   * a new dimension.
   *
   * @param dim A pair holding the name of the dimension and the new size to give it.
   */
  void set_dimension(const std::pair<std::string, size_t> &dim) {
    const auto &[dimname, newsize] = dim;
    datasetdims.at(dimname) = newsize;
  }

  /**
   * @brief Creates a new array in the dataset.
   *
   * The array is constructed from the group's store and the dataset's current dimensions
   * together with the arguments given here.
   *
   * @tparam T The data type of the array.
   * @param name The name of the new array.
   * @param units The units of the array data.
   * @param scale_factor The scale factor of the array data.
   * @param chunkshape The shape of the chunks of the array.
   * @param dimnames The names of each dimension of the array.
   * @return The newly constructed XarrayZarrArray.
   */
  template <typename T>
  XarrayZarrArray<Store, T> create_array(const std::string_view name, const std::string_view units,
                                         const double scale_factor,
                                         const std::vector<size_t> &chunkshape,
                                         const std::vector<std::string> &dimnames) const {
    using Array = XarrayZarrArray<Store, T>;
    return Array(group.store, datasetdims, name, units, scale_factor, chunkshape, dimnames);
  }

  /**
   * @brief Creates a new 1-D array for a coordinate of the dataset.
   *
   * The coordinate's name is first registered as a dimension of size `dimsize` (this has no
   * effect if a dimension of that name is already registered), then a 1-D array whose only
   * dimension is that name is created.
   *
   * @tparam T The data type of the coordinate array.
   * @param name The name of the new coordinate (also used as the name of its dimension).
   * @param units The units of the coordinate.
   * @param scale_factor The scale factor of the coordinate data.
   * @param chunksize The size of each 1-D chunk of the coordinate array.
   * @param dimsize The initial size of the coordinate (number of elements along the array).
   * @return The newly constructed XarrayZarrArray for the coordinate.
   */
  template <typename T>
  XarrayZarrArray<Store, T> create_coordinate_array(const std::string_view name,
                                                    const std::string_view units,
                                                    const double scale_factor,
                                                    const size_t chunksize, const size_t dimsize) {
    const std::string dimname(name);
    add_dimension({dimname, dimsize});  // must precede array creation, which reads datasetdims

    const std::vector<size_t> chunkshape{chunksize};
    const std::vector<std::string> dimnames{dimname};
    return create_array<T>(name, units, scale_factor, chunkshape, dimnames);
  }

  /**
   * @brief Creates a new ragged array in the dataset.
   *
   * Same as create_array except that the name of the sample dimension is also passed on to the
   * array's constructor.
   *
   * @tparam T The data type of the array.
   * @param name The name of the new array.
   * @param units The units of the array data.
   * @param scale_factor The scale factor of the array data.
   * @param chunkshape The shape of the chunks of the array.
   * @param dimnames The names of each dimension of the array.
   * @param sampledimname The name of the sample dimension of the array.
   * @return The newly constructed XarrayZarrArray for the ragged array.
   */
  template <typename T>
  XarrayZarrArray<Store, T> create_ragged_array(const std::string_view name,
                                                const std::string_view units,
                                                const double scale_factor,
                                                const std::vector<size_t> &chunkshape,
                                                const std::vector<std::string> &dimnames,
                                                const std::string_view sampledimname) const {
    using RaggedArray = XarrayZarrArray<Store, T>;
    return RaggedArray(group.store, datasetdims, name, units, scale_factor, chunkshape, dimnames,
                       sampledimname);
  }

  /**
   * @brief Creates a new raggedcount array in the dataset.
   *
   * A raggedcount array is constructed from exactly the same arguments as a ragged array, so
   * this simply forwards to create_ragged_array.
   *
   * @tparam T The data type of the array.
   * @param name The name of the new array.
   * @param units The units of the array data.
   * @param scale_factor The scale factor of the array data.
   * @param chunkshape The shape of the chunks of the array.
   * @param dimnames The names of each dimension of the array.
   * @param sampledimname The name of the sample dimension of the array.
   * @return The newly constructed XarrayZarrArray for the raggedcount array.
   */
  template <typename T>
  XarrayZarrArray<Store, T> create_raggedcount_array(const std::string_view name,
                                                     const std::string_view units,
                                                     const double scale_factor,
                                                     const std::vector<size_t> &chunkshape,
                                                     const std::vector<std::string> &dimnames,
                                                     const std::string_view sampledimname) const {
    return create_ragged_array<T>(name, units, scale_factor, chunkshape, dimnames, sampledimname);
  }

  /**
   * @brief Asks an array to write its shape using the dataset's current dimensions, so that the
   * shape of the array matches the dimensions of the dataset.
   *
   * @tparam T The data type of the array.
   * @param xzarr The XarrayZarrArray whose shape is to be written.
   */
  template <typename T>
  void write_arrayshape(XarrayZarrArray<Store, T> &xzarr) const {
    xzarr.write_arrayshape(datasetdims);
  }

  /**
   * @brief Shared-pointer overload of write_arrayshape: asks the pointed-to array to write its
   * shape using the dataset's current dimensions.
   *
   * @tparam T The data type of the array.
   * @param xzarr_ptr A shared pointer to the XarrayZarrArray whose shape is to be written.
   */
  template <typename T>
  void write_arrayshape(const std::shared_ptr<XarrayZarrArray<Store, T>> xzarr_ptr) const {
    write_arrayshape(*xzarr_ptr);
  }

  /**
   * @brief Asks a ragged array to write its shape.
   *
   * Unlike write_arrayshape, the dataset's dimensions are not passed to the array.
   *
   * @tparam T The data type of the array.
   * @param xzarr The XarrayZarrArray representing the ragged array.
   */
  template <typename T>
  void write_ragged_arrayshape(XarrayZarrArray<Store, T> &xzarr) const {
    xzarr.write_ragged_arrayshape();
  }

  /**
   * @brief Writes data from a Kokkos view in host memory to an array of the dataset, then
   * updates the array's shape.
   *
   * After the data is written, the array's shape metadata is rewritten from the dataset's
   * current dimensions so that the size of each dimension of the array stays consistent with
   * the dataset.
   *
   * @tparam T The data type of the array.
   * @param xzarr The XarrayZarrArray to write to.
   * @param h_data The data to be written to the array.
   */
  template <typename T>
  void write_to_array(XarrayZarrArray<Store, T> &xzarr,
                      const typename Buffer<T>::viewh_buffer h_data) const {
    xzarr.write_to_array(h_data);
    write_arrayshape(xzarr);
  }

  /**
   * @brief Shared-pointer overload: writes data from a Kokkos view in host memory to the
   * pointed-to array, then updates the array's shape.
   *
   * After the data is written, the array's shape metadata is rewritten from the dataset's
   * current dimensions so that the size of each dimension of the array stays consistent with
   * the dataset.
   *
   * @tparam T The data type of the array.
   * @param xzarr_ptr A shared pointer to the XarrayZarrArray to write to.
   * @param h_data The data to be written to the array.
   */
  template <typename T>
  void write_to_array(const std::shared_ptr<XarrayZarrArray<Store, T>> xzarr_ptr,
                      const typename Buffer<T>::viewh_buffer h_data) const {
    auto &xzarr = *xzarr_ptr;
    xzarr.write_to_array(h_data);
    xzarr.write_arrayshape(datasetdims);
  }

  /**
   * @brief Writes a single data element to the pointed-to array, then updates the array's shape.
   *
   * After the element is written, the array's shape metadata is rewritten from the dataset's
   * current dimensions so that the size of each dimension of the array stays consistent with
   * the dataset.
   *
   * @tparam T The data type of the array.
   * @param xzarr_ptr A shared pointer to the XarrayZarrArray to write to.
   * @param data The data element to be written to the array.
   */
  template <typename T>
  void write_to_array(const std::shared_ptr<XarrayZarrArray<Store, T>> xzarr_ptr,
                      const T data) const {
    auto &xzarr = *xzarr_ptr;
    xzarr.write_to_array(data);
    xzarr.write_arrayshape(datasetdims);
  }

  /**
   * @brief Writes data from a Kokkos view in host memory to a ragged array of the dataset, then
   * asks the array to write its ragged shape.
   *
   * The shape is written via the array's ragged shape function, which does not take the
   * dataset's dimensions.
   *
   * @tparam T The data type of the array.
   * @param xzarr The XarrayZarrArray representing the ragged array to write to.
   * @param h_data The data to be written to the array.
   */
  template <typename T>
  void write_to_ragged_array(XarrayZarrArray<Store, T> &xzarr,
                             const typename Buffer<T>::viewh_buffer h_data) const {
    xzarr.write_to_array(h_data);
    write_ragged_arrayshape(xzarr);
  }
};
#endif // LIBS_ZARR_DATASET_HPP_