Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support reading empty datasets and attributes #67

Merged
merged 1 commit into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions src/hdf5_hl.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ export declare class Attribute {
name: string;
metadata: Metadata;
dtype: Dtype;
shape: number[];
shape: number[] | null;
private _value?;
private _json_value?;
constructor(file_id: bigint, path: string, name: string);
get value(): OutputData;
get json_value(): JSONCompatibleOutputData;
to_array(): string | number | boolean | JSONCompatibleOutputData[];
get value(): OutputData | null;
get json_value(): JSONCompatibleOutputData | null;
to_array(): JSONCompatibleOutputData | null;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding | null to OutputData and JSONCompatibleOutputData would have been quite challenging because the types are recursive. I opted for this simpler typing solution, which also avoids dealing with null in deeper internal functions (e.g. process_data).

Note also that I fixed a few type inference issues (string | number | boolean | JSONCompatibleOutputData[] is the same as JSONCompatibleOutputData).

}
declare abstract class HasAttrs {
file_id: bigint;
Expand Down Expand Up @@ -132,14 +132,14 @@ export declare class Dataset extends HasAttrs {
refresh(): void;
get metadata(): Metadata;
get dtype(): Dtype;
get shape(): number[];
get shape(): number[] | null;
get filters(): Filter[];
get value(): OutputData;
get json_value(): JSONCompatibleOutputData;
slice(ranges: Slice[]): OutputData;
get value(): OutputData | null;
get json_value(): JSONCompatibleOutputData | null;
slice(ranges: Slice[]): OutputData | null;
write_slice(ranges: Slice[], data: any): void;
create_region_reference(ranges: Slice[]): RegionReference;
to_array(): string | number | boolean | JSONCompatibleOutputData[];
to_array(): JSONCompatibleOutputData | null;
resize(new_shape: number[]): number;
make_scale(scale_name?: string): void;
attach_scale(index: number, scale_dset_path: string): void;
Expand All @@ -148,16 +148,20 @@ export declare class Dataset extends HasAttrs {
get_scale_name(): string | null;
set_dimension_label(index: number, label: string): void;
get_dimension_labels(): (string | null)[];
_value_getter(json_compatible?: boolean): OutputData;
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
}
export declare class DatasetRegion {
source_dataset: Dataset;
region_reference: RegionReference;
private _metadata?;
constructor(source_dataset: Dataset, region_reference: RegionReference);
get metadata(): Metadata;
get value(): OutputData;
_value_getter(json_compatible?: boolean): OutputData;
get value(): OutputData | null;
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
}
export declare const h5wasm: {
File: typeof File;
Expand Down
82 changes: 56 additions & 26 deletions src/hdf5_hl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,15 @@ function dirname(path: string) {
return head;
}

function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: true): JSONCompatibleOutputData;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: false): OutputData;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean): OutputData | JSONCompatibleOutputData;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean = false) {
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: true): JSONCompatibleOutputData | null;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: false): OutputData | null;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
function get_attr(file_id: bigint, obj_name: string, attr_name: string, json_compatible: boolean = false): OutputData | JSONCompatibleOutputData | null {
let metadata = Module.get_attribute_metadata(file_id, obj_name, attr_name);
if (!metadata.shape) {
return null;
}

let nbytes = metadata.size * metadata.total_size;
let data_ptr = Module._malloc(nbytes);
var processed;
Expand Down Expand Up @@ -106,6 +110,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
// but otherwise returns Uint8Array raw bytes as loaded.
let output_data: OutputData;
let { shape, type } = metadata;

let known_type = true;
// let length: number;
if (type === Module.H5T_class_t.H5T_STRING.value) {
Expand Down Expand Up @@ -153,9 +158,9 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
else if (type === Module.H5T_class_t.H5T_COMPOUND.value) {
const { size, compound_type } = <{size: Metadata["size"], compound_type: CompoundTypeMetadata}>metadata;
let n = Math.floor(data.byteLength / size);
let output: OutputData[] = [];
let output: (OutputData | JSONCompatibleOutputData)[] = [];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not quite accurate (should be OutputData[] | JSONCompatibleOutputData[]) but sufficient to get rid of the error.

for (let i = 0; i < n; i++) {
let row: OutputData = [];
let row: (OutputData | JSONCompatibleOutputData)[] = [];
let row_data = data.slice(i * size, (i + 1) * size);
for (let member of compound_type.members) {
let member_data = row_data.slice(member.offset, member.offset + member.size);
Expand All @@ -168,7 +173,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo

else if (type === Module.H5T_class_t.H5T_ARRAY.value) {
const { array_type } = <{array_type: Metadata}>metadata;
shape = shape.concat(array_type.shape);
shape = (<number[]>shape).concat(<number[]>array_type.shape);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

process_data should never be called with a null shape, and the array_type metadata should never be parsed with a null. Could check this with proper runtime assertions, but didn't seem worth it.

array_type.shape = shape;
// always convert ARRAY types to base JS types:
output_data = process_data(data, array_type, true);
Expand Down Expand Up @@ -220,7 +225,7 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
}

function isIterable(x: any): x is Iterable<unknown> {
return typeof x === 'object' && Symbol.iterator in x;
return typeof x === 'object' && x !== null && Symbol.iterator in x;
}

function isH5PYBooleanEnum(enum_type: EnumTypeMetadata) {
Expand Down Expand Up @@ -515,9 +520,9 @@ export class Attribute {
name: string;
metadata: Metadata;
dtype: Dtype;
shape: number[];
private _value?: OutputData;
private _json_value?: JSONCompatibleOutputData;
shape: number[] | null;
private _value?: OutputData | null;
private _json_value?: JSONCompatibleOutputData | null;

constructor(file_id: bigint, path: string, name: string) {
this.file_id = file_id;
Expand All @@ -529,27 +534,27 @@ export class Attribute {
this.shape = metadata.shape;
}

get value() {
get value(): OutputData | null {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just being a bit more explicit (here and on a few other methods). It helps double check that the implementation of the method is typed correctly (notably get_attr in this case).

if (typeof this._value === "undefined") {
this._value = get_attr(this.file_id, this.path, this.name, false);
}
return this._value;
}

get json_value() {
get json_value(): JSONCompatibleOutputData | null {
if (typeof this._json_value === "undefined") {
this._json_value = get_attr(this.file_id, this.path, this.name, true);
}
return this._json_value;
}

to_array() {
to_array(): JSONCompatibleOutputData | null {
const { json_value, metadata } = this;
const { shape } = metadata;
if (!isIterable(json_value) || typeof json_value === "string") {
return json_value;
}
return create_nested_array(json_value, shape);
return create_nested_array(json_value, <number[]>shape);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the attribute is empty, then json_value is null (i.e. not iterable).

}
}

Expand Down Expand Up @@ -916,24 +921,28 @@ export class Dataset extends HasAttrs {
return Module.get_dataset_filters(this.file_id, this.path);
}

get value() {
get value(): OutputData | null {
return this._value_getter(false);
}

get json_value(): JSONCompatibleOutputData {
return this._value_getter(true) as JSONCompatibleOutputData;
get json_value(): JSONCompatibleOutputData | null {
return this._value_getter(true);
}

slice(ranges: Slice[]) {
slice(ranges: Slice[]): OutputData | null {
// interpret ranges as [start, stop], with one per dim.
const metadata = this.metadata;
// if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id;
const { shape } = metadata;
if (!shape) {
return null;
}

const {strides, count, offset} = calculateHyperslabParams(shape, ranges);
const total_size = count.reduce((previous, current) => current * previous, 1n);
const nbytes = metadata.size * Number(total_size);
const data_ptr = Module._malloc(nbytes);
let processed;
let processed: OutputData;
try {
Module.get_dataset_data(this.file_id, this.path, count, offset, strides, BigInt(data_ptr));
let data = Module.HEAPU8.slice(data_ptr, data_ptr + nbytes);
Expand All @@ -950,6 +959,9 @@ export class Dataset extends HasAttrs {
write_slice(ranges: Slice[], data: any) {
// interpret ranges as [start, stop], with one per dim.
let metadata = this.metadata;
if (!metadata.shape) {
throw new Error("cannot write to a slice of an empty dataset");
}
if (metadata.vlen) {
throw new Error("writing to a slice of vlen dtype is not implemented");
}
Expand All @@ -971,20 +983,24 @@ export class Dataset extends HasAttrs {

create_region_reference(ranges: Slice[]) {
const metadata = this.metadata;
if (!metadata.shape) {
throw new Error("cannot create region reference from empty dataset");
}

// interpret ranges as [start, stop], with one per dim.
const { shape } = metadata;
const {strides, count, offset} = calculateHyperslabParams(shape, ranges);
const ref_data = Module.create_region_reference(this.file_id, this.path, count, offset, strides);
return new RegionReference(ref_data);
}

to_array() {
to_array(): JSONCompatibleOutputData | null {
const { json_value, metadata } = this;
const { shape } = metadata;
if (!isIterable(json_value) || typeof json_value === "string") {
return json_value;
}
let nested = create_nested_array(json_value, shape);
let nested = create_nested_array(json_value, <number[]>shape);
return nested;
}

Expand Down Expand Up @@ -1033,8 +1049,15 @@ export class Dataset extends HasAttrs {
return Module.get_dimension_labels(this.file_id, this.path);
}

_value_getter(json_compatible=false) {
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
_value_getter(json_compatible=false): OutputData | JSONCompatibleOutputData | null {
let metadata = this.metadata;
if (!metadata.shape) {
return null
}

// if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id;
let nbytes = metadata.size * metadata.total_size;
let data_ptr = Module._malloc(nbytes);
Expand Down Expand Up @@ -1071,12 +1094,19 @@ export class DatasetRegion {
return this._metadata;
}

get value() {
get value(): OutputData | null {
return this._value_getter(false);
}

_value_getter(json_compatible=false) {
_value_getter(json_compatible?: false): OutputData | null;
_value_getter(json_compatible: true): JSONCompatibleOutputData | null;
_value_getter(json_compatible: boolean): OutputData | JSONCompatibleOutputData | null;
_value_getter(json_compatible=false): OutputData | JSONCompatibleOutputData | null {
let metadata = this.metadata;
if (!metadata.shape) {
return null;
}

// if auto_refresh is on, getting the metadata has triggered a refresh of the dataset_id;
let nbytes = metadata.size * metadata.total_size;
let data_ptr = Module._malloc(nbytes);
Expand Down Expand Up @@ -1108,7 +1138,7 @@ function create_nested_array(value: JSONCompatibleOutputData[], shape: number[])
const subdims = shape.slice(1).reverse();
for (let dim of subdims) {
// in each pass, replace input with array of slices of input
const new_output: JSONCompatibleOutputData = [];
const new_output: JSONCompatibleOutputData[][] = [];
const { length } = output;
let cursor = 0;
while (cursor < length) {
Expand Down
24 changes: 18 additions & 6 deletions src/hdf5_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,15 +345,26 @@ val get_abstractDS_metadata(hid_t dspace, hid_t dtype, hid_t dcpl)
{
val attr = get_dtype_metadata(dtype);

int rank = H5Sget_simple_extent_ndims(dspace);
int type = H5Sget_simple_extent_type(dspace);
int total_size = H5Sget_simple_extent_npoints(dspace);
attr.set("total_size", total_size);

if (type == H5S_NULL) {
attr.set("shape", val::null());
attr.set("maxshape", val::null());
attr.set("chunks", val::null());
return attr;
}

int rank = H5Sget_simple_extent_ndims(dspace);
std::vector<hsize_t> dims_out(rank);
std::vector<hsize_t> maxdims_out(rank);

int ndims = H5Sget_simple_extent_dims(dspace, dims_out.data(), maxdims_out.data());

val shape = val::array();
val maxshape = val::array();
for (int d = 0; d < ndims; d++)
{
for (int d = 0; d < ndims; d++) {
shape.set(d, (uint)dims_out.at(d));
maxshape.set(d, (uint)maxdims_out.at(d));
}
Expand All @@ -364,19 +375,20 @@ val get_abstractDS_metadata(hid_t dspace, hid_t dtype, hid_t dcpl)

if (dcpl) {
H5D_layout_t layout = H5Pget_layout(dcpl);

if (layout == H5D_CHUNKED) {
std::vector<hsize_t> chunk_dims_out(ndims);
H5Pget_chunk(dcpl, ndims, chunk_dims_out.data());

val chunks = val::array();
for (int c = 0; c < ndims; c++)
{
for (int c = 0; c < ndims; c++) {
chunks.set(c, (uint)chunk_dims_out.at(c));
}

attr.set("chunks", chunks);
}
}

attr.set("total_size", total_size);
return attr;
}

Expand Down
12 changes: 6 additions & 6 deletions src/hdf5_util_helpers.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ export interface H5T_class_t {

export interface Metadata {
array_type?: Metadata,
chunks: Array<number> | null,
chunks: number[] | null,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor stylistic change.

compound_type?: CompoundTypeMetadata,
cset: number,
enum_type?: EnumTypeMetadata,
littleEndian: boolean,
maxshape: Array<number> | null,
maxshape: number[] | null,
ref_type?: 'object' | 'region',
shape: Array<number>,
shape: number[] | null,
signed: boolean,
size: number,
total_size: number,
Expand All @@ -40,7 +40,7 @@ export interface CompoundMember extends Metadata {
}

export interface CompoundTypeMetadata {
members: Array<CompoundMember>
members: CompoundMember[]
nmembers: number;
}

Expand Down Expand Up @@ -105,7 +105,7 @@ export interface H5Module extends EmscriptenModule {
reclaim_vlen_memory(file_id: BigInt, obj_name: string, attr_name: string, data_ptr: bigint): Status;
get_attribute_data(file_id: BigInt, obj_name: string, attr_name: string, arg3: bigint): Status;
FS: FS.FileSystemType,
get_keys_vector(group_id: bigint, H5_index_t: number): Array<string>,
get_keys_vector(group_id: bigint, H5_index_t: number): string[],
get_attribute_metadata(loc_id: bigint, group_name_string: string, attribute_name_string: string): Metadata,
get_plugin_search_paths(): string[],
insert_plugin_search_path(search_path: string, index: number): number,
Expand All @@ -116,7 +116,7 @@ export interface H5Module extends EmscriptenModule {
get_scale_name(loc_id: bigint, dimscale_dset_name: string): string | null,
get_attached_scales(loc_id: bigint, target_dset_name: string, index: number): string[],
set_dimension_label(loc_id: bigint, target_dset_name: string, index: number, label: string): number,
get_dimension_labels(loc_id: bigint, target_dset_name: string): Array<string | null>,
get_dimension_labels(loc_id: bigint, target_dset_name: string): (string | null)[],
create_object_reference(loc_id: bigint, target_name: string): Uint8Array,
create_region_reference(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null): Uint8Array,
get_referenced_name(loc_id: bigint, ref_ptr: Uint8Array, is_object: boolean): string;
Expand Down
Loading