Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

References support #65

Merged
merged 14 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
uses: mymindstorm/setup-emsdk@v12
with:
# Make sure to set a version number!
version: 3.1.28
version: 3.1.43
# This is the name of the cache folder.
# The cache folder will be placed in the build directory,
# so make sure it doesn't conflict with anything!
Expand All @@ -36,5 +36,10 @@ jobs:
npm i
npm run build

- name: check environment
run: |
pwd
ls -al ./test

- name: test
run: npm test
27 changes: 24 additions & 3 deletions src/hdf5_hl.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export declare const ACCESS_MODES: {
readonly Sr: "H5F_ACC_SWMR_READ";
};
declare type ACCESS_MODESTRING = keyof typeof ACCESS_MODES;
export declare type OutputData = TypedArray | string | number | bigint | boolean | OutputData[];
export declare type OutputData = TypedArray | string | number | bigint | boolean | Reference | RegionReference | OutputData[];
export declare type JSONCompatibleOutputData = string | number | boolean | JSONCompatibleOutputData[];
export declare type Dtype = string | {
compound_type: CompoundTypeMetadata;
Expand All @@ -29,13 +29,15 @@ declare type TypedArray = Int8Array | Uint8Array | Uint8ClampedArray | Int16Arra
* `[i0, i1, s]` - select every `s` values in the range `i0` to `i1`
**/
declare type Slice = [] | [number | null] | [number | null, number | null] | [number | null, number | null, number | null];
export declare type GuessableDataTypes = TypedArray | number | number[] | string | string[];
export declare type GuessableDataTypes = TypedArray | number | number[] | string | string[] | Reference | Reference[] | RegionReference | RegionReference[];
declare enum OBJECT_TYPE {
DATASET = "Dataset",
GROUP = "Group",
BROKEN_SOFT_LINK = "BrokenSoftLink",
EXTERNAL_LINK = "ExternalLink",
DATATYPE = "Datatype"
DATATYPE = "Datatype",
REFERENCE = "Reference",
REGION_REFERENCE = "RegionReference"
}
export declare class BrokenSoftLink {
target: string;
Expand All @@ -52,6 +54,12 @@ export declare class Datatype {
type: OBJECT_TYPE;
constructor();
}
/**
 * Wrapper around the raw bytes of an HDF5 reference.
 *
 * Instances are built from fixed-size slices of reference-typed data when
 * reading, and their `ref_data` bytes are copied back out when writing.
 */
export declare class Reference {
/** Raw reference bytes, stored exactly as passed to the constructor. */
ref_data: Uint8Array;
/**
 * @param ref_data - raw bytes of the reference; stored as-is on the instance.
 */
constructor(ref_data: Uint8Array);
}
/**
 * Reference subtype used for region references.
 *
 * Adds no members of its own; it exists so that `instanceof RegionReference`
 * can tell region references apart from plain object references.
 */
export declare class RegionReference extends Reference {
}
export declare class Attribute {
file_id: bigint;
path: string;
Expand All @@ -77,6 +85,7 @@ declare abstract class HasAttrs {
get_attribute(name: string, json_compatible: false): OutputData;
create_attribute(name: string, data: GuessableDataTypes, shape?: number[] | null, dtype?: string | null): void;
delete_attribute(name: string): number;
create_reference(): Reference;
}
export declare class Group extends HasAttrs {
constructor(file_id: bigint, path: string);
Expand All @@ -90,6 +99,7 @@ export declare class Group extends HasAttrs {
obj_path: string;
};
get(obj_name: string): BrokenSoftLink | ExternalLink | Datatype | Group | Dataset | null;
dereference(ref: Reference | RegionReference): BrokenSoftLink | ExternalLink | Datatype | Group | Dataset | DatasetRegion | null;
create_group(name: string): Group;
create_dataset(args: {
name: string;
Expand Down Expand Up @@ -126,6 +136,7 @@ export declare class Dataset extends HasAttrs {
get json_value(): JSONCompatibleOutputData;
slice(ranges: Slice[]): OutputData;
write_slice(ranges: Slice[], data: any): void;
create_region_reference(ranges: Slice[]): RegionReference;
to_array(): string | number | boolean | JSONCompatibleOutputData[];
resize(new_shape: number[]): number;
make_scale(scale_name?: string): void;
Expand All @@ -137,11 +148,21 @@ export declare class Dataset extends HasAttrs {
get_dimension_labels(): (string | null)[];
_value_getter(json_compatible?: boolean): OutputData;
}
/**
 * Pairs a source dataset with a region reference into it, and exposes the
 * metadata and values of the referenced region.
 */
export declare class DatasetRegion {
/** Dataset the region reference points into. */
source_dataset: Dataset;
/** Region reference describing the selection within `source_dataset`. */
region_reference: RegionReference;
/** Cached region metadata; populated on first access of `metadata`. */
private _metadata?;
/**
 * @param source_dataset - dataset the region belongs to.
 * @param region_reference - reference selecting the region.
 */
constructor(source_dataset: Dataset, region_reference: RegionReference);
/** Metadata of the referenced region (fetched once, then cached). */
get metadata(): Metadata;
/** Values of the referenced region; same as `_value_getter(false)`. */
get value(): OutputData;
/**
 * Read and decode the region's data.
 * @param json_compatible - forwarded to the data-processing step; defaults to false.
 */
_value_getter(json_compatible?: boolean): OutputData;
}
export declare const h5wasm: {
File: typeof File;
Group: typeof Group;
Dataset: typeof Dataset;
Datatype: typeof Datatype;
DatasetRegion: typeof DatasetRegion;
ready: Promise<H5Module>;
ACCESS_MODES: {
readonly r: "H5F_ACC_RDONLY";
Expand Down
152 changes: 128 additions & 24 deletions src/hdf5_hl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ function getAccessor(type: 0 | 1, size: Metadata["size"], signed: Metadata["sign
}
}

export type OutputData = TypedArray | string | number | bigint | boolean | OutputData[];
export type OutputData = TypedArray | string | number | bigint | boolean | Reference | RegionReference | OutputData[];
export type JSONCompatibleOutputData = string | number | boolean | JSONCompatibleOutputData[];
export type Dtype = string | {compound_type: CompoundTypeMetadata} | {array_type: Metadata};
export type { Metadata, Filter, CompoundMember, CompoundTypeMetadata, EnumTypeMetadata };
Expand Down Expand Up @@ -181,6 +181,16 @@ function process_data(data: Uint8Array, metadata: Metadata, json_compatible: boo
}
}

else if (type === Module.H5T_class_t.H5T_REFERENCE.value) {
const { ref_type, size } = metadata; // as { ref_type: 'object' | 'region', size: number };
const cls = (ref_type === 'object') ? Reference : RegionReference;
output_data = Array.from({ length: metadata.total_size }).map((_, i) => {
const ref_data = data.slice(i*size, (i+1)*size);
return new cls(ref_data);
});
return output_data;
}

else {
known_type = false;
output_data = data;
Expand Down Expand Up @@ -275,6 +285,10 @@ function prepare_data(data: any, metadata: Metadata, shape?: number[] | bigint[]
}
output = new Uint8Array(typed_array.buffer);
}
else if (metadata.type === Module.H5T_class_t.H5T_REFERENCE.value) {
output = new Uint8Array(metadata.size * total_size);
(data as Reference[]).forEach((r, i) => (output as Uint8Array).set(r.ref_data, i*metadata.size));
}
else {
throw new Error(`data with type ${metadata.type} can not be prepared for write`);
}
Expand Down Expand Up @@ -316,35 +330,45 @@ function metadata_to_dtype(metadata: Metadata): Dtype {
else if (type === Module.H5T_class_t.H5T_ARRAY.value ) {
return { array_type: array_type as Metadata }
}
else if (type === Module.H5T_class_t.H5T_REFERENCE.value) {
return (metadata.ref_type === 'object') ? "Reference" : "RegionReference";
}
else {
return "unknown";
}
}

function dtype_to_metadata(dtype_str: string) {
let match = dtype_str.match(/^([<>|]?)([bhiqefdsBHIQS])([0-9]*)$/);
if (match == null) {
throw dtype_str + " is not a recognized dtype"
}
let [full, endianness, typestr, length] = match;
let metadata = { vlen: false, signed: false } as Metadata;
metadata.littleEndian = (endianness != '>');
if (fmts_int.has(typestr.toLowerCase())) {
metadata.type = Module.H5T_class_t.H5T_INTEGER.value;
metadata.size = (fmts_int.get(typestr.toLowerCase()) as number);
metadata.signed = (typestr.toLowerCase() == typestr);
}
else if (fmts_float.has(typestr)) {
metadata.type = Module.H5T_class_t.H5T_FLOAT.value;
metadata.size = (fmts_float.get(typestr) as number);
}
else if (typestr.toUpperCase() == 'S') {
metadata.type = Module.H5T_class_t.H5T_STRING.value;
metadata.size = (length == "") ? 4 : parseInt(length, 10);
metadata.vlen = (length == "");
if (dtype_str === "Reference" || dtype_str === "RegionReference") {
metadata.type = Module.H5T_class_t.H5T_REFERENCE.value;
metadata.size = (dtype_str === "Reference") ? Module.SIZEOF_OBJ_REF : Module.SIZEOF_DSET_REGION_REF;
metadata.littleEndian = true;
}
else {
throw "should never happen"
let match = dtype_str.match(/^([<>|]?)([bhiqefdsBHIQS])([0-9]*)$/);
if (match == null) {
throw dtype_str + " is not a recognized dtype"
}
let [full, endianness, typestr, length] = match;
metadata.littleEndian = (endianness != '>');
if (fmts_int.has(typestr.toLowerCase())) {
metadata.type = Module.H5T_class_t.H5T_INTEGER.value;
metadata.size = (fmts_int.get(typestr.toLowerCase()) as number);
metadata.signed = (typestr.toLowerCase() == typestr);
}
else if (fmts_float.has(typestr)) {
metadata.type = Module.H5T_class_t.H5T_FLOAT.value;
metadata.size = (fmts_float.get(typestr) as number);
}
else if (typestr.toUpperCase() === 'S') {
metadata.type = Module.H5T_class_t.H5T_STRING.value;
metadata.size = (length == "") ? 4 : parseInt(length, 10);
metadata.vlen = (length == "");
}
else {
throw "should never happen"
}
}
return metadata
}
Expand Down Expand Up @@ -397,7 +421,7 @@ const TypedArray_to_dtype = new Map([
**/
type Slice = [] | [number|null] | [number|null,number|null] | [number|null, number|null, number|null];

export type GuessableDataTypes = TypedArray | number | number[] | string | string[];
export type GuessableDataTypes = TypedArray | number | number[] | string | string[] | Reference | Reference[] | RegionReference | RegionReference[];

function guess_dtype(data: GuessableDataTypes): string {
if (ArrayBuffer.isView(data)) {
Expand All @@ -417,7 +441,13 @@ function guess_dtype(data: GuessableDataTypes): string {
return '<d'; // default float type: Float64
}
else if (arr_data.every((d) => (typeof d == 'string'))) {
return 'S'
return 'S';
}
else if (arr_data.every((d) => d instanceof RegionReference)) {
return 'RegionReference';
}
else if (arr_data.every((d) => d instanceof Reference)) {
return 'Reference';
}
}
throw new Error("unguessable type for data");
Expand All @@ -428,7 +458,9 @@ enum OBJECT_TYPE {
GROUP = "Group",
BROKEN_SOFT_LINK = "BrokenSoftLink",
EXTERNAL_LINK = "ExternalLink",
DATATYPE = 'Datatype'
DATATYPE = 'Datatype',
REFERENCE = 'Reference',
REGION_REFERENCE = 'RegionReference',
}

export class BrokenSoftLink {
Expand All @@ -455,6 +487,16 @@ export class Datatype {
constructor() {}
}

/**
 * Wrapper around the raw bytes of an HDF5 reference.
 *
 * The bytes are kept exactly as supplied (no copy is made), so callers that
 * need an independent buffer must slice before constructing.
 */
export class Reference {
  /**
   * @param ref_data - raw reference bytes; exposed unchanged as `this.ref_data`.
   */
  constructor(public ref_data: Uint8Array) {}
}

/**
 * Reference subtype used for region references.
 *
 * It adds no fields or methods; its only role is to be distinguishable via
 * `instanceof`. Because every RegionReference is also a Reference, any
 * `instanceof` dispatch must test for RegionReference first.
 */
export class RegionReference extends Reference {
}

export class Attribute {
file_id: bigint;
path: string;
Expand Down Expand Up @@ -572,6 +614,10 @@ abstract class HasAttrs {
return Module.delete_attribute(this.file_id, this.path, name);
}

/**
 * Create an object reference to this item.
 *
 * Asks the native module for the reference bytes of the object identified by
 * this item's `file_id` and `path`, and wraps them in a `Reference`.
 *
 * @returns a new `Reference` holding the bytes returned by the native module.
 */
create_reference(): Reference {
  return new Reference(Module.create_object_reference(this.file_id, this.path));
}
}

export class Group extends HasAttrs {
Expand Down Expand Up @@ -640,6 +686,13 @@ export class Group extends HasAttrs {
return null
}

/**
 * Resolve a reference to the object it points at.
 *
 * The referenced name is looked up through the native module and then fetched
 * with `this.get(name)`. For a plain `Reference` the fetched object is returned
 * directly; for a `RegionReference` the fetched dataset is wrapped, together
 * with the reference, in a `DatasetRegion`.
 *
 * @param ref - object reference or region reference to resolve.
 * @returns the referenced object (or `null` if `get` finds nothing) for object
 *          references; a `DatasetRegion` for region references.
 * @throws Error if a region reference resolves to something that is not a
 *         `Dataset` (including nothing at all). Previously this produced a
 *         `DatasetRegion` with an invalid source that failed only on first use.
 */
dereference(ref: Reference | RegionReference): BrokenSoftLink | ExternalLink | Datatype | Group | Dataset | DatasetRegion | null {
  const is_region = ref instanceof RegionReference;
  // The last argument is true for object references and false for region references.
  const name = Module.get_referenced_name(this.file_id, ref.ref_data, !is_region);
  const target = this.get(name);
  if (!is_region) {
    return target;
  }
  // A region can only be read from a dataset; fail here with a clear message
  // instead of handing an unchecked cast to DatasetRegion.
  if (!(target instanceof Dataset)) {
    throw new Error(`region reference target "${name}" is not a Dataset`);
  }
  return new DatasetRegion(target, ref);
}

create_group(name: string): Group {
Module.create_group(this.file_id, this.path + "/" + name);
return this.get(name) as Group;
Expand Down Expand Up @@ -896,6 +949,15 @@ export class Dataset extends HasAttrs {
}
}

/**
 * Create a region reference selecting part of this dataset.
 *
 * @param ranges - one slice per dimension, interpreted against the dataset's
 *                 current shape to derive the hyperslab count/offset/strides.
 * @returns a `RegionReference` wrapping the bytes produced by the native module.
 */
create_region_reference(ranges: Slice[]) {
  // Convert the per-dimension slices into hyperslab parameters for this shape.
  const { strides, count, offset } = calculateHyperslabParams(this.metadata.shape, ranges);
  return new RegionReference(
    Module.create_region_reference(this.file_id, this.path, count, offset, strides)
  );
}

to_array() {
const { json_value, metadata } = this;
const { shape } = metadata;
Expand Down Expand Up @@ -972,6 +1034,47 @@ export class Dataset extends HasAttrs {

}

/**
 * Pairs a source dataset with a region reference into it, and exposes the
 * metadata and decoded values of the referenced region.
 */
export class DatasetRegion {
  /** Dataset the region reference points into. */
  source_dataset: Dataset;
  /** Region reference describing the selection within `source_dataset`. */
  region_reference: RegionReference;
  /** Lazily-populated cache for the `metadata` getter. */
  private _metadata?: Metadata;

  /**
   * @param source_dataset - dataset the region belongs to.
   * @param region_reference - reference selecting the region.
   */
  constructor(source_dataset: Dataset, region_reference: RegionReference) {
    this.source_dataset = source_dataset;
    this.region_reference = region_reference;
  }

  /** Metadata of the referenced region; fetched from the native module once, then cached. */
  get metadata() {
    if (this._metadata === undefined) {
      this._metadata = Module.get_region_metadata(
        this.source_dataset.file_id,
        this.region_reference.ref_data
      );
    }
    return this._metadata;
  }

  /** Decoded values of the referenced region (non-JSON-compatible form). */
  get value() {
    return this._value_getter(false);
  }

  /**
   * Read the region's raw bytes through a temporary heap buffer and decode them.
   *
   * @param json_compatible - forwarded to `process_data`; defaults to false.
   * @returns the decoded region data.
   */
  _value_getter(json_compatible=false): OutputData {
    const region_metadata = this.metadata;
    // Buffer size = element size * number of elements reported for the region.
    const byte_count = region_metadata.size * region_metadata.total_size;
    const buffer_ptr = Module._malloc(byte_count);
    try {
      Module.get_region_data(
        this.source_dataset.file_id,
        this.region_reference.ref_data,
        BigInt(buffer_ptr)
      );
      // Copy out of the module heap before the buffer is released below.
      const raw_bytes = Module.HEAPU8.slice(buffer_ptr, buffer_ptr + byte_count);
      return process_data(raw_bytes, region_metadata, json_compatible);
    } finally {
      // Variable-length data needs an extra reclaim step before the buffer is freed.
      if (region_metadata.vlen) {
        Module.reclaim_vlen_memory(
          this.source_dataset.file_id,
          this.source_dataset.path,
          "",
          BigInt(buffer_ptr)
        );
      }
      Module._free(buffer_ptr);
    }
  }
}

function create_nested_array(value: JSONCompatibleOutputData[], shape: number[]) {
// check that shapes match:
const total_length = value.length;
Expand Down Expand Up @@ -1001,6 +1104,7 @@ export const h5wasm = {
Group,
Dataset,
Datatype,
DatasetRegion,
ready,
ACCESS_MODES
}
Expand Down
Loading
Loading