In [1]:
import h5py
import numpy as np
import time

In [2]:
# create an hdf5 file, overwrite the previous one
f = h5py.File("hdf_writing_testfile.h5", 'w')

In [3]:
# Create a new dataset (hopefully, unlimited in size)
c0 = f.create_dataset("chunked", (1024, 0), chunks=(1024, 488), maxshape=(1024, None))
c0.shape

(1024, 0)

In [4]:
c0.resize((1024,488))
c0.shape

(1024, 488)

In [None]:
rng = np.random.default_rng()

data = np.zeros((1024,488))
for i in range(488):
    data[:, i] = rng.standard_normal(1024)

c0[...] = data

c0[0:4, 0]

In [None]:
# what happens when I write to c0 beyond set bounds

data2 = np.arange(0, 1024)

try:
    c0[:, 488] = data2
except IndexError:
    print("nope, out of bounds, going to have to use resize..this is probablay very evil performance wise.")

In [None]:
x,y = c0.shape
print(x,y)

In [11]:
asdf = np.ones(1024)
print(asdf)

[1. 1. 1. ... 1. 1. 1.]


In [12]:
asdf[...] = 0
print(asdf)

[0. 0. 0. ... 0. 0. 0.]


In [None]:
def resizeperftest(n_ops):
    dt_set = np.zeros(n_ops)
    try:
        for i in range(n_ops):
            x,y = c0.shape
            newshape = (x,y+488)
            t = time.perf_counter_ns()
            c0.resize(newshape)
            dt = time.perf_counter_ns()-t
            dt_set[i] = dt
        return dt_set
    except KeyboardInterrupt:
        return dt_set

In [None]:
tdata = resizeperftest(10000)

print(f"tdata in us -> {tdata/1e3}")
c0.shape

In [None]:
testresults = [
    np.mean(resizeperftest(1)),
    np.mean(resizeperftest(100)),
    np.mean(resizeperftest(1000)),
    np.mean(resizeperftest(10_000)),
    np.mean(resizeperftest(1_000_000))
]

In [None]:
_ = [print(f"mean resize timecost in us {test/1e3}") for test in testresults]

In [None]:
def perfTest(fn, args):
    t = time.perf_counter_ns()
    r = fn(*args)
    t2 = time.perf_counter_ns()
    return (r,t2-t1)

In [3]:
d = np.arange(0,500)

In [16]:
lo_freq = "/time_ordered_data/lo_freq" in f
print(lo_freq)

True


In [17]:
f.create_dataset("/time_ordered_data/lo_freq", data=d)

ValueError: Unable to create dataset (name already exists)

In [None]:
lo_freq = "/time_ordered_data/lo_freq" in f
print(lo_freq)