1. Load the entire file onto memory
2. (**, driver="core"**)

In [102]:
import h5py
#f = h5py.File("SLData_AP_PA1300um.mat", driver="core", backing_store=True)
f = h5py.File("SLData_AP1300um.mat", 'r')

Maybe the core driver (above) only works during creation?

In [103]:
 # Split the file into 1-GB chunks
# f = h5py.File("family.hdf5", driver="family", memb_size=1024**3)

## Dataset Basics

In [104]:
import numpy as np
f = h5py.File("testfile.hdf5", "w")
arr = np.ones((5,2))
f["my dataset"] = arr
dset = f["my dataset"]
dset

<HDF5 dataset "my dataset": shape (5, 2), type "<f8">

In [105]:
type(dset[...])

numpy.ndarray

In [106]:
dset[1:4, 1] = 2.0

In [107]:
dset[...]

array([[1., 1.],
       [1., 2.],
       [1., 2.],
       [1., 2.],
       [1., 1.]])

In [108]:
dset = f.create_dataset("test1", (10, 10))

In [109]:
f.create_dataset("test2", (10, 10), dtype=np.complex_)

<HDF5 dataset "test2": shape (10, 10), type "<c16">

In [110]:
f["test2"].dtype

dtype('complex128')

## create 1D dataset of size 4 gigabytes

In [111]:
dset = f.create_dataset("big dataset", (1024**3,), dtype=np.float_)

In [112]:
dset[0:1024] = np.arange(1024)
f.flush()

In [113]:
dset.dtype

dtype('<f8')

## read_direct()
### when reading from file to memory if a preallocated NumPy array of a different dtype is used, then the data is converted to NumPy array's dtype during reading

In [114]:
dset = f["test1"]
dset.dtype

dtype('<f4')

In [115]:
big_out = np.empty((10, 10), dtype=np.float_)

In [116]:
dset.read_direct(big_out)

In [117]:
big_out.dtype

dtype('float64')

In [118]:
dset = f["big dataset"]
out = dset.astype('float32')[0:1021]
out.dtype

dtype('float32')

## slicing efficiently

do instead of this:
```
for ix in range(100):
    for iy in range(100):
        val = dset[ix,iy]
        ....
```
this:
```
for ix in range(100):
    val = dset[ix,:]
    ....
```


In [119]:
dset = f.create_dataset('1d', shape=(1,), data=42)
dset[0]

42

In [120]:
dset = f.create_dataset('0d', data=42)
dset[()]
#dset[0] -will give error

42

In [121]:
dset

<HDF5 dataset "0d": shape (), type "<i4">

In [122]:
row = np.arange(10).reshape(5,2)

In [123]:
row[[0,2,4]]

array([[0, 1],
       [4, 5],
       [8, 9]])

In [124]:
bins = np.linspace(-5,5,10)
counts = np.zeros_like(bins)

## Automatic Broadcasting

do instead of this:
```
# dset.shape  (100,1000)
data = dset[0, :]
for ix in xrange(100):
    dset[ix, :] = data
        ....
```
this:
```
dset[:,:] = dset[0, :] # since the last dim match it gets broadcasted
```

## Reading directly into existing arrays|

In [129]:
dset = f['my dataset']
out = np.empty((100,2), dtype=np.float_)
dset.read_direct(out, source_sel=np.s_[0, :], dest_sel=np.s_[10, :])

In [130]:
type(dset)


h5py._hl.dataset.Dataset

In [131]:
np.s_??

In [132]:
np.s_[2,:]

(2, slice(None, None, None))

In [133]:
id(dset)

1699909930864

In [134]:
id(f["my dataset"])

1699912864272

In [135]:
out.view()

array([[8.39868782e-312, 8.39868782e-312],
       [8.39868780e-312, 8.39868780e-312],
       [8.39868780e-312, 8.39868780e-312],
       [8.39868783e-312, 8.39868783e-312],
       [8.39868783e-312, 8.39868780e-312],
       [8.39868781e-312, 8.39868780e-312],
       [8.39868780e-312, 8.39868753e-312],
       [8.39868755e-312, 8.39868752e-312],
       [8.39868753e-312, 8.39868752e-312],
       [8.39868750e-312, 8.39868752e-312],
       [1.00000000e+000, 1.00000000e+000],
       [8.39868751e-312, 8.39868752e-312],
       [8.39868750e-312, 8.39868751e-312],
       [8.39868749e-312, 8.39868752e-312],
       [8.39868750e-312, 8.39868751e-312],
       [8.39868750e-312, 8.39868751e-312],
       [8.39868752e-312, 8.39868753e-312],
       [8.39868750e-312, 8.39868751e-312],
       [8.39868749e-312, 8.39868753e-312],
       [8.39868751e-312, 8.39868753e-312],
       [8.39868752e-312, 8.39868750e-312],
       [8.39868750e-312, 8.39868749e-312],
       [8.39868752e-312, 8.39868751e-312],
       [8.3

In [136]:
x = np.array([(1, 2)], dtype=[('a', np.int8), ('b', np.int8)])

In [137]:
y = x.view(dtype=np.int16, type=np.matrix)

In [138]:
y

matrix([[513]], dtype=int16)

In [139]:
y.view().reshape(-1,1)

matrix([[513]], dtype=int16)

## Chunked storage

In [140]:
dset = f.create_dataset('chunked', (100,480,640), dtype='i1', chunks=(1,64,64))

In [141]:
#for data in dset.iter_chunks():
#    print(data)

In [142]:
dset.__getattribute__('shape')

(100, 480, 640)

## Root group and subgroups

In [145]:
subgroup = f.create_group("SubGroup")

In [148]:
subgroup.name

'/SubGroup'

In [149]:
subsubgroup = subgroup.create_group('AnotherGroup')

In [179]:
subsubgroup.name

'/SubGroup/AnotherGroup'

In [151]:
out = subgroup.create_group('/onemore/underit/andunderit')

In [153]:
out["tmp"] = 5.0

In [154]:
out

<HDF5 group "/onemore/underit/andunderit" (1 members)>

In [166]:
out.file == f

True

In [164]:
out.parent

<HDF5 group "/onemore/underit" (1 members)>

## Links (there is a layer between group object and objects that are its members)

### hard link to a name created by assigning an object (|objects only have address).
```subgroup = f.create_group("SubGroup")```

In [168]:
f["SubGroup"].name

'/SubGroup'

In [171]:
f["link2SubGroup"] = subgroup

OSError: Unable to create link (name already exists)

In [173]:
f["link2SubGroup"] == f["SubGroup"]

True

In [175]:
nona = f.create_group(None)
print(nona.name)

None


In [176]:
f['nona'] = nona
nona.name

'/nona'

### Free Space and Repacking
When an object (for example, a large dataset) is deleted, the space it occupied on disk
is reused for new objects like groups and datasets. However, at the time of writing, HDF5
does not track such “free space” across file open/close cycles. So if you don’t end up
reusing the space by the time you close the file, you may end up with a “hole” of unusable
space in the file that can’t be reclaimed.
```
>>> del f['bigdata']

$ h5repack bigfile.hdf5 out.hdf5
```

### Soft link (stores the path to an object)

In [180]:
f['softlink'] = h5py.SoftLink('/SubGroup/AnotherGroup')
f['softlink'] == f['SubGroup/AnotherGroup']

True

if we move the dataset and
replace it with something else, /softlink would then point to the new object:
```
>>> grp.move('dataset', 'new_dataset_name')
>>> dset2 = grp.create_dataset('dataset', (50,))
>>> f['softlink'] == dset
False
>>> f['softlink'] == dset2
True
```

In [200]:
tmp_dset = f.create_dataset('DoesntExist',(10,10))

In [212]:
f['brokenSlink'] = h5py.SoftLink('/DoesntExist')

In [213]:
del f['DoesntExist']

### Create dataset when it doesn't exist, otherwise replace the current with new data (same size, and dtype restrictions apply)'

In [224]:
f.require_dataset('1d', shape=(1,), dtype=np.int32, data=np.random.random(1))

<HDF5 dataset "1d": shape (1,), type "<i4">

In [221]:
f['1d'].dtype

dtype('int32')

In [225]:
f.keys()

<KeysViewHDF5 ['0d', '1d', 'SubGroup', 'big dataset', 'brokenSlink', 'chunked', 'link2SubGroup', 'my dataset', 'nona', 'onemore', 'softlink', 'test1', 'test2']>

### Iteration and containership

In [229]:
[(x,y) for x,y in f.items()]

[('0d', <HDF5 dataset "0d": shape (), type "<i4">),
 ('1d', <HDF5 dataset "1d": shape (1,), type "<i4">),
 ('SubGroup', <HDF5 group "/SubGroup" (1 members)>),
 ('big dataset',
  <HDF5 dataset "big dataset": shape (1073741824,), type "<f8">),
 ('brokenSlink', None),
 ('chunked', <HDF5 dataset "chunked": shape (100, 480, 640), type "|i1">),
 ('link2SubGroup', <HDF5 group "/link2SubGroup" (1 members)>),
 ('my dataset', <HDF5 dataset "my dataset": shape (5, 2), type "<f8">),
 ('nona', <HDF5 group "/nona" (0 members)>),
 ('onemore', <HDF5 group "/onemore" (1 members)>),
 ('softlink', <HDF5 group "/softlink" (0 members)>),
 ('test1', <HDF5 dataset "test1": shape (10, 10), type "<f4">),
 ('test2', <HDF5 dataset "test2": shape (10, 10), type "<c16">)]

In [263]:
mylist = []
f.visit(mylist.append)

In [273]:
mydict = {x:x for x in mylist}

In [274]:
mydict['onemore/underit']

'onemore/underit'

In [299]:
f.get('SubGroup/AnotherGroup',getclass=True)

h5py._hl.group.Group

In [302]:
isinstance(f['0d'], h5py.Dataset)

True

In [294]:
for name in f:
    print(name)

0d
1d
SubGroup
big dataset
brokenSlink
chunked
link2SubGroup
my dataset
nona
onemore
softlink
test1
test2
