# ZnH5MD and ASE

ZnH5MD provides a fast and easy way to store trajectory data from a list of ASE Atoms objects in an H5MD database.

ZnH5MD supports only a small subset of the features of `ase.db`.
It uses HDF5 to store the data as large, chunked arrays, which allows very fast access.

In [1]:
from pathlib import Path

import ase.build
import ase.collections
import ase.db
import ase.io
import numpy as np
from ase.calculators.singlepoint import SinglePointCalculator

In [2]:
# Build one ASE Atoms object per molecule name in ase.collections.g2 and attach
# pseudo-random energy, forces and stress to each via a SinglePointCalculator.
# A seeded generator keeps the data identical across Restart & Run All.
rng = np.random.default_rng(seed=42)
atoms_list = [ase.build.molecule(name) for name in ase.collections.g2.names]
for atoms in atoms_list:
    atoms.calc = SinglePointCalculator(
        atoms=atoms,
        energy=rng.random(),  # single scalar
        forces=rng.random((len(atoms), 3)),  # one 3-vector per atom
        stress=rng.random(6),  # six components
    )

In [3]:
import znh5md

In [4]:
%%time
db = znh5md.io.DataWriter(filename="db.h5")
db.add(znh5md.io.AtomsReader(atoms_list, step=1, time=0.1))

CPU times: user 44.9 ms, sys: 0 ns, total: 44.9 ms
Wall time: 60.3 ms


In [5]:
%%time
# Open db.h5 through ZnH5MD's ASE interface and read the stored structures back.
traj = znh5md.ASEH5MD("db.h5")
# NOTE(review): despite the singular name, `atoms` holds the whole list returned by
# get_atoms_list(). The same name is a loop variable in other cells, so this
# binding is overwritten once those cells run.
atoms = traj.get_atoms_list()

CPU times: user 10.4 ms, sys: 0 ns, total: 10.4 ms
Wall time: 11.6 ms


In [6]:
# Number of structures read back from db.h5.
len(atoms)

162

In [7]:
# Positions of the first structure read back from db.h5.
atoms[0].get_positions()

array([[ 0.      ,  0.      ,  0.124619],
       [ 0.      ,  1.200647, -0.623095],
       [ 1.039791, -0.600323, -0.623095],
       [-1.039791, -0.600323, -0.623095]])

In [8]:
# Baseline for comparison: write the same structures with ase.io.write, then
# time reading every frame back with ase.io.iread (the result is discarded).
ase.io.write("molecules.traj", atoms_list)
%time _ = list(ase.io.iread("molecules.traj"))

CPU times: user 24.4 ms, sys: 0 ns, total: 24.4 ms
Wall time: 24.7 ms


In [9]:
%%time
with ase.db.connect("db.db") as conn:
    for atoms in atoms_list:
        conn.write(atoms)

CPU times: user 80.2 ms, sys: 0 ns, total: 80.2 ms
Wall time: 273 ms


In [10]:
%%time
atoms_from_db = []
with ase.db.connect("db.db") as conn:
    for row in conn.select():
        atoms_from_db.append(row.toatoms())

CPU times: user 23.5 ms, sys: 8.46 ms, total: 32 ms
Wall time: 467 ms


In [11]:
# Positions of the first structure read back from db.db.
atoms_from_db[0].get_positions()

array([[ 0.      ,  0.      ,  0.124619],
       [ 0.      ,  1.200647, -0.623095],
       [ 1.039791, -0.600323, -0.623095],
       [-1.039791, -0.600323, -0.623095]])

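ZnH5MD can also use ASE to read trajectory files from disk. Here the same structures are first written to an XYZ file and then converted into an H5MD file.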

In [12]:
# Write the structures to traj.xyz so there is a file on disk to convert.
ase.io.write("traj.xyz", atoms_list)

In [13]:
# Point a file reader at traj.xyz, then hand it to a writer bound to from_file.h5.
xyz_reader = znh5md.io.ASEFileReader("traj.xyz", step=1, time=0.1)
db = znh5md.io.DataWriter(filename="from_file.h5")
db.add(xyz_reader)

162it [00:00, 6154.02it/s]


In [14]:
# The file can be opened through either interface; the ASE one is left here as
# a commented-out alternative, the Dask one is used in the cells below.
# traj = znh5md.ASEH5MD("from_file.h5") # ASE interface
traj = znh5md.DaskH5MD("from_file.h5") # Dask interface

In [15]:
# Dask array of positions; displaying it shows the shape and chunk layout.
traj.position.value

Unnamed: 0,Array,Chunk
Bytes,53.16 kiB,2.34 kiB
Shape,"(162, 14, 3)","(10, 10, 3)"
Dask graph,34 chunks in 2 graph layers,34 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 53.16 kiB 2.34 kiB Shape (162, 14, 3) (10, 10, 3) Dask graph 34 chunks in 2 graph layers Data type float64 numpy.ndarray",3  14  162,

Unnamed: 0,Array,Chunk
Bytes,53.16 kiB,2.34 kiB
Shape,"(162, 14, 3)","(10, 10, 3)"
Dask graph,34 chunks in 2 graph layers,34 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [16]:
# Dask array of forces; displaying it shows the shape and chunk layout.
traj.forces.value

Unnamed: 0,Array,Chunk
Bytes,53.16 kiB,53.16 kiB
Shape,"(162, 14, 3)","(162, 14, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 53.16 kiB 53.16 kiB Shape (162, 14, 3) (162, 14, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  14  162,

Unnamed: 0,Array,Chunk
Bytes,53.16 kiB,53.16 kiB
Shape,"(162, 14, 3)","(162, 14, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [17]:
# .compute() evaluates the Dask array; the leading axis matches the number of
# structures (presumably the axes are structure, atom slot, xyz; confirm).
traj.position.value.compute().shape

(162, 14, 3)

In [18]:
# Structures have different numbers of atoms, so ZnH5MD pads the smaller ones
# with NaN rows to give every entry the same array shape.
traj.position.value.compute()[0]

array([[ 0.      ,  0.      ,  0.124619],
       [ 0.      ,  1.200647, -0.623095],
       [ 1.039791, -0.600323, -0.623095],
       [-1.039791, -0.600323, -0.623095],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan],
       [      nan,       nan,       nan]])

In [19]:
# Remove every file this notebook created. Pure Python instead of `!rm`, so the
# cell also works where no `rm` command exists and files already gone are skipped.
for generated_file in ("db.h5", "db.db", "from_file.h5", "traj.xyz", "molecules.traj"):
    Path(generated_file).unlink(missing_ok=True)