In [1]:
import functools
import time

import fsspec
import h5netcdf
import h5py
import psutil
import pyfive
import s3fs
import zh5

# Some performance testing

There are a lot of cases and gritty details; they will be explained as clearly as possible.

In [2]:
def measure(name, pack):
    """Build a decorator that reports timing and network usage of a method call.

    Each call of the decorated method runs the method once and then prints a
    dict with the elapsed time and the change in the counters returned by
    ``psutil.net_io_counters()`` between the start and the end of the call.

    Parameters
    ----------
    name : str
        Label of the measured phase, printed under the ``"name"`` key.
    pack : str
        Label of the package under test, printed under the ``"package"`` key.

    Returns
    -------
    callable
        A decorator for methods (the wrapper takes ``self`` as its first
        argument). The decorated method returns the original result unchanged.

    Notes
    -----
    ``psutil.net_io_counters()`` is documented as system-wide, so traffic
    generated by anything else on the machine during the call is included in
    the reported deltas.
    """
    # Counter fields reported as end-minus-start deltas, in output order.
    net_fields = (
        "bytes_recv",
        "bytes_sent",
        "packets_recv",
        "packets_sent",
        "errin",
        "errout",
        "dropin",
        "dropout",
    )

    def decorator(func):
        @functools.wraps(func)  # keep the wrapped method's __name__ and docstring
        def wrap(self, *args, **kwargs):
            start_net = psutil.net_io_counters()
            # perf_counter() is monotonic, so the elapsed time cannot be skewed
            # by system clock adjustments the way time.time() can.
            start_time = time.perf_counter()

            result = func(self, *args, **kwargs)

            elapsed = time.perf_counter() - start_time
            end_net = psutil.net_io_counters()

            report = {"package": pack, "name": name, "time": elapsed}
            report.update(
                (field, getattr(end_net, field) - getattr(start_net, field))
                for field in net_fields
            )
            print(report)

            return result  # Ensure wrapped method returns expected output

        return wrap
    return decorator

class PerfTest:
    """Base class describing the three benchmark phases: open, locate, load."""

    def open(self, store, object):
        """Open ``object`` from ``store``; subclasses must override this."""
        raise NotImplementedError

    def locate(self, f, vname):
        """Return the item of ``f`` stored under the key ``vname``."""
        return f[vname]

    def load(self, v):
        """Read ``v`` with a full slice and compute the mean of the result.

        The mean is discarded and nothing is returned.
        """
        contents = v[:]
        contents.mean()

class H5pyPerfTest(PerfTest):
    """Benchmark phases for h5py reading through an s3fs file object."""

    @measure("open", "h5py")
    def open(self, store, object):
        """Open ``object`` anonymously via the S3 endpoint ``store`` with h5py."""
        endpoint_options = {"endpoint_url": store}
        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs=endpoint_options)
        remote_handle = filesystem.open(object, "rb")
        return h5py.File(remote_handle)

    @measure("locate", "h5py")
    def locate(self, f, vname):
        """Measured pass-through to ``PerfTest.locate``."""
        located = super().locate(f, vname)
        return located

    @measure("load", "h5py")
    def load(self, v):
        """Measured pass-through to ``PerfTest.load``; returns nothing."""
        super().load(v)

class PyfivePerfTest(PerfTest):
    """Benchmark phases for pyfive reading through an s3fs file object."""

    # The label must be "pyfive": with "h5py" the open phase of this class is
    # printed under the wrong package and cannot be told apart from h5py's.
    @measure("open", "pyfive")
    def open(self, store, object):
        """Open ``object`` anonymously via the S3 endpoint ``store`` with pyfive."""
        fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": store})
        p = fs.open(object, "rb")
        f = pyfive.File(p)
        return f

    @measure("locate", "pyfive")
    def locate(self, f, vname):
        """Measured pass-through to ``PerfTest.locate``."""
        return super().locate(f, vname)

    @measure("load", "pyfive")
    def load(self, v):
        """Measured pass-through to ``PerfTest.load``; returns nothing."""
        super().load(v)

class Zh5PerfTest(PerfTest):
    """Benchmark phases for zh5, which is given the joined URL directly."""

    @measure("open", "zh5")
    def open(self, store, object):
        """Open ``store`` and ``object`` joined by ``/`` as a ``zh5.PagedFile``."""
        url = "/".join((store, object))
        return zh5.PagedFile(url)

    @measure("locate", "zh5")
    def locate(self, f, vname):
        """Measured pass-through to ``PerfTest.locate``."""
        located = super().locate(f, vname)
        return located

    @measure("load", "zh5")
    def load(self, v):
        """Measured pass-through to ``PerfTest.load``; returns nothing."""
        super().load(v)

## Original file from UoR

- `UM_m01s30i204_vn1106` has shape `(2, 11, 1921, 2560)` and uses contiguous storage.
- `UM_m01s16i202_vn1106` has shape `(2, 1920, 2560)` and uses contiguous storage.

In [3]:
# Endpoint URL of the object store; the s3fs-based tests pass it as
# `endpoint_url`, while zh5 joins it with the object path.
store = "https://uor-aces-o.s3-ext.jc.rl.ac.uk"
o = "bnl/ch330a.pc19790301-def-short.nc"  # original BNL file, contiguous storage
# Variable read in this section.
vname = "UM_m01s16i202_vn1106"

### h5py

In [4]:
# h5py on the original file: each measured phase prints its own metrics dict.
pt = H5pyPerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'h5py', 'name': 'open', 'time': 3.180185079574585, 'bytes_recv': 55116774, 'bytes_sent': 126821, 'packets_recv': 39357, 'packets_sent': 1849, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'h5py', 'name': 'locate', 'time': 6.238452672958374, 'bytes_recv': 165355990, 'bytes_sent': 419098, 'packets_recv': 118183, 'packets_sent': 5597, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'h5py', 'name': 'load', 'time': 0.038481712341308594, 'bytes_recv': 60, 'bytes_sent': 66, 'packets_recv': 1, 'packets_sent': 1, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


### pyfive

In [5]:
# pyfive on the original file: same three measured phases (open, locate, load).
pt = PyfivePerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'h5py', 'name': 'open', 'time': 11.983564138412476, 'bytes_recv': 275601012, 'bytes_sent': 695565, 'packets_recv': 197002, 'packets_sent': 9594, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'pyfive', 'name': 'locate', 'time': 0.0007381439208984375, 'bytes_recv': 0, 'bytes_sent': 0, 'packets_recv': 0, 'packets_sent': 0, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'pyfive', 'name': 'load', 'time': 0.030376672744750977, 'bytes_recv': 993, 'bytes_sent': 132, 'packets_recv': 4, 'packets_sent': 2, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


### zh5

In [6]:
# zh5 on the original file: same three measured phases (open, locate, load).
pt = Zh5PerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'zh5', 'name': 'open', 'time': 1.8937711715698242, 'bytes_recv': 85599, 'bytes_sent': 23456, 'packets_recv': 203, 'packets_sent': 155, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'zh5', 'name': 'locate', 'time': 0.9024507999420166, 'bytes_recv': 73030, 'bytes_sent': 14783, 'packets_recv': 128, 'packets_sent': 99, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'zh5', 'name': 'load', 'time': 2.2733941078186035, 'bytes_recv': 41387328, 'bytes_sent': 129105, 'packets_recv': 29567, 'packets_sent': 1842, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


## Conclusions

Although each package distributes the time differently across the tasks (open, locate, load), the overall times look pretty similar. It makes sense to me.

## Large and compressed chunks dataset and HDF5 file with pages

I have repacked the original BNL file so that it is stored with compressed chunks.

- `UM_m01s30i204_vn1106` is of shape `(2, 11, 1921, 2560)` and chunkshape `(1, 1, 1921, 2560)`.
- `UM_m01s16i202_vn1106` is of shape `(2, 1920, 2560)` and chunkshape `(1, 1920, 2560)`.

zh5 benefits from multithreaded chunk retrieval; this should be noticeable for the variable with 22 chunks (`UM_m01s30i204_vn1106`, 2 × 11 chunks).

In [7]:
# Endpoint URL of the object store holding the repacked file.
store = "https://api.cloud.ifca.es:8080/swift/v1"
o = "tests/ch330a.pc19790301-def-short-page.nc"  # original BNL file that has been repacked with chunking and compression
# Large case: shape (2, 11, 1921, 2560) with chunk shape (1, 1, 1921, 2560), i.e. 22 chunks.
vname = "UM_m01s30i204_vn1106"

### h5py

In [8]:
# h5py on the repacked file, 22-chunk variable.
pt = H5pyPerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'h5py', 'name': 'open', 'time': 7.025630950927734, 'bytes_recv': 55279379, 'bytes_sent': 276795, 'packets_recv': 40779, 'packets_sent': 3264, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'h5py', 'name': 'locate', 'time': 4.742404222488403, 'bytes_recv': 55408023, 'bytes_sent': 270487, 'packets_recv': 42201, 'packets_sent': 3349, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'h5py', 'name': 'load', 'time': 27.823221445083618, 'bytes_recv': 308564735, 'bytes_sent': 1306454, 'packets_recv': 233931, 'packets_sent': 18198, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


### pyfive

In [9]:
# pyfive on the repacked file, 22-chunk variable.
pt = PyfivePerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'h5py', 'name': 'open', 'time': 16.48442578315735, 'bytes_recv': 166093142, 'bytes_sent': 768766, 'packets_recv': 126468, 'packets_sent': 9993, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'pyfive', 'name': 'locate', 'time': 14.42582631111145, 'bytes_recv': 165957431, 'bytes_sent': 719836, 'packets_recv': 125981, 'packets_sent': 9928, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'pyfive', 'name': 'load', 'time': 23.47941541671753, 'bytes_recv': 253241225, 'bytes_sent': 1221667, 'packets_recv': 190817, 'packets_sent': 15965, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


### zh5

In [10]:
# zh5 on the repacked file, 22-chunk variable.
pt = Zh5PerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'zh5', 'name': 'open', 'time': 2.454031229019165, 'bytes_recv': 189774, 'bytes_sent': 39806, 'packets_recv': 356, 'packets_sent': 293, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'zh5', 'name': 'locate', 'time': 1.5045194625854492, 'bytes_recv': 208780, 'bytes_sent': 84921, 'packets_recv': 506, 'packets_sent': 451, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'zh5', 'name': 'load', 'time': 6.828485727310181, 'bytes_recv': 278378961, 'bytes_sent': 3155474, 'packets_recv': 205357, 'packets_sent': 47351, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


## Conclusions

Accessing the 22-chunk variable (`UM_m01s30i204_vn1106`) is much faster with zh5's multithreaded retrieval, as expected.

## Small and compressed chunks dataset and HDF5 file with pages

`UM_m01s16i202_vn1106` has shape `(2, 1920, 2560)` and chunk shape `(1, 1920, 2560)`, i.e. only two chunks, so multithreaded chunk retrieval shouldn't make a noticeable difference.

In [11]:
# Same endpoint and repacked file as the previous section.
store = "https://api.cloud.ifca.es:8080/swift/v1"
o = "tests/ch330a.pc19790301-def-short-page.nc"  # original BNL file that has been repacked with chunking and compression
# Small case: shape (2, 1920, 2560) with chunk shape (1, 1920, 2560), i.e. 2 chunks.
vname = "UM_m01s16i202_vn1106"

### h5py

In [12]:
# h5py on the repacked file, 2-chunk variable.
pt = H5pyPerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'h5py', 'name': 'open', 'time': 4.430277347564697, 'bytes_recv': 55347877, 'bytes_sent': 275837, 'packets_recv': 42157, 'packets_sent': 3386, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'h5py', 'name': 'locate', 'time': 4.459393739700317, 'bytes_recv': 55285589, 'bytes_sent': 221158, 'packets_recv': 41533, 'packets_sent': 3278, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'h5py', 'name': 'load', 'time': 0.22082304954528809, 'bytes_recv': 3664, 'bytes_sent': 2719, 'packets_recv': 13, 'packets_sent': 8, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


### pyfive

In [13]:
# pyfive on the repacked file, 2-chunk variable.
pt = PyfivePerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'h5py', 'name': 'open', 'time': 12.932042360305786, 'bytes_recv': 165984407, 'bytes_sent': 712348, 'packets_recv': 126006, 'packets_sent': 9750, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'pyfive', 'name': 'locate', 'time': 17.123637676239014, 'bytes_recv': 221281933, 'bytes_sent': 1026521, 'packets_recv': 167802, 'packets_sent': 13334, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'pyfive', 'name': 'load', 'time': 0.22912120819091797, 'bytes_recv': 2718, 'bytes_sent': 2554, 'packets_recv': 8, 'packets_sent': 6, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


### zh5

In [14]:
# zh5 on the repacked file, 2-chunk variable.
pt = Zh5PerfTest()
f = pt.open(store, o)
v = pt.locate(f, vname)
pt.load(v)

{'package': 'zh5', 'name': 'open', 'time': 2.3845694065093994, 'bytes_recv': 190747, 'bytes_sent': 47093, 'packets_recv': 393, 'packets_sent': 309, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'zh5', 'name': 'locate', 'time': 1.1284563541412354, 'bytes_recv': 132211, 'bytes_sent': 20269, 'packets_recv': 233, 'packets_sent': 161, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}
{'package': 'zh5', 'name': 'load', 'time': 3.391122341156006, 'bytes_recv': 25918632, 'bytes_sent': 284854, 'packets_recv': 18806, 'packets_sent': 3547, 'errin': 0, 'errout': 0, 'dropin': 0, 'dropout': 0}


## Conclusions

`UM_m01s16i202_vn1106` has only two chunks. Something weird seems to be happening with pyfive (although I may not have installed the proper branch). In general, pyfive appears to do a lot of "prefetching"; maybe this is taking some extra time.