In [1]:
from h5glance import H5Glance

In [2]:
file_raw = '/gpfs/exfel/exp/XMPL/201750/p700000/raw/r0001/RAW-R0017-AGIPD01-S00001.h5'
file_proc = '/gpfs/exfel/exp/XMPL/201750/p700000/proc/r0001/CORR-R0017-AGIPD01-S00001.h5'
file_proc_dssc = '/gpfs/exfel/exp/XMPL/201750/p700000/proc/r0023/CORR-R0122-DSSC00-S00000.h5' 

In [3]:
H5Glance(file_raw)

In [4]:
H5Glance(file_proc)

In [3]:
import sys
import os
from tables import open_file, Filters

## Raw

In [4]:
toGB = 1000 * 1000 * 1000
h5_raw_file = open_file(file_raw, mode='r')
image_data_raw = h5_raw_file.root.INSTRUMENT['SPB_DET_AGIPD1M-1']['DET']['1CH0:xtdf']['image']['data'][()]
raw_data_size = sys.getsizeof(image_data_raw)
h5_raw_file.close()

print('Shape of raw image/data: {}'.format(image_data_raw.shape))
print('dtype of raw image/data: {}'.format(image_data_raw.dtype))
print('Raw file size: {0:.3f} GB'.format(os.path.getsize(file_raw) / toGB))
print('Image/data raw size: {0:.3f} GB'.format(raw_data_size / toGB))

Shape of raw image/data: (32768, 2, 512, 128)
dtype of raw image/data: uint16
Raw file size: 8.594 GB
Image/data raw size: 8.590 GB


In [5]:
FILTERS_BLOSC_BLOSCLZ = Filters(complib='blosc:blosclz', complevel=9)
h5_blosc_blosclz_file = open_file('files/raw_blosclz_filter.h5', mode='w')
h5_blosc_blosclz_file.create_earray('/', 'data', obj=image_data_raw, filters=FILTERS_BLOSC_BLOSCLZ)
h5_blosc_blosclz_file.close()

In [6]:
raw_compressed_file_size = os.path.getsize('files/raw_blosclz_filter.h5')

In [7]:
module_raw_img = image_data_raw[:,0,...]

FILTERS_BLOSC_BLOSCLZ = Filters(complib='blosc:blosclz', complevel=9)
h5_blosc_blosclz_file = open_file('files/raw_img.h5', mode='w')
h5_blosc_blosclz_file.create_earray('/', 'img', obj=module_raw_img, filters=FILTERS_BLOSC_BLOSCLZ)
h5_blosc_blosclz_file.close()

In [15]:
print('Ratio: {0:.2f}'.format(raw_compressed_file_size / os.path.getsize(file_raw)))

Ratio: 0.82


## Proc

In [4]:
toGB = 1000 * 1000 * 1000

h5_proc_file = open_file(file_proc, mode='r')
image_data_proc = h5_proc_file.root.INSTRUMENT['SPB_DET_AGIPD1M-1']['DET']['1CH0:xtdf']['image']['data'][()]
image_mask_proc = h5_proc_file.root.INSTRUMENT['SPB_DET_AGIPD1M-1']['DET']['1CH0:xtdf']['image']['mask'][()]
image_gain_proc = h5_proc_file.root.INSTRUMENT['SPB_DET_AGIPD1M-1']['DET']['1CH0:xtdf']['image']['gain'][()]
h5_proc_file.close()

print('Shape of proc image/data: {}'.format(image_data_proc.shape))
print('Proc file size: {0:.3f} GB'.format(os.path.getsize(file_proc) / toGB))
print('Image/data size: {0:.3f} GB'.format(sys.getsizeof(image_data_proc) / toGB))
print('Mask size: {0:.3f} GB'.format(sys.getsizeof(image_mask_proc) / toGB))
print('Gain size: {0:.3f} GB'.format(sys.getsizeof(image_gain_proc) / toGB))

Shape of proc image/data: (32768, 512, 128)
Proc file size: 12.889 GB
Image/data size: 8.590 GB
Mask size: 2.147 GB
Gain size: 2.147 GB


In [5]:
FILTERS_BLOSC_BLOSCLZ = Filters(complib='blosc:blosclz', complevel=9)
h5_blosc_blosclz_file_data = open_file('files/proc_blosclz_data_filter.h5', mode='w')
h5_blosc_blosclz_file_data.create_earray('/', 'data', obj=image_data_proc, filters=FILTERS_BLOSC_BLOSCLZ)
h5_blosc_blosclz_file_data.close()

In [7]:
h5_blosc_blosclz_file_gain = open_file('files/proc_blosclz_gain_filter.h5', mode='w')
h5_blosc_blosclz_file_gain.create_earray('/', 'gain', obj=image_gain_proc, filters=FILTERS_BLOSC_BLOSCLZ)
h5_blosc_blosclz_file_gain.close()

h5_blosc_blosclz_file_mask = open_file('files/proc_blosclz_mask_filter.h5', mode='w')
h5_blosc_blosclz_file_mask.create_earray('/', 'mask', obj=image_mask_proc, filters=FILTERS_BLOSC_BLOSCLZ)
h5_blosc_blosclz_file_mask.close()

In [8]:
print('proc compressed data size: {0:.3f}'.format(os.path.getsize('files/proc_blosclz_data_filter.h5') / sys.getsizeof(image_data_proc)))
print('proc compressed gain size: {0:.3f}'.format(os.path.getsize('files/proc_blosclz_gain_filter.h5') / sys.getsizeof(image_gain_proc)))
print('proc compressed mask size: {0:.3f}'.format(os.path.getsize('files/proc_blosclz_mask_filter.h5') / sys.getsizeof(image_mask_proc)))

proc compressed data size: 0.933
proc compressed gain size: 0.028
proc compressed mask size: 0.056


In [9]:
all_compressed = os.path.getsize('files/proc_blosclz_data_filter.h5') + os.path.getsize('files/proc_blosclz_gain_filter.h5') + os.path.getsize('files/proc_blosclz_mask_filter.h5')
print(all_compressed / os.path.getsize(file_proc))

0.6358899155102828


## DSSC

In [5]:
toGB = 1000 * 1000 * 1000

h5_proc_file = open_file(file_proc_dssc, mode='r')
image_data_proc = h5_proc_file.root.INSTRUMENT['SCS_DET_DSSC1M-1']['DET']['0CH0:xtdf']['image']['data'][()]
h5_proc_file.close()

print('Shape of proc image/data: {}'.format(image_data_proc.shape))
print('Proc file size: {0:.3f} GB'.format(os.path.getsize(file_proc) / toGB))
print('Image/data size: {0:.3f} GB'.format(sys.getsizeof(image_data_proc) / toGB))

Shape of proc image/data: (25600, 128, 512)
Proc file size: 12.889 GB
Image/data size: 6.711 GB


In [6]:
FILTERS_BLOSC_BLOSCLZ = Filters(complib='blosc:blosclz', complevel=9)

h5_blosc_blosclz_file_data = open_file('files/proc_blosclz_data_filter.h5', mode='w')
h5_blosc_blosclz_file_data.create_earray('/', 'data', obj=image_data_proc, filters=FILTERS_BLOSC_BLOSCLZ)
h5_blosc_blosclz_file_data.close()

In [7]:
print('proc compressed data size: {0:.3f}'.format(os.path.getsize('files/proc_blosclz_data_filter.h5') / sys.getsizeof(image_data_proc) * 100))

proc compressed data size: 0.446
