# Background tests for data formatting

## Floating-point error
Can we use 16-bit floats to represent waveform data with acceptable accuracy?

In [1]:
from __future__ import annotations

from pathlib import Path

import iqwaveform
import numcodecs
import numpy as np
import xarray as xr
import zarr
from matplotlib import pyplot as plt

import channel_analysis
# NOTE(review): the recorded output of this cell shows `waveform` failing to
# import from channel_analysis — confirm that it is still exported.
from channel_analysis import io, source, structs, waveform

# NOTE(review): `path` is not read by any cell visible here (dump_size writes
# to a hard-coded 'test.zarr.zip') — confirm whether it is still needed.
path = 'test'


def recursive_size(path):
    """Return the total size in bytes of the file or directory tree at ``path``.

    A regular file yields its own size. Any other path is searched
    recursively and the sizes of all regular files found beneath it are
    summed, so an empty directory or a path that does not exist yields 0.
    """
    root = Path(path)
    if root.is_file():
        return root.stat().st_size

    # rglob('*') visits the same entries as glob('**/*')
    return sum(entry.stat().st_size for entry in root.rglob('*') if entry.is_file())


def dump_size(x, *args, **kwargs):
    """Dump ``x`` with ``io.dump`` and return the resulting size on disk in bytes.

    The output always goes to 'test.zarr.zip' in the working directory and
    ``mode='w'`` is always passed; any extra positional and keyword
    arguments are forwarded to ``io.dump`` unchanged.
    """
    archive = 'test.zarr.zip'
    io.dump(archive, x, *args, mode='w', **kwargs)
    return recursive_size(archive)


# Capture parameters for the simulated input.
# NOTE(review): units are presumably Hz and seconds — confirm against structs.FilteredCapture.
fs = 15.36e6
capture = structs.FilteredCapture(
    duration=0.1,
    sample_rate=fs,
    analysis_bandwidth=10e6,
)
iq = channel_analysis.simulated_awgn(capture)

# Wrap the persistence spectrum in a single-variable Dataset; this is the
# payload that the size and timing comparisons in the following cells write out.
x = xr.Dataset(
    {
        'persistence_spectrum': channel_analysis.persistence_spectrum(
            iq,
            capture,
            window=('dpss', 4),
            frequency_resolution=1e3,
            persistence_statistics=('mean', 0.5, 0.75, 0.9, 0.99, 'max'),
            truncate=False,
        ).to_xarray()
    }
)

# Baseline for the size comparisons: the dump with compression and filtering
# both switched off.
ref_size = dump_size(x, compression=False, filter=False)

quantizer = io.QuantizeTodB(3, 'float32')

ImportError: cannot import name 'waveform' from 'channel_analysis' (/Users/dkuester/Documents/src/flex-spectrum-sensor/src/channel_analysis/__init__.py)

In [2]:
chunk_duration = 1000e-3

for clevel in [1, 3, 5, 7, 9]:
    compressors = (
        numcodecs.Blosc(cname='zstd', clevel=clevel, shuffle=-1),
        numcodecs.Blosc(cname='zlib', clevel=clevel, shuffle=-1),
        numcodecs.Blosc(cname='lz4', clevel=clevel, shuffle=-1),
        numcodecs.Blosc(cname='lz4hc', clevel=clevel, shuffle=-1),
        numcodecs.Blosc(cname='blosclz', clevel=clevel, shuffle=-1),
    )

    for compressor in compressors:
        out_size = dump_size(x, compression=compressor, filter=True)
        %timeit -n10 dump_size(x, compression=compressor, filter=True)

        print(
            f'{compressor} - disk size: {out_size/1e3:0.1f} kB, mem size: {ref_size/1e3:0.1f} kB, CR={out_size/ref_size:0.3f}'
        )

4.33 ms ± 226 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='zstd', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 179.1 kB, mem size: 309.5 kB, CR=0.579
6.34 ms ± 160 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='zlib', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 166.5 kB, mem size: 309.5 kB, CR=0.538
3.84 ms ± 71.8 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='lz4', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 209.4 kB, mem size: 309.5 kB, CR=0.677
5.59 ms ± 295 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='lz4hc', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 188.2 kB, mem size: 309.5 kB, CR=0.608
4.17 ms ± 165 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='blosclz', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 261.3 kB, mem size: 309.5 kB, CR=0.844
5.45 ms ± 875 µs per loop (mean ± std. dev. of 7 runs, 10 loops ea

In [3]:
for clevel in [1]:
    compressor = numcodecs.Blosc(cname='zlib', clevel=clevel, shuffle=-1)
    filters = [
        io.QuantizeTodB(2, 'float32'),
        io.QuantizeTodB(3, 'float32'),
        io.QuantizeTodB(4, 'float32'),
        False,
    ]

    for filter in filters:
        out_size = dump_size(x, compression=compressor, filter=filter)
        %timeit -n10 dump_size(x, compression=compressor, filter=filter)

        print(
            f'{compressor} - disk size: {out_size/1e3:0.1f} kB, mem size: {ref_size/1e3:0.1f} kB, CR={out_size/ref_size:0.3f}'
        )

6.73 ms ± 857 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='zlib', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 166.5 kB, mem size: 309.5 kB, CR=0.538
6.35 ms ± 95.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='zlib', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 166.5 kB, mem size: 309.5 kB, CR=0.538
6.43 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='zlib', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 166.5 kB, mem size: 309.5 kB, CR=0.538
7.57 ms ± 299 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Blosc(cname='zlib', clevel=1, shuffle=AUTOSHUFFLE, blocksize=0) - disk size: 297.3 kB, mem size: 309.5 kB, CR=0.961
