# Analyze and compress a Dataset

An example of how to use the analyzer to analyze a **dataset** and then use the results to store it in a compressed file.

### Imports

In [1]:
import xarray as xr
import sys
from pathlib import Path
from enstools.compression.analyzer.analyzer import analyze_dataset

### Download some data

In [2]:
# Name of the xarray tutorial dataset used throughout this example.
dataset_name = "air_temperature"
# Open the tutorial dataset by name via xarray's tutorial helper.
dataset = xr.tutorial.open_dataset(dataset_name)
# Bare last expression: the notebook displays the dataset's repr.
dataset

### Analyze dataset using default constraints

In [3]:
# Run the analyzer without a `constrains` argument so its defaults apply.
# It returns two values: `encoding` (shown later to be a dict mapping each
# variable name to a compression specification string) and `metrics`
# (presumably the quality metrics of the selected encoding — TODO confirm).
encoding, metrics = analyze_dataset(dataset)

INFO: air lossy,zfp,accuracy,1.12  CR:4.5
INFO: air lossy,zfp,rate,7.05  CR:3.8
INFO: air lossy,zfp,precision,15.1  CR:4.4
INFO: air lossy,sz,abs,0.14  CR:4.6
INFO: air lossy,sz,rel,0.00195  CR:4.6
INFO: air lossy,sz,pw_rel,0.000549  CR:4.5


### Analyze dataset using custom constraints

In [4]:
# Re-run the analysis with explicit quality constraints. The constraints are
# given as a single comma-separated string of `metric:threshold` pairs.
# NOTE: this overwrites `encoding` and `metrics` from the default run above.
encoding, metrics = analyze_dataset(
    dataset,
    constrains="correlation_I:3,ssim_I:1",
)

INFO: air lossy,zfp,accuracy,16  CR:7.9
INFO: air lossy,zfp,rate,3.54  CR:7.6
INFO: air lossy,zfp,precision,12  CR:7.8
INFO: air lossy,sz,abs,1.4  CR:11.9
INFO: air lossy,sz,rel,0.0195  CR:11.8
INFO: air lossy,sz,pw_rel,0.00586  CR:11.5


In [5]:
# Display the compression specification selected for each variable.
encoding

{'air': 'lossy,sz,abs,1.4'}

### Save the file using the encoding found in the analysis.

In [6]:
from enstools.io import write

In [7]:
# Path of the temporary output file. It is relative, so it resolves against
# the current working directory; the file is removed again in the last cell.
file_path = Path("tmp.nc")

In [8]:
# Write the dataset to `file_path`, passing the per-variable encoding found
# by the analyzer as the compression specification.
write(dataset, file_path, compression=encoding)

INFO: SZ mode 0 used.
INFO: filter options (0, 1073112678, 1717986918, 1073112678, 1717986918, 1073112678, 1717986918, 1073112678, 1717986918)


In [9]:
# Clean up the temporary file created by this example.
# `missing_ok=True` (Python 3.8+) makes the removal a no-op when the file is
# already gone, avoiding the check-then-delete race of a separate `exists()`
# test and keeping the cell safe to re-run.
file_path.unlink(missing_ok=True)