# Test de acceso local

Este notebook muestra el acceso a los datasets climáticos en forma local, mediante tres casos de uso: netcdf4-python, zarr y h5netcdf.

In [1]:
import dask, xarray, zarr

from dask.diagnostics import ProgressBar, Profiler, ResourceProfiler, CacheProfiler
from dask.diagnostics.profile_visualize import visualize

import bokeh
from bokeh.io import output_notebook
#output_notebook()

## NetCDF4 (single thread)

In [2]:
# Select dask's 'synchronous' scheduler for the computations that follow,
# giving the single-thread baseline for this section.
dask.config.set(scheduler='synchronous')

<dask.config.set at 0x7fc4b0166080>

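Antes de cada medición se vacía la caché de páginas del sistema operativo (requiere root), para que los datos se lean desde disco y no desde memoria:

```bash
sync; echo 1 > /proc/sys/vm/drop_caches
```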

In [3]:
# Benchmark: mean of `tas` over `time`, read through the netcdf4 engine with
# explicit dask chunking, under whichever dask scheduler is currently configured.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# Open the dataset as a context manager so the underlying file is closed even
# if the computation raises (a trailing ds.close() would be skipped on error).
with xarray.open_dataset(data, engine="netcdf4", chunks={'time': 2739, 'lat': 8, 'lon': 32}) as ds:
    # Only the computation itself is profiled; opening/closing stays outside.
    with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
        # .load() forces evaluation of the lazy reduction; the result is discarded.
        ds['tas'].mean(dim='time').load()

# Render the task and resource profiles into an HTML report.
visualize([prof, rprof], file_path='netcdf4-synchronous.html')

[########################################] | 100% Completed |  2min 26.2s


## NetCDF4 (threads)

In [4]:
# Select dask's 'threads' scheduler for the computations that follow,
# giving the multi-threaded variant for this section.
dask.config.set(scheduler='threads')

<dask.config.set at 0x7fc46a6f89b0>

sync; echo 1 > /proc/sys/vm/drop_caches

In [5]:
# Benchmark: mean of `tas` over `time`, read through the netcdf4 engine with
# explicit dask chunking, under whichever dask scheduler is currently configured.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# Open the dataset as a context manager so the underlying file is closed even
# if the computation raises (a trailing ds.close() would be skipped on error).
with xarray.open_dataset(data, engine="netcdf4", chunks={'time': 2739, 'lat': 8, 'lon': 32}) as ds:
    # Only the computation itself is profiled; opening/closing stays outside.
    with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
        # .load() forces evaluation of the lazy reduction; the result is discarded.
        ds['tas'].mean(dim='time').load()

# Render the task and resource profiles into an HTML report.
visualize([prof, rprof], file_path='netcdf4-threads.html')

[########################################] | 100% Completed |  1min 30.5s


## Zarr (single thread)

In [6]:
# Select dask's 'synchronous' scheduler for the computations that follow,
# giving the single-thread baseline for this section.
dask.config.set(scheduler='synchronous')

<dask.config.set at 0x7fc46a710390>

sync; echo 1 > /proc/sys/vm/drop_caches

In [7]:
# Benchmark: mean of `tas` over `time`, read from the Zarr store using the
# store's own chunking, under whichever dask scheduler is currently configured.
# The dataset is opened as a context manager so it is closed when the cell
# finishes (or fails), matching the netcdf4/h5netcdf cells that close theirs.
with xarray.open_zarr('shared/zarr/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330') as ds:
    # Only the computation itself is profiled; opening/closing stays outside.
    with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
        # .load() forces evaluation of the lazy reduction; the result is discarded.
        ds['tas'].mean(dim='time').load()

# Render the task and resource profiles into an HTML report.
visualize([prof, rprof], file_path='zarr-synchronous.html')

[########################################] | 100% Completed |  2min 28.8s


## Zarr (threads)

In [8]:
# Select dask's 'threads' scheduler for the computations that follow,
# giving the multi-threaded variant for this section.
dask.config.set(scheduler='threads')

<dask.config.set at 0x7fc4a8e4ab00>

sync; echo 1 > /proc/sys/vm/drop_caches

In [9]:
# Benchmark: mean of `tas` over `time`, read from the Zarr store using the
# store's own chunking, under whichever dask scheduler is currently configured.
# The dataset is opened as a context manager so it is closed when the cell
# finishes (or fails), matching the netcdf4/h5netcdf cells that close theirs.
with xarray.open_zarr('shared/zarr/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330') as ds:
    # Only the computation itself is profiled; opening/closing stays outside.
    with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
        # .load() forces evaluation of the lazy reduction; the result is discarded.
        ds['tas'].mean(dim='time').load()

# Render the task and resource profiles into an HTML report.
visualize([prof, rprof], file_path='zarr-threads.html')

[########################################] | 100% Completed | 49.1s


## h5netcdf (single thread)

In [10]:
# Select dask's 'synchronous' scheduler for the computations that follow,
# giving the single-thread baseline for this section.
dask.config.set(scheduler='synchronous')

<dask.config.set at 0x7fc4a8d34748>

sync; echo 1 > /proc/sys/vm/drop_caches

In [11]:
# Benchmark: mean of `tas` over `time`, read through the h5netcdf engine with
# explicit dask chunking, under whichever dask scheduler is currently configured.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# Open the dataset as a context manager so the underlying file is closed even
# if the computation raises (a trailing ds.close() would be skipped on error).
with xarray.open_dataset(data, engine="h5netcdf", chunks={'time': 2739, 'lat': 8, 'lon': 32}) as ds:
    # Only the computation itself is profiled; opening/closing stays outside.
    with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
        # .load() forces evaluation of the lazy reduction; the result is discarded.
        ds['tas'].mean(dim='time').load()

# Render the task and resource profiles into an HTML report.
visualize([prof, rprof], file_path='h5netcdf-synchronous.html')

[########################################] | 100% Completed |  2min  8.7s


## h5netcdf (threads)

In [12]:
# Select dask's 'threads' scheduler for the computations that follow,
# giving the multi-threaded variant for this section.
dask.config.set(scheduler='threads')

<dask.config.set at 0x7fc4a88d7b70>

sync; echo 1 > /proc/sys/vm/drop_caches

In [13]:
# Benchmark: mean of `tas` over `time`, read through the h5netcdf engine with
# explicit dask chunking, under whichever dask scheduler is currently configured.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# Open the dataset as a context manager so the underlying file is closed even
# if the computation raises (a trailing ds.close() would be skipped on error).
with xarray.open_dataset(data, engine="h5netcdf", chunks={'time': 2739, 'lat': 8, 'lon': 32}) as ds:
    # Only the computation itself is profiled; opening/closing stays outside.
    with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
        # .load() forces evaluation of the lazy reduction; the result is discarded.
        ds['tas'].mean(dim='time').load()

# Render the task and resource profiles into an HTML report.
visualize([prof, rprof], file_path='h5netcdf-threads.html')

[########################################] | 100% Completed |  1min 49.1s
