# Test de acceso local

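Este notebook mide el acceso local a los datasets climáticos mediante tres backends de xarray: netCDF4 (netcdf4-python), Zarr y h5netcdf. Cada uno se prueba con el scheduler de dask en modo `synchronous` (un solo hilo) y en modo `threads`.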

In [1]:
import dask, xarray, zarr

from dask.diagnostics import ProgressBar, Profiler, ResourceProfiler, CacheProfiler
from dask.diagnostics.profile_visualize import visualize

import bokeh
from bokeh.io import output_notebook
output_notebook()

## NetCDF4 (single thread)

In [2]:
# Select dask's 'synchronous' (single-threaded) scheduler for the cells that follow.
dask.config.set(scheduler='synchronous')

<dask.config.set at 0x7f68042d07b8>

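Antes de medir, vaciar la caché de páginas del sistema operativo para que la lectura no salga de memoria (ejecutar como root en una terminal): `sync; echo 1 > /proc/sys/vm/drop_caches`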

In [3]:
# NetCDF file used for the netcdf4-engine benchmark.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# Open through the netcdf4 engine. Passing `chunks` makes the variables
# dask-backed, with blocks of 2739 x 8 x 32 along (time, lat, lon).
ds = xarray.open_dataset(
    data,
    engine="netcdf4",
    chunks={'time': 2739, 'lat': 8, 'lon': 32},
)

# Show the dataset summary as the cell output.
ds

<xarray.Dataset>
Dimensions:      (axis_nbounds: 2, lat: 128, lon: 256, time: 43824)
Coordinates:
  * lat          (lat) float64 -88.93 -87.54 -86.14 -84.74 ... 86.14 87.54 88.93
  * lon          (lon) float64 0.0 1.406 2.812 4.219 ... 354.4 355.8 357.2 358.6
    height       float64 ...
  * time         (time) datetime64[ns] 1850-01-01T00:30:00 ... 1854-12-31T23:30:00
Dimensions without coordinates: axis_nbounds
Data variables:
    time_bounds  (time, axis_nbounds) datetime64[ns] dask.array<shape=(43824, 2), chunksize=(2739, 2)>
    tas          (time, lat, lon) float32 dask.array<shape=(43824, 128, 256), chunksize=(2739, 8, 32)>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    creation_date:          2018-09-15T06:24:21Z
    description:            CMIP6 historical
    title:                  CNRM-ESM2-1 model output prepared for CMIP6 / CMI...
    activity_id:            CMIP
    contact:                contact.cmip@meteo.fr
    data_specs_version:     01.00.21
    dr2xml

In [4]:
# Benchmark: mean of `tas` over the time dimension, executed under a progress
# bar, a task profiler (`prof`) and a resource profiler sampled with dt=1 (`rprof`).
with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
    # .load() forces the lazy reduction to run; the result itself is not kept.
    (
        ds['tas']
        .mean(dim='time')
        .load()
    )

[########################################] | 100% Completed |  2min 27.5s


In [5]:
# Render the task and resource profiles from the previous cell into an HTML report.
visualize(
    [prof, rprof],
    file_path='netcdf4-synchronous.html',
)

## NetCDF4 (threads)

In [6]:
# Switch dask to the 'threads' scheduler for the cells that follow.
dask.config.set(scheduler='threads')

<dask.config.set at 0x7f67f5758320>

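Antes de medir, vaciar la caché de páginas del sistema operativo para que la lectura no salga de memoria (ejecutar como root en una terminal): `sync; echo 1 > /proc/sys/vm/drop_caches`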

In [7]:
# Same NetCDF file as in the single-thread netcdf4 run.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# Re-open through the netcdf4 engine with identical dask chunking
# (2739 x 8 x 32 along time, lat, lon) so only the scheduler differs.
ds = xarray.open_dataset(
    data,
    engine="netcdf4",
    chunks={'time': 2739, 'lat': 8, 'lon': 32},
)

# Show the dataset summary as the cell output.
ds

<xarray.Dataset>
Dimensions:      (axis_nbounds: 2, lat: 128, lon: 256, time: 43824)
Coordinates:
  * lat          (lat) float64 -88.93 -87.54 -86.14 -84.74 ... 86.14 87.54 88.93
  * lon          (lon) float64 0.0 1.406 2.812 4.219 ... 354.4 355.8 357.2 358.6
    height       float64 ...
  * time         (time) datetime64[ns] 1850-01-01T00:30:00 ... 1854-12-31T23:30:00
Dimensions without coordinates: axis_nbounds
Data variables:
    time_bounds  (time, axis_nbounds) datetime64[ns] dask.array<shape=(43824, 2), chunksize=(2739, 2)>
    tas          (time, lat, lon) float32 dask.array<shape=(43824, 128, 256), chunksize=(2739, 8, 32)>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    creation_date:          2018-09-15T06:24:21Z
    description:            CMIP6 historical
    title:                  CNRM-ESM2-1 model output prepared for CMIP6 / CMI...
    activity_id:            CMIP
    contact:                contact.cmip@meteo.fr
    data_specs_version:     01.00.21
    dr2xml

In [8]:
# Benchmark (netcdf4, threads): mean of `tas` over time, executed under a
# progress bar, a task profiler (`prof`) and a resource profiler with dt=1 (`rprof`).
with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
    # .load() triggers the actual computation; the returned array is discarded.
    (
        ds['tas']
        .mean(dim='time')
        .load()
    )

[########################################] | 100% Completed |  1min 34.8s


In [9]:
# Render the task and resource profiles from the previous cell into an HTML report.
visualize(
    [prof, rprof],
    file_path='netcdf4-threads.html',
)

## Zarr (single thread)

In [15]:
# Back to dask's 'synchronous' (single-threaded) scheduler for the Zarr run.
dask.config.set(scheduler='synchronous')

<dask.config.set at 0x7f67f561b240>

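Antes de medir, vaciar la caché de páginas del sistema operativo para que la lectura no salga de memoria (ejecutar como root en una terminal): `sync; echo 1 > /proc/sys/vm/drop_caches`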

In [16]:
# Open the Zarr store. No `chunks` argument is passed here, so the dask
# chunking is whatever open_zarr derives from the store itself.
ds = xarray.open_zarr('shared/zarr/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330')
# Show the dataset summary as the cell output.
ds

<xarray.Dataset>
Dimensions:      (axis_nbounds: 2, lat: 128, lon: 256, time: 43824)
Coordinates:
    height       float64 ...
  * lat          (lat) float64 -88.93 -87.54 -86.14 -84.74 ... 86.14 87.54 88.93
  * lon          (lon) float64 0.0 1.406 2.812 4.219 ... 354.4 355.8 357.2 358.6
  * time         (time) datetime64[ns] 1850-01-01T00:30:00 ... 1854-12-31T23:30:00
Dimensions without coordinates: axis_nbounds
Data variables:
    tas          (time, lat, lon) float32 dask.array<shape=(43824, 128, 256), chunksize=(2739, 8, 32)>
    time_bounds  (time, axis_nbounds) datetime64[ns] dask.array<shape=(43824, 2), chunksize=(21912, 1)>
Attributes:
    CMIP6_CV_version:       cv=6.2.3.0-7-g2019642
    Conventions:            CF-1.7 CMIP-6.2
    EXPID:                  CNRM-ESM2-1_historical_r1i1p1f2_v2
    activity_id:            CMIP
    arpege_minor_version:   6.3.2
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    contact:           

In [17]:
# Benchmark (zarr, synchronous): mean of `tas` over time, executed under a
# progress bar, a task profiler (`prof`) and a resource profiler with dt=1 (`rprof`).
with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
    # .load() triggers the actual computation; the returned array is discarded.
    (
        ds['tas']
        .mean(dim='time')
        .load()
    )

[########################################] | 100% Completed |  3min  8.9s


In [18]:
# Render the task and resource profiles from the previous cell into an HTML report.
visualize(
    [prof, rprof],
    file_path='zarr-synchronous.html',
)

## Zarr (threads)

In [19]:
# Switch dask to the 'threads' scheduler for the threaded Zarr run.
dask.config.set(scheduler='threads')

<dask.config.set at 0x7f677cec4358>

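Antes de medir, vaciar la caché de páginas del sistema operativo para que la lectura no salga de memoria (ejecutar como root en una terminal): `sync; echo 1 > /proc/sys/vm/drop_caches`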

In [20]:
# Re-open the same Zarr store for the threaded run. No `chunks` argument is
# passed, so the dask chunking is whatever open_zarr derives from the store.
ds = xarray.open_zarr('shared/zarr/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330')
# Show the dataset summary as the cell output.
ds

<xarray.Dataset>
Dimensions:      (axis_nbounds: 2, lat: 128, lon: 256, time: 43824)
Coordinates:
    height       float64 ...
  * lat          (lat) float64 -88.93 -87.54 -86.14 -84.74 ... 86.14 87.54 88.93
  * lon          (lon) float64 0.0 1.406 2.812 4.219 ... 354.4 355.8 357.2 358.6
  * time         (time) datetime64[ns] 1850-01-01T00:30:00 ... 1854-12-31T23:30:00
Dimensions without coordinates: axis_nbounds
Data variables:
    tas          (time, lat, lon) float32 dask.array<shape=(43824, 128, 256), chunksize=(2739, 8, 32)>
    time_bounds  (time, axis_nbounds) datetime64[ns] dask.array<shape=(43824, 2), chunksize=(21912, 1)>
Attributes:
    CMIP6_CV_version:       cv=6.2.3.0-7-g2019642
    Conventions:            CF-1.7 CMIP-6.2
    EXPID:                  CNRM-ESM2-1_historical_r1i1p1f2_v2
    activity_id:            CMIP
    arpege_minor_version:   6.3.2
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  0.0
    contact:           

In [21]:
# Benchmark (zarr, threads): mean of `tas` over time, executed under a
# progress bar, a task profiler (`prof`) and a resource profiler with dt=1 (`rprof`).
with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
    # .load() triggers the actual computation; the returned array is discarded.
    (
        ds['tas']
        .mean(dim='time')
        .load()
    )

[########################################] | 100% Completed | 49.3s


In [22]:
# Render the task and resource profiles from the previous cell into an HTML report.
visualize(
    [prof, rprof],
    file_path='zarr-threads.html',
)

## h5netcdf (single thread)

In [10]:
# Select dask's 'synchronous' (single-threaded) scheduler for the h5netcdf run.
dask.config.set(scheduler='synchronous')

<dask.config.set at 0x7f67f5209b38>

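Antes de medir, vaciar la caché de páginas del sistema operativo para que la lectura no salga de memoria (ejecutar como root en una terminal): `sync; echo 1 > /proc/sys/vm/drop_caches`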

In [12]:
# Same NetCDF file as in the netcdf4 sections, this time read via h5netcdf.
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# NOTE(review): the saved output of this cell is
# "OSError: Unable to open file (file close degree doesn't match)".
# Presumably the same file is still held open in this kernel through the
# netcdf4 engine from an earlier section -- confirm, and close that dataset
# (or restart the kernel) before running this cell.
ds = xarray.open_dataset(
    data,
    engine="h5netcdf",
    chunks={'time': 2739, 'lat': 8, 'lon': 32},
)

# Show the dataset summary as the cell output.
ds

OSError: Unable to open file (file close degree doesn't match)

In [None]:
# Benchmark (h5netcdf, synchronous): mean of `tas` over time, executed under a
# progress bar, a task profiler (`prof`) and a resource profiler with dt=1 (`rprof`).
with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
    # .load() triggers the actual computation; the returned array is discarded.
    (
        ds['tas']
        .mean(dim='time')
        .load()
    )

In [None]:
# Render the task and resource profiles from the previous cell into an HTML report.
visualize(
    [prof, rprof],
    file_path='h5netcdf-synchronous.html',
)

## h5netcdf (threads)

In [13]:
# Switch dask to the 'threads' scheduler for the threaded h5netcdf run.
dask.config.set(scheduler='threads')

<dask.config.set at 0x7f677cfdb668>

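Antes de medir, vaciar la caché de páginas del sistema operativo para que la lectura no salga de memoria (ejecutar como root en una terminal): `sync; echo 1 > /proc/sys/vm/drop_caches`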

In [14]:
# Same NetCDF file as in the netcdf4 sections, read via h5netcdf (threaded run).
data = 'shared/chunked/tas_AERhr_CNRM-ESM2-1_historical_r1i1p1f2_gr_185001010030-185412312330.nc'

# NOTE(review): the saved output of this cell is
# "OSError: Unable to open file (file close degree doesn't match)".
# Presumably the same file is still held open in this kernel through the
# netcdf4 engine from an earlier section -- confirm, and close that dataset
# (or restart the kernel) before running this cell.
ds = xarray.open_dataset(
    data,
    engine="h5netcdf",
    chunks={'time': 2739, 'lat': 8, 'lon': 32},
)

# Show the dataset summary as the cell output.
ds

OSError: Unable to open file (file close degree doesn't match)

In [6]:
# Benchmark (h5netcdf, threads): mean of `tas` over time, executed under a
# progress bar, a task profiler (`prof`) and a resource profiler with dt=1 (`rprof`).
# NOTE(review): the saved timing for this cell carries an out-of-sequence
# execution count, so it was presumably produced in a different run than the
# failed open just before it -- re-run the section to confirm the number.
with ProgressBar(), Profiler() as prof, ResourceProfiler(dt=1) as rprof:
    # .load() triggers the actual computation; the returned array is discarded.
    (
        ds['tas']
        .mean(dim='time')
        .load()
    )

[########################################] | 100% Completed |  2min 51.9s


In [7]:
# Render the task and resource profiles from the previous cell into an HTML report.
visualize(
    [prof, rprof],
    file_path='h5netcdf-threads.html',
)