From f21a8bc4b28dc300fa5586963b53a68f85c94789 Mon Sep 17 00:00:00 2001
From: Aaron Spring
Date: Tue, 12 Jan 2021 22:33:37 +0100
Subject: [PATCH] add deterministic xr-metrics to asv benchmark and asv refactor (#231)

* add xr metrics and refactor asv

* refactor asv

* **kwargs for xr metrics

* fix xr_spearman_r

* rm xr metrics

* keep xr_vs_xs benchmark

Co-authored-by: Ray Bell
---
 CHANGELOG.rst                         |   5 +-
 asv_bench/benchmarks/__init__.py      |  56 +++++-
 asv_bench/benchmarks/deterministic.py | 114 ++----------
 asv_bench/benchmarks/probabilistic.py | 202 +++-----------------
 asv_bench/benchmarks/xr_vs_xs.py      | 253 ++++++++++++--------------
 5 files changed, 216 insertions(+), 414 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 504ed1f4..fa05a21a 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -34,11 +34,12 @@ Bug Fixes
 Internal Changes
 ~~~~~~~~~~~~~~~~
 - Added Python 3.7 and Python 3.8 to the CI. Use the latest version of Python 3
-  for development. (:issue:`21`, :pr:`189`). `Aaron Spring`_
-- Lint with the latest black. (:issue:`179`, :pr:`191`). `Ray Bell`_
+  for development. (:issue:`21`, :pr:`189`) `Aaron Spring`_
+- Lint with the latest black. (:issue:`179`, :pr:`191`) `Ray Bell`_
 - Update mape algorithm from scikit-learn v0.24.0 and test against it.
   (:issue:`160`, :pr:`230`) `Ray Bell`_
 - Pin ``numba`` to ``>=0.52`` to fix CI (:issue:`233`, :pr:`234`) `Ray Bell`_
+- Refactor ``asv`` benchmarks. (:pr:`231`) `Aaron Spring`_
 
 
 xskillscore v0.0.18 (2020-09-23)
diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py
index 008907d3..c9c55e40 100644
--- a/asv_bench/benchmarks/__init__.py
+++ b/asv_bench/benchmarks/__init__.py
@@ -1,9 +1,7 @@
 # https://github.com/pydata/xarray/blob/master/asv_bench/benchmarks/__init__.py
-import itertools
 
 import numpy as np
-
-_counter = itertools.count()
+import xarray as xr
 
 
 def parameterized(names, params):
@@ -47,3 +45,55 @@ def randint(low, high=None, size=None, frac_minus=None, seed=0):
         x.flat[inds] = -1
 
     return x
+
+
+class Generate:
+    """
+    Generate a random xr.Dataset ``ds`` to be benchmarked.
+    """
+
+    timeout = 600
+    repeat = (2, 5, 20)
+
+    def make_ds(self, nmember, nx, ny, chunks=None):
+
+        # ds
+        self.ds = xr.Dataset()
+        self.nmember = nmember
+        self.nx = nx
+        self.ny = ny
+
+        frac_nan = 0.0
+
+        members = np.arange(1, 1 + self.nmember)
+
+        lons = xr.DataArray(
+            np.linspace(0, 360, self.nx),
+            dims=("lon",),
+            attrs={"units": "degrees east", "long_name": "longitude"},
+        )
+        lats = xr.DataArray(
+            np.linspace(-90, 90, self.ny),
+            dims=("lat",),
+            attrs={"units": "degrees north", "long_name": "latitude"},
+        )
+        self.ds["tos"] = xr.DataArray(
+            randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan, chunks=chunks),
+            coords={"member": members, "lon": lons, "lat": lats},
+            dims=("member", "lon", "lat"),
+            name="tos",
+            attrs={"units": "foo units", "description": "a description"},
+        )
+        self.ds["sos"] = xr.DataArray(
+            randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan, chunks=chunks),
+            coords={"member": members, "lon": lons, "lat": lats},
+            dims=("member", "lon", "lat"),
+            name="sos",
+            attrs={"units": "foo units", "description": "a description"},
+        )
+        self.ds.attrs = {"history": "created for xskillscore benchmarking"}
+
+        # set nans for land sea mask: mask out the box abs(lat) <= 20, 100 <= lon <= 160
+        self.ds = self.ds.where(
+            (abs(self.ds.lat) > 20) | (self.ds.lon < 100) | (self.ds.lon > 160)
+        )
diff --git a/asv_bench/benchmarks/deterministic.py b/asv_bench/benchmarks/deterministic.py
index c28e1d90..eed03e75 100644
--- a/asv_bench/benchmarks/deterministic.py
+++ b/asv_bench/benchmarks/deterministic.py
@@ -1,13 +1,14 @@
 # Write the benchmarking functions here.
 # See "Writing benchmarks" in the asv docs for more information.
 
+import os
 
 import numpy as np
 import xarray as xr
 
 from xskillscore import mae, mse, pearson_r, pearson_r_p_value, rmse
 
-from . import parameterized, randn, requires_dask
+from . import Generate, parameterized, requires_dask
 
 DETERMINISTIC_METRICS = [rmse, pearson_r, mae, mse, pearson_r_p_value]
 
@@ -16,124 +17,43 @@
 nmember = 4
 
 
-class Generate:
-    """
-    Generate random ds, control to be benckmarked.
- """ - - timeout = 600 - repeat = (2, 5, 20) - - def make_ds(self, nmember, nx, ny): - - # ds - self.ds = xr.Dataset() - self.nmember = nmember - self.nx = nx # 4 deg - self.ny = ny # 4 deg - - frac_nan = 0.0 - - members = np.arange(1, 1 + self.nmember) - - lons = xr.DataArray( - np.linspace(0, 360, self.nx), - dims=("lon",), - attrs={"units": "degrees east", "long_name": "longitude"}, - ) - lats = xr.DataArray( - np.linspace(-90, 90, self.ny), - dims=("lat",), - attrs={"units": "degrees north", "long_name": "latitude"}, - ) - self.ds["tos"] = xr.DataArray( - randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan), - coords={"member": members, "lon": lons, "lat": lats}, - dims=("member", "lon", "lat"), - name="tos", - encoding=None, - attrs={"units": "foo units", "description": "a description"}, - ) - self.ds["sos"] = xr.DataArray( - randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan), - coords={"member": members, "lon": lons, "lat": lats}, - dims=("member", "lon", "lat"), - name="sos", - encoding=None, - attrs={"units": "foo units", "description": "a description"}, - ) - self.ds.attrs = {"history": "created for xarray benchmarking"} - - # set nans for land sea mask - self.ds = self.ds.where( - (abs(self.ds.lat) > 20) | (self.ds.lat < 100) | (self.ds.lat > 160) - ) - - class Compute_small(Generate): """ A benchmark xskillscore.metric for small xr.DataArrays""" def setup(self, *args, **kwargs): - self.make_ds(nmember, 90, 45) # 4 degree grid + self.make_ds(nmember, 1, 1) # no grid @parameterized("metric", DETERMINISTIC_METRICS) - def time_xskillscore_metric_small(self, metric): + def time_xskillscore_metric(self, metric): """Take time for xskillscore.metric.""" dim = "member" - metric(self.ds["tos"], self.ds["sos"], dim=dim) + metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() @parameterized("metric", DETERMINISTIC_METRICS) - def peakmem_xskillscore_metric_small(self, metric): + def peakmem_xskillscore_metric(self, metric): dim = "member" """Take memory peak for xskillscore.metric.""" - metric(self.ds["tos"], self.ds["sos"], dim=dim) + metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() -class Compute_large(Generate): +class Compute_large(Compute_small): """ A benchmark xskillscore.metric for large xr.DataArrays""" - def setup_cache(self, *args, **kwargs): - self.make_ds(nmember, large_lon_lat, large_lon_lat) - self.ds.to_netcdf("large.nc") - def setup(self, *args, **kwargs): - self.ds = xr.open_dataset("large.nc") - - @parameterized("metric", DETERMINISTIC_METRICS) - def time_xskillscore_metric_large(self, metric): - """Take time for xskillscore.metric.""" - dim = "member" - metric(self.ds["tos"], self.ds["sos"], dim=dim) - - @parameterized("metric", DETERMINISTIC_METRICS) - def peakmem_xskillscore_metric_large(self, metric): - dim = "member" - """Take memory peak for xskillscore.metric.""" - metric(self.ds["tos"], self.ds["sos"], dim=dim) + self.make_ds(nmember, large_lon_lat, large_lon_lat) -class Compute_large_dask(Generate): +class Compute_large_dask(Compute_large): """ A benchmark xskillscore.metric for large xr.DataArrays with dask.""" - def setup_cache(self, *args, **kwargs): - requires_dask() - self.make_ds(nmember, large_lon_lat, large_lon_lat) - self.ds.to_netcdf("large.nc") - def setup(self, *args, **kwargs): - self.ds = xr.open_dataset("large.nc", chunks={"lon": large_lon_lat_chunksize}) - - @parameterized("metric", DETERMINISTIC_METRICS) - def time_xskillscore_metric_large_dask(self, metric): - """Take time for xskillscore.metric.""" - dim = "member" - 
metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() - - @parameterized("metric", DETERMINISTIC_METRICS) - def peakmem_xskillscore_metric_large_dask(self, metric): - dim = "member" - """Take memory peak for xskillscore.metric.""" - metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() + requires_dask() + self.make_ds( + nmember, + large_lon_lat, + large_lon_lat, + chunks={"lon": large_lon_lat_chunksize}, + ) diff --git a/asv_bench/benchmarks/probabilistic.py b/asv_bench/benchmarks/probabilistic.py index 5b4a67f3..b341707d 100644 --- a/asv_bench/benchmarks/probabilistic.py +++ b/asv_bench/benchmarks/probabilistic.py @@ -14,7 +14,7 @@ threshold_brier_score, ) -from . import parameterized, randn, requires_dask +from . import Generate, parameterized, requires_dask PROBABILISTIC_METRICS = [ crps_ensemble, @@ -31,228 +31,82 @@ nmember = 4 -class Generate: - """ - Generate random fct and obs to be benckmarked. - """ - - timeout = 600 - repeat = (2, 5, 20) - - def make_ds(self, nmember, nx, ny): - - # ds - self.obs = xr.Dataset() - self.fct = xr.Dataset() - self.nmember = nmember - self.nx = nx # 4 deg - self.ny = ny # 4 deg - - frac_nan = 0.0 - - members = np.arange(1, 1 + self.nmember) - - lons = xr.DataArray( - np.linspace(0, 360, self.nx), - dims=("lon",), - attrs={"units": "degrees east", "long_name": "longitude"}, - ) - lats = xr.DataArray( - np.linspace(-90, 90, self.ny), - dims=("lat",), - attrs={"units": "degrees north", "long_name": "latitude"}, - ) - self.fct["tos"] = xr.DataArray( - randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan), - coords={"member": members, "lon": lons, "lat": lats}, - dims=("member", "lon", "lat"), - name="tos", - encoding=None, - attrs={"units": "foo units", "description": "a description"}, - ) - self.obs["tos"] = xr.DataArray( - randn((self.nx, self.ny), frac_nan=frac_nan), - coords={"lon": lons, "lat": lats}, - dims=("lon", "lat"), - name="tos", - encoding=None, - attrs={"units": "foo units", "description": "a description"}, - ) - - self.fct.attrs = {"history": "created for xarray benchmarking"} - self.obs.attrs = {"history": "created for xarray benchmarking"} - - # set nans for land sea mask - self.fct = self.fct.where( - (abs(self.fct.lat) > 20) | (self.fct.lat < 100) | (self.fct.lat > 160) - ) - self.obs = self.obs.where( - (abs(self.obs.lat) > 20) | (self.obs.lat < 100) | (self.obs.lat > 160) - ) - - class Compute_small(Generate): """ A benchmark xskillscore.metric for small xr.DataArrays""" def setup(self, *args, **kwargs): - self.make_ds(nmember, 90, 45) # 4 degree grid + self.make_ds(nmember, 1, 1) # no grid + self.ds["tos"] = self.ds["tos"].isel(member=0, drop=True) @parameterized("metric", PROBABILISTIC_METRICS) - def time_xskillscore_probabilistic_small(self, metric): + def time_xskillscore_metric(self, metric): """Take time for xskillscore.metric.""" if metric is crps_gaussian: mu = 0.5 sig = 0.2 - metric(self.obs["tos"], mu, sig) + metric(self.ds["tos"], mu, sig).compute() elif metric is crps_quadrature: if not including_crps_quadrature: pass else: xmin, xmax, tol = -10, 10, 1e-6 cdf_or_dist = norm - metric(self.obs["tos"], cdf_or_dist, xmin, xmax, tol) + metric(self.ds["tos"], cdf_or_dist, xmin, xmax, tol).compute() elif metric is crps_ensemble: - metric(self.obs["tos"], self.fct["tos"]) + metric(self.ds["tos"], self.ds["sos"]).compute() elif metric is threshold_brier_score: threshold = 0.5 - metric(self.obs["tos"], self.fct["tos"], threshold) + metric(self.ds["tos"], self.ds["sos"], threshold).compute() elif metric is 
brier_score: - metric(self.obs["tos"] > 0.5, (self.fct["tos"] > 0.5).mean("member")) + metric( + self.ds["tos"] > 0.5, (self.ds["sos"] > 0.5).mean("member") + ).compute() @parameterized("metric", PROBABILISTIC_METRICS) - def peakmem_xskillscore_probabilistic_small(self, metric): + def peakmem_xskillscore_metric(self, metric): """Take time for xskillscore.metric.""" if metric is crps_gaussian: mu = 0.5 sig = 0.2 - metric(self.obs["tos"], mu, sig) + metric(self.ds["tos"], mu, sig).compute() elif metric is crps_quadrature: if not including_crps_quadrature: pass else: xmin, xmax, tol = -10, 10, 1e-6 cdf_or_dist = norm - metric(self.obs["tos"], cdf_or_dist, xmin, xmax, tol) + metric(self.ds["tos"], cdf_or_dist, xmin, xmax, tol).compute() elif metric is crps_ensemble: - metric(self.obs["tos"], self.fct["tos"]) + metric(self.ds["tos"], self.ds["sos"]).compute() elif metric is threshold_brier_score: threshold = 0.5 - metric(self.obs["tos"], self.fct["tos"], threshold) + metric(self.ds["tos"], self.ds["sos"], threshold).compute() elif metric is brier_score: - metric(self.obs["tos"] > 0.5, (self.fct["tos"] > 0.5).mean("member")) + metric( + self.ds["tos"] > 0.5, (self.ds["sos"] > 0.5).mean("member") + ).compute() -class Compute_large(Generate): +class Compute_large(Compute_small): """ A benchmark xskillscore.metric for large xr.DataArrays.""" def setup(self, *args, **kwargs): self.make_ds(nmember, large_lon_lat, large_lon_lat) - - @parameterized("metric", PROBABILISTIC_METRICS) - def time_xskillscore_probabilistic_large(self, metric): - """Take time for xskillscore.metric.""" - if metric is crps_gaussian: - mu = 0.5 - sig = 0.2 - metric(self.obs["tos"], mu, sig) - elif metric is crps_quadrature: - if not including_crps_quadrature: - pass - else: - xmin, xmax, tol = -10, 10, 1e-6 - cdf_or_dist = norm - metric(self.obs["tos"], cdf_or_dist, xmin, xmax, tol) - elif metric is crps_ensemble: - metric(self.obs["tos"], self.fct["tos"]) - elif metric is threshold_brier_score: - threshold = 0.5 - metric(self.obs["tos"], self.fct["tos"], threshold) - elif metric is brier_score: - metric(self.obs["tos"] > 0.5, (self.fct["tos"] > 0.5).mean("member")) - - @parameterized("metric", PROBABILISTIC_METRICS) - def peakmem_xskillscore_probabilistic_large(self, metric): - """Take time for xskillscore.metric.""" - if metric is crps_gaussian: - mu = 0.5 - sig = 0.2 - metric(self.obs["tos"], mu, sig) - elif metric is crps_quadrature: - if not including_crps_quadrature: - pass - else: - xmin, xmax, tol = -10, 10, 1e-6 - cdf_or_dist = norm - metric(self.obs["tos"], cdf_or_dist, xmin, xmax, tol) - elif metric is crps_ensemble: - metric(self.obs["tos"], self.fct["tos"]) - elif metric is threshold_brier_score: - threshold = 0.5 - metric(self.obs["tos"], self.fct["tos"], threshold) - elif metric is brier_score: - metric(self.obs["tos"] > 0.5, (self.fct["tos"] > 0.5).mean("member")) + self.ds["tos"] = self.ds["tos"].isel(member=0, drop=True) -class Compute_large_dask(Generate): +class Compute_large_dask(Compute_small): """ A benchmark xskillscore.metric for large xr.DataArrays with dask.""" def setup(self, *args, **kwargs): requires_dask() - self.make_ds(nmember, large_lon_lat, large_lon_lat) - self.obs = self.obs.chunk( - {"lon": large_lon_lat_chunksize, "lat": large_lon_lat_chunksize} + self.make_ds( + nmember, + large_lon_lat, + large_lon_lat, + chunks={"lon": large_lon_lat_chunksize}, ) - self.fct = self.fct.chunk( - {"lon": large_lon_lat_chunksize, "lat": large_lon_lat_chunksize} - ) - - @parameterized("metric", 
PROBABILISTIC_METRICS)
-    def time_xskillscore_probabilistic_large_dask(self, metric):
-        """Take time for xskillscore.metric."""
-        if metric is crps_gaussian:
-            mu = 0.5
-            sig = 0.2
-            metric(self.obs["tos"], mu, sig).compute()
-        elif metric is crps_quadrature:
-            if not including_crps_quadrature:
-                pass
-            else:
-                xmin, xmax, tol = -10, 10, 1e-6
-                cdf_or_dist = norm
-                metric(self.obs["tos"], cdf_or_dist, xmin, xmax, tol).compute()
-        elif metric is crps_ensemble:
-            metric(self.obs["tos"], self.fct["tos"]).compute()
-        elif metric is threshold_brier_score:
-            threshold = 0.5
-            metric(self.obs["tos"], self.fct["tos"], threshold).compute()
-        elif metric is brier_score:
-            metric(
-                self.obs["tos"] > 0.5, (self.fct["tos"] > 0.5).mean("member")
-            ).compute()
-
-    @parameterized("metric", PROBABILISTIC_METRICS)
-    def peakmem_xskillscore_probabilistic_large_dask(self, metric):
-        """Take time for xskillscore.metric."""
-        if metric is crps_gaussian:
-            mu = 0.5
-            sig = 0.2
-            metric(self.obs["tos"], mu, sig).compute()
-        elif metric is crps_quadrature:
-            if not including_crps_quadrature:
-                pass
-            else:
-                xmin, xmax, tol = -10, 10, 1e-6
-                cdf_or_dist = norm
-                metric(self.obs["tos"], cdf_or_dist, xmin, xmax, tol).compute()
-        elif metric is crps_ensemble:
-            metric(self.obs["tos"], self.fct["tos"]).compute()
-        elif metric is threshold_brier_score:
-            threshold = 0.5
-            metric(self.obs["tos"], self.fct["tos"], threshold).compute()
-        elif metric is brier_score:
-            metric(
-                self.obs["tos"] > 0.5, (self.fct["tos"] > 0.5).mean("member")
-            ).compute()
+        self.ds["tos"] = self.ds["tos"].isel(member=0, drop=True)
diff --git a/asv_bench/benchmarks/xr_vs_xs.py b/asv_bench/benchmarks/xr_vs_xs.py
index 82696af7..2259f9ef 100644
--- a/asv_bench/benchmarks/xr_vs_xs.py
+++ b/asv_bench/benchmarks/xr_vs_xs.py
@@ -2,105 +2,114 @@
 # See "Writing benchmarks" in the asv docs for more information.
 
 
+import os
+import shutil
+
+import bottleneck as bn
 import numpy as np
 import pandas as pd
 import xarray as xr
 
-from xskillscore import mse as xs_mse, pearson_r as xs_pearson_r
-
-from . import parameterized, randn, requires_dask
-
-
-def xr_mse(a, b, dim):
-    """mse implementation using xarray only."""
-    return ((a - b) ** 2).mean(dim)
-
-
-def covariance_gufunc(x, y):
-    return (
-        (x - x.mean(axis=-1, keepdims=True)) * (y - y.mean(axis=-1, keepdims=True))
-    ).mean(axis=-1)
-
-
-def pearson_correlation_gufunc(x, y):
-    return covariance_gufunc(x, y) / (x.std(axis=-1) * y.std(axis=-1))
-
-
-def xr_pearson_r(x, y, dim):
-    """pearson_r implementation using xarray and minimal numpy only."""
-    return xr.apply_ufunc(
-        pearson_correlation_gufunc,
-        x,
-        y,
-        input_core_dims=[[dim], [dim]],
-        dask="parallelized",
-        output_dtypes=[float],
-    )
-
-
-METRICS = [xs_mse, xr_mse, xs_pearson_r, xr_pearson_r]
+from xskillscore import (
+    mae as xs_mae,
+    mse as xs_mse,
+    pearson_r as xs_pearson_r,
+    rmse as xs_rmse,
+    spearman_r as xs_spearman_r,
+)
+
+from . import Generate, parameterized, requires_dask
+
+# These metrics, written entirely with xarray functions (formerly in
+# xskillscore.xr.deterministic), are identical to the well-documented metrics in
+# xskillscore.core.deterministic, which apply numpy functions to xarray objects via
+# xarray.apply_ufunc. The xr metrics are only faster on small data; on large data
+# the numpy-based metrics are 20-40% faster, so the xr metrics are kept here for
+# benchmarking only and their use is not encouraged.


+def xr_mse(a, b, dim=None, skipna=True, weights=None):
+    res = (a - b) ** 2
+    if weights is not None:
+        res = res.weighted(weights)
+    res = res.mean(dim=dim, skipna=skipna)
+    return res
+
+
+def xr_mae(a, b, dim=None, skipna=True, weights=None):
+    res = np.abs(a - b)
+    if weights is not None:
+        res = res.weighted(weights)
+    res = res.mean(dim=dim, skipna=skipna)
+    return res
+
+
+def xr_me(a, b, dim=None, skipna=True, weights=None):
+    res = a - b
+    if weights is not None:
+        res = res.weighted(weights)
+    res = res.mean(dim=dim, skipna=skipna)
+    return res
+
+
+def xr_rmse(a, b, dim=None, skipna=True, weights=None):
+    res = (a - b) ** 2
+    if weights is not None:
+        res = res.weighted(weights)
+    res = res.mean(dim=dim, skipna=skipna)
+    res = np.sqrt(res)
+    return res
+
+
+def xr_pearson_r(a, b, dim=None, **kwargs):
+    return xr.corr(a, b, dim)
+
+
+def _rankdata(o, dim):
+    if isinstance(dim, str):
+        dim = [dim]
+    elif dim is None:
+        dim = list(o.dims)
+    if len(dim) == 1:
+        return xr.apply_ufunc(
+            bn.nanrankdata,
+            o,
+            input_core_dims=[[]],
+            kwargs={"axis": o.get_axis_num(dim[0])},
+            dask="allowed",
+        )
+    elif len(dim) > 1:
+        # stack the dims, rank along the stacked dim, then unstack
+        return xr.apply_ufunc(
+            bn.nanrankdata,
+            o.stack(ndim=dim),
+            input_core_dims=[[]],
+            kwargs={"axis": -1},
+            dask="allowed",
+        ).unstack("ndim")
+
+
+def xr_spearman_r(a, b, dim=None, **kwargs):
+    return xr.corr(_rankdata(a, dim), _rankdata(b, dim), dim)
+
+
+METRICS = [
+    xs_mse,
+    xr_mse,
+    xs_rmse,
+    xr_rmse,
+    xs_mae,
+    xr_mae,
+    xs_pearson_r,
+    xr_pearson_r,
+    xs_spearman_r,
+    xr_spearman_r,
+]
+
+DIMS = ["member", ["lon", "lat"]]
 
 large_lon_lat = 2000
 large_lon_lat_chunksize = large_lon_lat // 4
-ntime = 4
-
-
-class Generate:
-    """
-    Generate random ds to be benckmarked.
- """ - - timeout = 600 - repeat = (2, 5, 20) - - def make_ds(self, ntime, nx, ny): - - # ds - self.ds = xr.Dataset() - self.ntime = ntime - self.nx = nx # 4 deg - self.ny = ny # 4 deg - - frac_nan = 0.0 - - times = pd.date_range( - start="1/1/2000", - periods=ntime, - freq="D", - ) - - lons = xr.DataArray( - np.linspace(0, 360, self.nx), - dims=("lon",), - attrs={"units": "degrees east", "long_name": "longitude"}, - ) - lats = xr.DataArray( - np.linspace(-90, 90, self.ny), - dims=("lat",), - attrs={"units": "degrees north", "long_name": "latitude"}, - ) - self.ds["tos"] = xr.DataArray( - randn((self.ntime, self.nx, self.ny), frac_nan=frac_nan), - coords={"time": times, "lon": lons, "lat": lats}, - dims=("time", "lon", "lat"), - name="tos", - encoding=None, - attrs={"units": "foo units", "description": "a description"}, - ) - self.ds["sos"] = xr.DataArray( - randn((self.ntime, self.nx, self.ny), frac_nan=frac_nan), - coords={"time": times, "lon": lons, "lat": lats}, - dims=("time", "lon", "lat"), - name="sos", - encoding=None, - attrs={"units": "foo units", "description": "a description"}, - ) - self.ds.attrs = {"history": "created for xarray benchmarking"} - - # set nans for land sea mask - self.ds = self.ds.where( - (abs(self.ds.lat) > 20) | (self.ds.lat < 100) | (self.ds.lat > 160) - ) +ntime = 24 class Compute_small(Generate): @@ -108,65 +117,33 @@ class Compute_small(Generate): A benchmark xskillscore.metric for small xr.DataArrays""" def setup(self, *args, **kwargs): - self.make_ds(ntime, 90, 45) # 4 degree grid + self.make_ds(ntime, 1, 1) # no grid - @parameterized("metric", METRICS) - def time_xskillscore_metric_small(self, metric): + @parameterized(["metric", "dim"], (METRICS, DIMS)) + def time_xskillscore_metric(self, metric, dim): """Take time for xskillscore.metric.""" - dim = "time" - metric(self.ds["tos"], self.ds["sos"], dim=dim) + metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() - @parameterized("metric", METRICS) - def peakmem_xskillscore_metric_small(self, metric): - dim = "time" + @parameterized(["metric", "dim"], (METRICS, DIMS)) + def peakmem_xskillscore_metric(self, metric, dim): """Take memory peak for xskillscore.metric.""" - metric(self.ds["tos"], self.ds["sos"], dim=dim) + metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() -class Compute_large(Generate): +class Compute_large(Compute_small): """ A benchmark xskillscore.metric for large xr.DataArrays""" - def setup_cache(self, *args, **kwargs): - self.make_ds(ntime, large_lon_lat, large_lon_lat) - self.ds.to_netcdf("large.nc") - def setup(self, *args, **kwargs): - self.ds = xr.open_dataset("large.nc") - - @parameterized("metric", METRICS) - def time_xskillscore_metric_large(self, metric): - """Take time for xskillscore.metric.""" - dim = "time" - metric(self.ds["tos"], self.ds["sos"], dim=dim) - - @parameterized("metric", METRICS) - def peakmem_xskillscore_metric_large(self, metric): - dim = "time" - """Take memory peak for xskillscore.metric.""" - metric(self.ds["tos"], self.ds["sos"], dim=dim) + self.make_ds(ntime, large_lon_lat, large_lon_lat) -class Compute_large_dask(Generate): +class Compute_large_dask(Compute_small): """ A benchmark xskillscore.metric for large xr.DataArrays with dask.""" - def setup_cache(self, *args, **kwargs): - requires_dask() - self.make_ds(ntime, large_lon_lat, large_lon_lat) - self.ds.to_netcdf("large.nc") - def setup(self, *args, **kwargs): - self.ds = xr.open_dataset("large.nc", chunks={"lon": large_lon_lat_chunksize}) - - @parameterized("metric", METRICS) - def 
time_xskillscore_metric_large_dask(self, metric): - """Take time for xskillscore.metric.""" - dim = "time" - metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() - - @parameterized("metric", METRICS) - def peakmem_xskillscore_metric_large_dask(self, metric): - dim = "time" - """Take memory peak for xskillscore.metric.""" - metric(self.ds["tos"], self.ds["sos"], dim=dim).compute() + requires_dask() + self.make_ds( + ntime, large_lon_lat, large_lon_lat, chunks={"lon": large_lon_lat_chunksize} + )
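
---

For reference, the xr-vs-xs comparison that this patch keeps in
asv_bench/benchmarks/xr_vs_xs.py can be reproduced outside asv with a minimal,
self-contained sketch. It assumes only that numpy, xarray, and xskillscore are
installed; the array sizes, variable names, and the timing loop are
illustrative and not part of the benchmark suite (the suite itself uses
large_lon_lat = 2000 and asv's time_/peakmem_ hooks):

    import timeit

    import numpy as np
    import xarray as xr
    import xskillscore as xs


    def xr_mse(a, b, dim=None, skipna=True, weights=None):
        # pure-xarray mse, copied from the patch above
        res = (a - b) ** 2
        if weights is not None:
            res = res.weighted(weights)
        return res.mean(dim=dim, skipna=skipna)


    # dims mirror Generate.make_ds; 500x500 keeps the example quick
    nmember, nx, ny = 24, 500, 500
    a = xr.DataArray(np.random.rand(nmember, nx, ny), dims=("member", "lon", "lat"))
    b = xr.DataArray(np.random.rand(nmember, nx, ny), dims=("member", "lon", "lat"))

    for name, func in [("xs_mse", xs.mse), ("xr_mse", xr_mse)]:
        seconds = timeit.timeit(lambda: func(a, b, dim="member"), number=10) / 10
        print(f"{name}: {seconds:.4f} s per call")

On arrays this large the numpy-based xs_mse should come out ahead, in line with
the 20-40% figure quoted in the module comment above.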