add deterministic xr-metrics to asv benchmark and asv refactor (#231)
* add xr metrics and refactor asv

* refactor asv

* **kwargs for xr metrics

* fix xr_spearman_r

* rm xr metrics

* keep xr_vs_xs benchmark

Co-authored-by: Ray Bell <rayjohnbell0@gmail.com>
aaronspring and raybellwaves committed Jan 12, 2021
1 parent 7f04738 commit f21a8bc
Showing 5 changed files with 216 additions and 414 deletions.
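The commit bullets above mention keeping an xr_vs_xs benchmark, i.e. timing a plain-xarray implementation of a metric against its xskillscore counterpart. A minimal sketch of that idea, assuming xarray >= 0.16 for xr.corr (the names and the choice of Pearson correlation are illustrative, not code from this commit):

import numpy as np
import xarray as xr
import xskillscore as xs

a = xr.DataArray(np.random.rand(1000, 100), dims=("time", "x"))
b = xr.DataArray(np.random.rand(1000, 100), dims=("time", "x"))

# plain-xarray Pearson correlation along "time"
r_xr = xr.corr(a, b, dim="time")
# the xskillscore counterpart of the same metric
r_xs = xs.pearson_r(a, b, dim="time")

# both approaches agree; an xr-vs-xs benchmark compares how long each takes
xr.testing.assert_allclose(r_xr, r_xs)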
5 changes: 3 additions & 2 deletions CHANGELOG.rst
@@ -34,11 +34,12 @@ Bug Fixes
Internal Changes
~~~~~~~~~~~~~~~~
- Added Python 3.7 and Python 3.8 to the CI. Use the latest version of Python 3
for development. (:issue:`21`, :pr:`189`). `Aaron Spring`_
- Lint with the latest black. (:issue:`179`, :pr:`191`). `Ray Bell`_
for development. (:issue:`21`, :pr:`189`) `Aaron Spring`_
- Lint with the latest black. (:issue:`179`, :pr:`191`) `Ray Bell`_
- Update mape algorithm from scikit-learn v0.24.0 and test against it.
(:issue:`160`, :pr:`230`) `Ray Bell`_
- Pin ``numba`` to ``>=0.52`` to fix CI (:issue:`233`, :pr:`234`) `Ray Bell`_
- Refactor ``asv`` benchmarks. (:pr:`231`) `Aaron Spring`_


xskillscore v0.0.18 (2020-09-23)
56 changes: 53 additions & 3 deletions asv_bench/benchmarks/__init__.py
@@ -1,9 +1,7 @@
# https://github.com/pydata/xarray/blob/master/asv_bench/benchmarks/__init__.py
import itertools

import numpy as np

_counter = itertools.count()
import xarray as xr


def parameterized(names, params):
@@ -47,3 +45,55 @@ def randint(low, high=None, size=None, frac_minus=None, seed=0):
x.flat[inds] = -1

return x


class Generate:
"""
Generate random xr.Dataset ds to be benchmarked.
"""

timeout = 600
repeat = (2, 5, 20)

def make_ds(self, nmember, nx, ny, chunks=None):

# ds
self.ds = xr.Dataset()
self.nmember = nmember
self.nx = nx
self.ny = ny

frac_nan = 0.0

members = np.arange(1, 1 + self.nmember)

lons = xr.DataArray(
np.linspace(0, 360, self.nx),
dims=("lon",),
attrs={"units": "degrees east", "long_name": "longitude"},
)
lats = xr.DataArray(
np.linspace(-90, 90, self.ny),
dims=("lat",),
attrs={"units": "degrees north", "long_name": "latitude"},
)
self.ds["tos"] = xr.DataArray(
randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan, chunks=chunks),
coords={"member": members, "lon": lons, "lat": lats},
dims=("member", "lon", "lat"),
name="tos",
attrs={"units": "foo units", "description": "a description"},
)
self.ds["sos"] = xr.DataArray(
randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan, chunks=chunks),
coords={"member": members, "lon": lons, "lat": lats},
dims=("member", "lon", "lat"),
name="sos",
attrs={"units": "foo units", "description": "a description"},
)
self.ds.attrs = {"history": "created for xskillscore benchmarking"}

# set nans for land sea mask
self.ds = self.ds.where(
(abs(self.ds.lat) > 20) | (self.ds.lat < 100) | (self.ds.lat > 160)
)
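
With dataset generation now living in benchmarks/__init__.py, a benchmark module only needs to subclass Generate, build the data in setup, and time the metric call. A minimal sketch of that pattern (the class name and grid sizes are illustrative, not part of this commit):

from xskillscore import rmse

from . import Generate, parameterized


class ComputeTiny(Generate):
    """Illustrative benchmark on a tiny grid (not part of this commit)."""

    def setup(self, *args, **kwargs):
        # 4 members on a 10 x 5 lon/lat grid, kept in memory (chunks=None)
        self.make_ds(4, 10, 5)

    @parameterized("metric", [rmse])
    def time_metric(self, metric):
        # reduce over the member dimension, as the real benchmarks do
        metric(self.ds["tos"], self.ds["sos"], dim="member").compute()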
114 changes: 17 additions & 97 deletions asv_bench/benchmarks/deterministic.py
@@ -1,13 +1,14 @@
# Write the benchmarking functions here.
# See "Writing benchmarks" in the asv docs for more information.

import os

import numpy as np
import xarray as xr

from xskillscore import mae, mse, pearson_r, pearson_r_p_value, rmse

from . import parameterized, randn, requires_dask
from . import Generate, parameterized, requires_dask

DETERMINISTIC_METRICS = [rmse, pearson_r, mae, mse, pearson_r_p_value]

@@ -16,124 +17,43 @@
nmember = 4


class Generate:
"""
Generate random ds, control to be benchmarked.
"""

timeout = 600
repeat = (2, 5, 20)

def make_ds(self, nmember, nx, ny):

# ds
self.ds = xr.Dataset()
self.nmember = nmember
self.nx = nx # 4 deg
self.ny = ny # 4 deg

frac_nan = 0.0

members = np.arange(1, 1 + self.nmember)

lons = xr.DataArray(
np.linspace(0, 360, self.nx),
dims=("lon",),
attrs={"units": "degrees east", "long_name": "longitude"},
)
lats = xr.DataArray(
np.linspace(-90, 90, self.ny),
dims=("lat",),
attrs={"units": "degrees north", "long_name": "latitude"},
)
self.ds["tos"] = xr.DataArray(
randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan),
coords={"member": members, "lon": lons, "lat": lats},
dims=("member", "lon", "lat"),
name="tos",
encoding=None,
attrs={"units": "foo units", "description": "a description"},
)
self.ds["sos"] = xr.DataArray(
randn((self.nmember, self.nx, self.ny), frac_nan=frac_nan),
coords={"member": members, "lon": lons, "lat": lats},
dims=("member", "lon", "lat"),
name="sos",
encoding=None,
attrs={"units": "foo units", "description": "a description"},
)
self.ds.attrs = {"history": "created for xarray benchmarking"}

# set nans for land sea mask
self.ds = self.ds.where(
(abs(self.ds.lat) > 20) | (self.ds.lat < 100) | (self.ds.lat > 160)
)


class Compute_small(Generate):
"""
A benchmark xskillscore.metric for small xr.DataArrays"""

def setup(self, *args, **kwargs):
self.make_ds(nmember, 90, 45) # 4 degree grid
self.make_ds(nmember, 1, 1) # no grid

@parameterized("metric", DETERMINISTIC_METRICS)
def time_xskillscore_metric_small(self, metric):
def time_xskillscore_metric(self, metric):
"""Take time for xskillscore.metric."""
dim = "member"
metric(self.ds["tos"], self.ds["sos"], dim=dim)
metric(self.ds["tos"], self.ds["sos"], dim=dim).compute()

@parameterized("metric", DETERMINISTIC_METRICS)
def peakmem_xskillscore_metric_small(self, metric):
def peakmem_xskillscore_metric(self, metric):
"""Take memory peak for xskillscore.metric."""
dim = "member"
metric(self.ds["tos"], self.ds["sos"], dim=dim)
metric(self.ds["tos"], self.ds["sos"], dim=dim).compute()


class Compute_large(Generate):
class Compute_large(Compute_small):
"""
A benchmark xskillscore.metric for large xr.DataArrays"""

def setup_cache(self, *args, **kwargs):
self.make_ds(nmember, large_lon_lat, large_lon_lat)
self.ds.to_netcdf("large.nc")

def setup(self, *args, **kwargs):
self.ds = xr.open_dataset("large.nc")

@parameterized("metric", DETERMINISTIC_METRICS)
def time_xskillscore_metric_large(self, metric):
"""Take time for xskillscore.metric."""
dim = "member"
metric(self.ds["tos"], self.ds["sos"], dim=dim)

@parameterized("metric", DETERMINISTIC_METRICS)
def peakmem_xskillscore_metric_large(self, metric):
dim = "member"
"""Take memory peak for xskillscore.metric."""
metric(self.ds["tos"], self.ds["sos"], dim=dim)
self.make_ds(nmember, large_lon_lat, large_lon_lat)


class Compute_large_dask(Generate):
class Compute_large_dask(Compute_large):
"""
A benchmark xskillscore.metric for large xr.DataArrays with dask."""

def setup_cache(self, *args, **kwargs):
requires_dask()
self.make_ds(nmember, large_lon_lat, large_lon_lat)
self.ds.to_netcdf("large.nc")

def setup(self, *args, **kwargs):
self.ds = xr.open_dataset("large.nc", chunks={"lon": large_lon_lat_chunksize})

@parameterized("metric", DETERMINISTIC_METRICS)
def time_xskillscore_metric_large_dask(self, metric):
"""Take time for xskillscore.metric."""
dim = "member"
metric(self.ds["tos"], self.ds["sos"], dim=dim).compute()

@parameterized("metric", DETERMINISTIC_METRICS)
def peakmem_xskillscore_metric_large_dask(self, metric):
dim = "member"
"""Take memory peak for xskillscore.metric."""
metric(self.ds["tos"], self.ds["sos"], dim=dim).compute()
requires_dask()
self.make_ds(
nmember,
large_lon_lat,
large_lon_lat,
chunks={"lon": large_lon_lat_chunksize},
)
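
The timed calls now end in .compute(): with the dask-backed dataset that Compute_large_dask builds (chunks passed through make_ds), each metric returns a lazy result, so forcing the computation is what lets asv measure the actual work rather than just graph construction. A standalone sketch of the same effect, assuming dask is installed (array sizes and chunking are illustrative):

import numpy as np
import xarray as xr
import xskillscore as xs

a = xr.DataArray(np.random.rand(4, 360, 180), dims=("member", "lon", "lat")).chunk({"lon": 90})
b = xr.DataArray(np.random.rand(4, 360, 180), dims=("member", "lon", "lat")).chunk({"lon": 90})

lazy = xs.rmse(a, b, dim="member")  # dask-backed: builds a task graph, computes nothing yet
result = lazy.compute()             # triggers the computation the benchmark actually times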
