Commit 7d19ae3
Merge remote-tracking branch 'upstream/master' into yohai-ds_scatter
* upstream/master: (122 commits)
  add missing , and article in error message (pydata#2557)
  Add libnetcdf, libhdf5, pydap and cfgrib to xarray.show_versions() (pydata#2555)
  revert to dev version for 0.11.1
  Release xarray v0.11
  DOC: update whatsnew for xarray 0.11 release (pydata#2548)
  Drop the hack needed to use CachingFileManager as we don't use it anymore. (pydata#2544)
  add full test env for py37 ci env (pydata#2545)
  Remove old-style resample example in documentation (pydata#2543)
  Stop loading tutorial data by default (pydata#2538)
  Remove the old syntax for resample. (pydata#2541)
  Remove use of deprecated, unused keyword. (pydata#2540)
  Deprecate inplace (pydata#2524)
  Zarr chunking (GH2300) (pydata#2487)
  Include multidimensional stacking groupby in docs (pydata#2493) (pydata#2536)
  Switch enable_cftimeindex to True by default (pydata#2516)
  Raise more informative error when converting tuples to Variable. (pydata#2523)
  Global option to always keep/discard attrs on operations (pydata#2482)
  Remove tests where answers change in cftime 1.0.2.1 (pydata#2522)
  Finish deprecation cycle for DataArray.__contains__ checking array values (pydata#2520)
  Fix bug where OverflowError is not being raised (pydata#2519)
  ...
dcherian committed Nov 19, 2018
2 parents fe7f16f + 70e9eb8 commit 7d19ae3
Showing 130 changed files with 9,582 additions and 3,223 deletions.
1 change: 0 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -1,4 +1,3 @@
- [ ] Closes #xxxx (remove if there is no corresponding issue, which should only be the case for minor changes)
- [ ] Tests added (for all bug fixes or enhancements)
- [ ] Tests passed (for all non-documentation changes)
- [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API (remove if this change should not be visible to users, e.g., if it is an internal clean-up, or if this is part of a larger project that will be documented later)
12 changes: 12 additions & 0 deletions .pep8speaks.yml
@@ -0,0 +1,12 @@
# File : .pep8speaks.yml

scanner:
diff_only: True # If True, errors caused by only the patch are shown

pycodestyle:
max-line-length: 79
ignore: # Errors and warnings to ignore
- E402, # module level import not at top of file
- E731, # do not assign a lambda expression, use a def
- W503 # line break before binary operator
- W504 # line break after binary operator
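
For reference, each ignored check corresponds to a style pattern the codebase deliberately allows. A minimal, hypothetical Python sketch (illustrative only, not part of the diff) of what these checks would otherwise flag:

    # E402 (ignored): a module-level import that is not at the top of the file
    CONFIG = {'verbose': True}
    import json

    # E731 (ignored): assigning a lambda expression instead of using a def
    square = lambda x: x ** 2

    # W503 (ignored): line break *before* a binary operator
    total = (square(2)
             + square(3))

    # W504 (ignored): line break *after* a binary operator
    total = (square(2) +
             square(3))

W503 and W504 describe mutually exclusive styles, so projects typically silence both and pick one by convention.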
11 changes: 0 additions & 11 deletions .stickler.yml

This file was deleted.

86 changes: 29 additions & 57 deletions .travis.yml
@@ -1,5 +1,5 @@
# Based on http://conda.pydata.org/docs/travis.html
-language: python
+language: minimal
sudo: false # use container based build
notifications:
email: false
@@ -10,76 +10,48 @@ branches:
matrix:
fast_finish: true
include:
-    - python: 2.7
-      env: CONDA_ENV=py27-min
-    - python: 2.7
-      env: CONDA_ENV=py27-cdat+iris+pynio
-    - python: 3.5
-      env: CONDA_ENV=py35
-    - python: 3.6
-      env: CONDA_ENV=py36
-    - python: 3.6
-      env:
+    - env: CONDA_ENV=py27-min
+    - env: CONDA_ENV=py27-cdat+iris+pynio
+    - env: CONDA_ENV=py35
+    - env: CONDA_ENV=py36
+    - env: CONDA_ENV=py37
+    - env:
        - CONDA_ENV=py36
        - EXTRA_FLAGS="--run-flaky --run-network-tests"
-    - python: 3.6
-      env: CONDA_ENV=py36-netcdf4-dev
+    - env: CONDA_ENV=py36-netcdf4-dev
      addons:
        apt_packages:
          - libhdf5-serial-dev
          - netcdf-bin
          - libnetcdf-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-dask-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-pandas-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-bottleneck-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-condaforge-rc
-    - python: 3.6
-      env: CONDA_ENV=py36-pynio-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-rasterio1.0alpha
-    - python: 3.6
-      env: CONDA_ENV=py36-zarr-dev
-    - python: 3.5
-      env: CONDA_ENV=docs
-    - python: 3.6
-      env: CONDA_ENV=py36-hypothesis
+    - env: CONDA_ENV=py36-dask-dev
+    - env: CONDA_ENV=py36-pandas-dev
+    - env: CONDA_ENV=py36-bottleneck-dev
+    - env: CONDA_ENV=py36-condaforge-rc
+    - env: CONDA_ENV=py36-pynio-dev
+    - env: CONDA_ENV=py36-rasterio-0.36
+    - env: CONDA_ENV=py36-zarr-dev
+    - env: CONDA_ENV=docs
+    - env: CONDA_ENV=py36-hypothesis

allow_failures:
-    - python: 3.6
-      env:
+    - env:
        - CONDA_ENV=py36
        - EXTRA_FLAGS="--run-flaky --run-network-tests"
-    - python: 3.6
-      env: CONDA_ENV=py36-netcdf4-dev
+    - env: CONDA_ENV=py36-netcdf4-dev
      addons:
        apt_packages:
          - libhdf5-serial-dev
          - netcdf-bin
          - libnetcdf-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-dask-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-pandas-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-bottleneck-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-condaforge-rc
-    - python: 3.6
-      env: CONDA_ENV=py36-pynio-dev
-    - python: 3.6
-      env: CONDA_ENV=py36-rasterio1.0alpha
-    - python: 3.6
-      env: CONDA_ENV=py36-zarr-dev
+    - env: CONDA_ENV=py36-pandas-dev
+    - env: CONDA_ENV=py36-bottleneck-dev
+    - env: CONDA_ENV=py36-condaforge-rc
+    - env: CONDA_ENV=py36-pynio-dev
+    - env: CONDA_ENV=py36-zarr-dev

before_install:
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
wget http://repo.continuum.io/miniconda/Miniconda-3.16.0-Linux-x86_64.sh -O miniconda.sh;
else
wget http://repo.continuum.io/miniconda/Miniconda3-3.16.0-Linux-x86_64.sh -O miniconda.sh;
fi
- wget http://repo.continuum.io/miniconda/Miniconda3-3.16.0-Linux-x86_64.sh -O miniconda.sh;
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
@@ -99,9 +71,9 @@ install:
  - python xarray/util/print_versions.py

script:
-  # TODO: restore this check once the upstream pandas issue is fixed:
-  # https://github.com/pandas-dev/pandas/issues/21071
-  # - python -OO -c "import xarray"
+  - which python
+  - python --version
+  - python -OO -c "import xarray"
  - if [[ "$CONDA_ENV" == "docs" ]]; then
      conda install -c conda-forge sphinx sphinx_rtd_theme sphinx-gallery numpydoc;
      sphinx-build -n -j auto -b html -d _build/doctrees doc _build/html;
11 changes: 4 additions & 7 deletions HOW_TO_RELEASE
@@ -14,6 +14,7 @@ Time required: about an hour.
5. Tag the release:
git tag -a v0.X.Y -m 'v0.X.Y'
6. Build source and binary wheels for pypi:
+    git clean -xdf # this deletes all uncommitted changes!
python setup.py bdist_wheel sdist
7. Use twine to register and upload the release on pypi. Be careful, you can't
take this back!
@@ -37,16 +38,12 @@ Time required: about an hour.
git push upstream master
You're done pushing to master!
12. Issue the release on GitHub. Click on "Draft a new release" at
-    https://github.com/pydata/xarray/releases and paste in the latest from
-    whats-new.rst.
+    https://github.com/pydata/xarray/releases. Type in the version number, but
+    don't bother to describe it -- we maintain that on the docs instead.
13. Update the docs. Login to https://readthedocs.org/projects/xray/versions/
and switch your new release tag (at the bottom) from "Inactive" to "Active".
It should now build automatically.
-14. Update conda-forge. Clone https://github.com/conda-forge/xarray-feedstock
-    and update the version number and sha256 in meta.yaml. (On OS X, you can
-    calculate sha256 with `shasum -a 256 xarray-0.X.Y.tar.gz`). Submit a pull
-    request (and merge it, once CI passes).
-15. Issue the release announcement! For bug fix releases, I usually only email
+14. Issue the release announcement! For bug fix releases, I usually only email
xarray@googlegroups.com. For major/feature releases, I will email a broader
list (no more than once every 3-6 months):
pydata@googlegroups.com, xarray@googlegroups.com,
22 changes: 20 additions & 2 deletions README.rst
@@ -15,6 +15,8 @@ xarray: N-D labeled arrays and datasets
:target: https://zenodo.org/badge/latestdoi/13221727
.. image:: http://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat
:target: http://pandas.pydata.org/speed/xarray/
+.. image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A
+   :target: http://numfocus.org

**xarray** (formerly **xray**) is an open source project and Python package that aims to bring the
labeled data power of pandas_ to the physical sciences, by providing
@@ -103,20 +105,36 @@ Get in touch
.. _mailing list: https://groups.google.com/forum/#!forum/xarray
.. _on GitHub: http://github.com/pydata/xarray

+NumFOCUS
+--------
+
+.. image:: https://numfocus.org/wp-content/uploads/2017/07/NumFocus_LRG.png
+   :scale: 25 %
+   :target: https://numfocus.org/
+
+Xarray is a fiscally sponsored project of NumFOCUS_, a nonprofit dedicated
+to supporting the open source scientific computing community. If you like
+Xarray and want to support our mission, please consider making a donation_
+to support our efforts.
+
+.. _donation: https://www.flipcause.com/secure/cause_pdetails/NDE2NTU=

History
-------

xarray is an evolution of an internal tool developed at `The Climate
Corporation`__. It was originally written by Climate Corp researchers Stephan
Hoyer, Alex Kleeman and Eugene Brevdo and was released as open source in
-May 2014. The project was renamed from "xray" in January 2016.
+May 2014. The project was renamed from "xray" in January 2016. Xarray became a
+fiscally sponsored project of NumFOCUS_ in August 2018.

__ http://climate.com/
+.. _NumFOCUS: https://numfocus.org

License
-------

-Copyright 2014-2017, xarray Developers
+Copyright 2014-2018, xarray Developers

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
1 change: 1 addition & 0 deletions asv_bench/asv.conf.json
@@ -64,6 +64,7 @@
"scipy": [""],
"bottleneck": ["", null],
"dask": [""],
"distributed": [""],
},


67 changes: 54 additions & 13 deletions asv_bench/benchmarks/dataset_io.py
@@ -1,11 +1,13 @@
from __future__ import absolute_import, division, print_function

+import os

import numpy as np
import pandas as pd

import xarray as xr

-from . import randn, randint, requires_dask
+from . import randint, randn, requires_dask

try:
import dask
@@ -14,6 +16,9 @@
pass


+os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'


class IOSingleNetCDF(object):
"""
A few examples that benchmark reading/writing a single netCDF file with
@@ -163,7 +168,7 @@ def time_load_dataset_netcdf4_with_block_chunks_vindexing(self):
ds = ds.isel(**self.vinds).load()

def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_dataset(self.filepath, engine='netcdf4',
chunks=self.block_chunks).load()

@@ -172,7 +177,7 @@ def time_load_dataset_netcdf4_with_time_chunks(self):
chunks=self.time_chunks).load()

def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_dataset(self.filepath, engine='netcdf4',
chunks=self.time_chunks).load()

@@ -189,7 +194,7 @@ def setup(self):
self.ds.to_netcdf(self.filepath, format=self.format)

def time_load_dataset_scipy_with_block_chunks(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_dataset(self.filepath, engine='scipy',
chunks=self.block_chunks).load()

@@ -204,7 +209,7 @@ def time_load_dataset_scipy_with_block_chunks_vindexing(self):
ds = ds.isel(**self.vinds).load()

def time_load_dataset_scipy_with_time_chunks(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_dataset(self.filepath, engine='scipy',
chunks=self.time_chunks).load()

@@ -344,7 +349,7 @@ def time_load_dataset_netcdf4_with_block_chunks(self):
chunks=self.block_chunks).load()

def time_load_dataset_netcdf4_with_block_chunks_multiprocessing(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='netcdf4',
chunks=self.block_chunks).load()

@@ -353,7 +358,7 @@ def time_load_dataset_netcdf4_with_time_chunks(self):
chunks=self.time_chunks).load()

def time_load_dataset_netcdf4_with_time_chunks_multiprocessing(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='netcdf4',
chunks=self.time_chunks).load()

@@ -362,7 +367,7 @@ def time_open_dataset_netcdf4_with_block_chunks(self):
chunks=self.block_chunks)

def time_open_dataset_netcdf4_with_block_chunks_multiprocessing(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='netcdf4',
chunks=self.block_chunks)

@@ -371,7 +376,7 @@ def time_open_dataset_netcdf4_with_time_chunks(self):
chunks=self.time_chunks)

def time_open_dataset_netcdf4_with_time_chunks_multiprocessing(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='netcdf4',
chunks=self.time_chunks)

@@ -387,21 +392,57 @@ def setup(self):
format=self.format)

def time_load_dataset_scipy_with_block_chunks(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='scipy',
chunks=self.block_chunks).load()

def time_load_dataset_scipy_with_time_chunks(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='scipy',
chunks=self.time_chunks).load()

def time_open_dataset_scipy_with_block_chunks(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='scipy',
chunks=self.block_chunks)

def time_open_dataset_scipy_with_time_chunks(self):
-with dask.set_options(get=dask.multiprocessing.get):
+with dask.config.set(scheduler="multiprocessing"):
xr.open_mfdataset(self.filenames_list, engine='scipy',
chunks=self.time_chunks)
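
The repeated one-line edits above track an upstream dask API change: dask.set_options(get=dask.multiprocessing.get) was removed in favor of dask.config.set(scheduler=...). A minimal sketch of the new spelling, assuming a local file 'example.nc' with a 'time' dimension (both assumptions, not from the diff):

    import dask
    import xarray as xr

    # Everything inside the block uses dask's multiprocessing scheduler,
    # exactly as the updated benchmarks request it.
    with dask.config.set(scheduler="multiprocessing"):
        ds = xr.open_dataset('example.nc', chunks={'time': 10}).load()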


def create_delayed_write():
import dask.array as da
vals = da.random.random(300, chunks=(1,))
ds = xr.Dataset({'vals': (['a'], vals)})
return ds.to_netcdf('file.nc', engine='netcdf4', compute=False)


class IOWriteNetCDFDask(object):
timeout = 60
repeat = 1
number = 5

def setup(self):
requires_dask()
self.write = create_delayed_write()

def time_write(self):
self.write.compute()


class IOWriteNetCDFDaskDistributed(object):
def setup(self):
try:
import distributed
except ImportError:
raise NotImplementedError
self.client = distributed.Client()
self.write = create_delayed_write()

def cleanup(self):
self.client.shutdown()

def time_write(self):
self.write.compute()
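
The three definitions above are new in this commit and benchmark dask-backed delayed writes. For reference, a small usage sketch of the pattern they exercise (mirroring create_delayed_write above): with compute=False, to_netcdf builds the task graph but performs no I/O until .compute() is called:

    import dask.array as da
    import xarray as xr

    vals = da.random.random(300, chunks=(1,))   # one tiny chunk per element
    ds = xr.Dataset({'vals': (['a'], vals)})

    # Returns a dask.delayed object; nothing has been written yet.
    delayed = ds.to_netcdf('file.nc', engine='netcdf4', compute=False)

    delayed.compute()   # the actual write to file.nc happens here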
